From 3347fa0928210d96aaa2bd6cd5a8391d5e630873 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 16 Sep 2016 15:49:32 -0400 Subject: workqueue: make workqueue available early during boot Workqueue is currently initialized in an early init call; however, there are cases where early boot code has to be split and reordered to come after workqueue initialization or the same code path which makes use of workqueues is used both before workqueue initialization and after. The latter cases have to gate workqueue usages with keventd_up() tests, which is nasty and easy to get wrong. Workqueue usages have become widespread and it'd be a lot more convenient if it can be used very early from boot. This patch splits workqueue initialization into two steps. workqueue_init_early() which sets up the basic data structures so that workqueues can be created and work items queued, and workqueue_init() which actually brings up workqueues online and starts executing queued work items. The former step can be done very early during boot once memory allocation, cpumasks and idr are initialized. The latter right after kthreads become available. This allows work item queueing and canceling from very early boot which is what most of these use cases want. * As system_wq being initialized doesn't indicate that workqueue is fully online anymore, update keventd_up() to test wq_online instead. The follow-up patches will get rid of all its usages and the function itself. * Flushing doesn't make sense before workqueue is fully initialized. The flush functions trigger WARN and return immediately before fully online. * Work items are never in-flight before fully online. Canceling can always succeed by skipping the flush step. * Some code paths can no longer assume to be called with irq enabled as irq is disabled during early boot. Use irqsave/restore operations instead. v2: Watchdog init, which requires timer to be running, moved from workqueue_init_early() to workqueue_init().
Signed-off-by: Tejun Heo Suggested-by: Linus Torvalds Link: http://lkml.kernel.org/r/CA+55aFx0vPuMuxn00rBSM192n-Du5uxy+4AvKa0SBSOVJeuCGg@mail.gmail.com --- include/linux/workqueue.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 26cc1df280d6..91d416f9c0a7 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -358,6 +358,8 @@ extern struct workqueue_struct *system_freezable_wq; extern struct workqueue_struct *system_power_efficient_wq; extern struct workqueue_struct *system_freezable_power_efficient_wq; +extern bool wq_online; + extern struct workqueue_struct * __alloc_workqueue_key(const char *fmt, unsigned int flags, int max_active, struct lock_class_key *key, const char *lock_name, ...) __printf(1, 6); @@ -594,7 +596,7 @@ static inline bool schedule_delayed_work(struct delayed_work *dwork, */ static inline bool keventd_up(void) { - return system_wq != NULL; + return wq_online; } #ifndef CONFIG_SMP @@ -631,4 +633,7 @@ int workqueue_online_cpu(unsigned int cpu); int workqueue_offline_cpu(unsigned int cpu); #endif +int __init workqueue_init_early(void); +int __init workqueue_init(void); + #endif -- cgit v1.2.3 From 863b710b664bdcb90c0c682ee24adb368f497a5b Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 16 Sep 2016 15:49:34 -0400 Subject: workqueue: remove keventd_up() keventd_up() no longer has in-kernel users. Remove it and make wq_online static. 
Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 10 ---------- kernel/workqueue.c | 2 +- 2 files changed, 1 insertion(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 91d416f9c0a7..56417133c672 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -358,8 +358,6 @@ extern struct workqueue_struct *system_freezable_wq; extern struct workqueue_struct *system_power_efficient_wq; extern struct workqueue_struct *system_freezable_power_efficient_wq; -extern bool wq_online; - extern struct workqueue_struct * __alloc_workqueue_key(const char *fmt, unsigned int flags, int max_active, struct lock_class_key *key, const char *lock_name, ...) __printf(1, 6); @@ -591,14 +589,6 @@ static inline bool schedule_delayed_work(struct delayed_work *dwork, return queue_delayed_work(system_wq, dwork, delay); } -/** - * keventd_up - is workqueue initialized yet? - */ -static inline bool keventd_up(void) -{ - return wq_online; -} - #ifndef CONFIG_SMP static inline long work_on_cpu(int cpu, long (*fn)(void *), void *arg) { diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 15d0811c9e91..ad0cd439223b 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -290,7 +290,7 @@ module_param_named(disable_numa, wq_disable_numa, bool, 0444); static bool wq_power_efficient = IS_ENABLED(CONFIG_WQ_POWER_EFFICIENT_DEFAULT); module_param_named(power_efficient, wq_power_efficient, bool, 0444); -bool wq_online; /* can kworkers be created yet? */ +static bool wq_online; /* can kworkers be created yet? 
*/ static bool wq_numa_enabled; /* unbound NUMA affinity enabled */ -- cgit v1.2.3 From 43ece27e70b2c756e45306791955507f0533e248 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Fri, 23 Sep 2016 17:19:41 +0200 Subject: iio:trigger: Add helper function to verify that a trigger belongs to the same device Some triggers can only be attached to the IIO device that corresponds to the same physical device. Currently each driver that requires this implements its own trigger validation function. Introduce a new helper function called iio_trigger_validate_own_device() that can be used to do this check. Having a common implementation avoids code duplication and unnecessary boiler-plate code. Signed-off-by: Lars-Peter Clausen Signed-off-by: Jonathan Cameron --- drivers/iio/industrialio-trigger.c | 21 +++++++++++++++++++++ include/linux/iio/trigger.h | 2 ++ 2 files changed, 23 insertions(+) (limited to 'include/linux') diff --git a/drivers/iio/industrialio-trigger.c b/drivers/iio/industrialio-trigger.c index e1e104845e38..978729f6d7c4 100644 --- a/drivers/iio/industrialio-trigger.c +++ b/drivers/iio/industrialio-trigger.c @@ -717,6 +717,27 @@ bool iio_trigger_using_own(struct iio_dev *indio_dev) } EXPORT_SYMBOL(iio_trigger_using_own); +/** + * iio_trigger_validate_own_device - Check if a trigger and IIO device belong to + * the same device + * @trig: The IIO trigger to check + * @indio_dev: the IIO device to check + * + * This function can be used as the validate_device callback for triggers that + * can only be attached to their own device. + * + * Return: 0 if both the trigger and the IIO device belong to the same + * device, -EINVAL otherwise. 
+ */ +int iio_trigger_validate_own_device(struct iio_trigger *trig, + struct iio_dev *indio_dev) +{ + if (indio_dev->dev.parent != trig->dev.parent) + return -EINVAL; + return 0; +} +EXPORT_SYMBOL(iio_trigger_validate_own_device); + void iio_device_register_trigger_consumer(struct iio_dev *indio_dev) { indio_dev->groups[indio_dev->groupcounter++] = diff --git a/include/linux/iio/trigger.h b/include/linux/iio/trigger.h index 4f1154f7a33c..ea08302f2d7b 100644 --- a/include/linux/iio/trigger.h +++ b/include/linux/iio/trigger.h @@ -170,6 +170,8 @@ void iio_trigger_free(struct iio_trigger *trig); */ bool iio_trigger_using_own(struct iio_dev *indio_dev); +int iio_trigger_validate_own_device(struct iio_trigger *trig, + struct iio_dev *indio_dev); #else struct iio_trigger; -- cgit v1.2.3 From 0023e67dd8951737588b8af0469446df3ec52afe Mon Sep 17 00:00:00 2001 From: Matt Ranostay Date: Fri, 23 Sep 2016 23:04:07 -0700 Subject: iio: inkern: add iio_read_channel_offset helper Allow access to underlying channel IIO_CHAN_INFO_OFFSET from a consumer. 
Signed-off-by: Matt Ranostay Signed-off-by: Jonathan Cameron --- drivers/iio/inkern.c | 39 ++++++++++++++++++++++++++------------- include/linux/iio/consumer.h | 13 +++++++++++++ 2 files changed, 39 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/drivers/iio/inkern.c b/drivers/iio/inkern.c index c4757e6367e7..29df11572858 100644 --- a/drivers/iio/inkern.c +++ b/drivers/iio/inkern.c @@ -658,6 +658,31 @@ err_unlock: } EXPORT_SYMBOL_GPL(iio_convert_raw_to_processed); +static int iio_read_channel_attribute(struct iio_channel *chan, + int *val, int *val2, + enum iio_chan_info_enum attribute) +{ + int ret; + + mutex_lock(&chan->indio_dev->info_exist_lock); + if (chan->indio_dev->info == NULL) { + ret = -ENODEV; + goto err_unlock; + } + + ret = iio_channel_read(chan, val, val2, attribute); +err_unlock: + mutex_unlock(&chan->indio_dev->info_exist_lock); + + return ret; +} + +int iio_read_channel_offset(struct iio_channel *chan, int *val, int *val2) +{ + return iio_read_channel_attribute(chan, val, val2, IIO_CHAN_INFO_OFFSET); +} +EXPORT_SYMBOL_GPL(iio_read_channel_offset); + int iio_read_channel_processed(struct iio_channel *chan, int *val) { int ret; @@ -687,19 +712,7 @@ EXPORT_SYMBOL_GPL(iio_read_channel_processed); int iio_read_channel_scale(struct iio_channel *chan, int *val, int *val2) { - int ret; - - mutex_lock(&chan->indio_dev->info_exist_lock); - if (chan->indio_dev->info == NULL) { - ret = -ENODEV; - goto err_unlock; - } - - ret = iio_channel_read(chan, val, val2, IIO_CHAN_INFO_SCALE); -err_unlock: - mutex_unlock(&chan->indio_dev->info_exist_lock); - - return ret; + return iio_read_channel_attribute(chan, val, val2, IIO_CHAN_INFO_SCALE); } EXPORT_SYMBOL_GPL(iio_read_channel_scale); diff --git a/include/linux/iio/consumer.h b/include/linux/iio/consumer.h index 9edccfba1ffb..638157234357 100644 --- a/include/linux/iio/consumer.h +++ b/include/linux/iio/consumer.h @@ -235,6 +235,19 @@ int iio_write_channel_raw(struct iio_channel *chan, int 
val); int iio_get_channel_type(struct iio_channel *channel, enum iio_chan_type *type); +/** + * iio_read_channel_offset() - read the offset value for a channel + * @chan: The channel being queried. + * @val: First part of value read back. + * @val2: Second part of value read back. + * + * Note returns a description of what is in val and val2, such + * as IIO_VAL_INT_PLUS_MICRO telling us we have a value of val + * + val2/1e6 + */ +int iio_read_channel_offset(struct iio_channel *chan, int *val, + int *val2); + /** * iio_read_channel_scale() - read the scale value for a channel * @chan: The channel being queried. -- cgit v1.2.3 From a9a0d64a8b7af406f03b660cbad948cfd34ed2b0 Mon Sep 17 00:00:00 2001 From: Bhumika Goyal Date: Sat, 1 Oct 2016 15:27:18 +0530 Subject: iio: Declare event_attrs field of iio_info structure as const The event_attrs field of iio_info structure is only initialized once whenever an object of iio_info is created. After that this field is never modified again anywhere in the kernel. So, declare event_attrs field of iio_info as a const struct attribute_group. Checked for occurrences throughout the kernel using grep and coccinelle.
Signed-off-by: Bhumika Goyal Signed-off-by: Jonathan Cameron --- include/linux/iio/iio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index b4a0679e4a49..4591d8ea41bd 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -381,7 +381,7 @@ struct iio_dev; **/ struct iio_info { struct module *driver_module; - struct attribute_group *event_attrs; + const struct attribute_group *event_attrs; const struct attribute_group *attrs; int (*read_raw)(struct iio_dev *indio_dev, -- cgit v1.2.3 From f3b0deea89039373f0d22eafd1ff65a36e957266 Mon Sep 17 00:00:00 2001 From: Brian Masney Date: Mon, 26 Sep 2016 20:20:16 -0400 Subject: include: linux: iio: add IIO_ATTR_{RO, WO, RW} and IIO_DEVICE_ATTR_{RO, WO, RW} macros Add new macros: IIO_ATTR_RO, IIO_ATTR_WO, IIO_ATTR_RW, IIO_DEVICE_ATTR_RO, IIO_DEVICE_ATTR_WO and IIO_DEVICE_ATTR_RW to reduce the amount of boiler plate code that is needed for creating new attributes. This mimics the *_RO, *_WO, and *_RW macros that are found in include/linux/device.h and include/linux/sysfs.h. 
Signed-off-by: Brian Masney Acked-by: Greg Kroah-Hartman Signed-off-by: Jonathan Cameron --- include/linux/iio/sysfs.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iio/sysfs.h b/include/linux/iio/sysfs.h index 9cd8f747212f..ce9426c507fd 100644 --- a/include/linux/iio/sysfs.h +++ b/include/linux/iio/sysfs.h @@ -55,10 +55,34 @@ struct iio_const_attr { { .dev_attr = __ATTR(_name, _mode, _show, _store), \ .address = _addr } +#define IIO_ATTR_RO(_name, _addr) \ + { .dev_attr = __ATTR_RO(_name), \ + .address = _addr } + +#define IIO_ATTR_WO(_name, _addr) \ + { .dev_attr = __ATTR_WO(_name), \ + .address = _addr } + +#define IIO_ATTR_RW(_name, _addr) \ + { .dev_attr = __ATTR_RW(_name), \ + .address = _addr } + #define IIO_DEVICE_ATTR(_name, _mode, _show, _store, _addr) \ struct iio_dev_attr iio_dev_attr_##_name \ = IIO_ATTR(_name, _mode, _show, _store, _addr) +#define IIO_DEVICE_ATTR_RO(_name, _addr) \ + struct iio_dev_attr iio_dev_attr_##_name \ + = IIO_ATTR_RO(_name, _addr) + +#define IIO_DEVICE_ATTR_WO(_name, _addr) \ + struct iio_dev_attr iio_dev_attr_##_name \ + = IIO_ATTR_WO(_name, _addr) + +#define IIO_DEVICE_ATTR_RW(_name, _addr) \ + struct iio_dev_attr iio_dev_attr_##_name \ + = IIO_ATTR_RW(_name, _addr) + #define IIO_DEVICE_ATTR_NAMED(_vname, _name, _mode, _show, _store, _addr) \ struct iio_dev_attr iio_dev_attr_##_vname \ = IIO_ATTR(_name, _mode, _show, _store, _addr) -- cgit v1.2.3 From 3d42de25d290fdfe604835d1b389845b8cba5bff Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Tue, 4 Oct 2016 20:34:35 -0400 Subject: x86/fpu, kvm: Remove KVM vcpu->fpu_counter With the removal of the lazy FPU code, this field is no longer used. Get rid of it. Signed-off-by: Rik van Riel Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: Fenghua Yu Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Quentin Casasnovas Cc: Thomas Gleixner Cc: pbonzini@redhat.com Link: http://lkml.kernel.org/r/1475627678-20788-7-git-send-email-riel@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/kvm/x86.c | 4 +--- include/linux/kvm_host.h | 1 - 2 files changed, 1 insertion(+), 4 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 59d7761fd6df..2c7e775d7295 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7348,10 +7348,8 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) { - if (!vcpu->guest_fpu_loaded) { - vcpu->fpu_counter = 0; + if (!vcpu->guest_fpu_loaded) return; - } vcpu->guest_fpu_loaded = 0; copy_fpregs_to_fpstate(&vcpu->arch.guest_fpu); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 9c28b4d4c90b..4e6905cd1e8e 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -224,7 +224,6 @@ struct kvm_vcpu { int fpu_active; int guest_fpu_loaded, guest_xcr0_loaded; - unsigned char fpu_counter; struct swait_queue_head wq; struct pid *pid; int sigset_active; -- cgit v1.2.3 From a13e831fcaa7e8af0387aef629d1835cf39c59f0 Mon Sep 17 00:00:00 2001 From: Eva Rachel Retuya Date: Wed, 5 Oct 2016 11:06:21 +0800 Subject: staging: iio: ad7192: implement IIO_CHAN_INFO_SAMP_FREQ This driver predates the availability of IIO_CHAN_INFO_SAMP_FREQ attribute wherein usage has some advantages like it can be accessed by in-kernel consumers as well as reduces the code size. Therefore, use IIO_CHAN_INFO_SAMP_FREQ to implement the sampling_frequency attribute instead of using IIO_DEV_ATTR_SAMP_FREQ() macro. Move code from the functions associated with IIO_DEV_ATTR_SAMP_FREQ() into respective read and write hooks with the mask set to IIO_CHAN_INFO_SAMP_FREQ. 
Signed-off-by: Eva Rachel Retuya Signed-off-by: Jonathan Cameron --- drivers/staging/iio/adc/ad7192.c | 84 ++++++++++++---------------------- include/linux/iio/adc/ad_sigma_delta.h | 1 + 2 files changed, 30 insertions(+), 55 deletions(-) (limited to 'include/linux') diff --git a/drivers/staging/iio/adc/ad7192.c b/drivers/staging/iio/adc/ad7192.c index 1cf6b79801a9..bfa12ceb1e1f 100644 --- a/drivers/staging/iio/adc/ad7192.c +++ b/drivers/staging/iio/adc/ad7192.c @@ -322,57 +322,6 @@ out: return ret; } -static ssize_t ad7192_read_frequency(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct iio_dev *indio_dev = dev_to_iio_dev(dev); - struct ad7192_state *st = iio_priv(indio_dev); - - return sprintf(buf, "%d\n", st->mclk / - (st->f_order * 1024 * AD7192_MODE_RATE(st->mode))); -} - -static ssize_t ad7192_write_frequency(struct device *dev, - struct device_attribute *attr, - const char *buf, - size_t len) -{ - struct iio_dev *indio_dev = dev_to_iio_dev(dev); - struct ad7192_state *st = iio_priv(indio_dev); - unsigned long lval; - int div, ret; - - ret = kstrtoul(buf, 10, &lval); - if (ret) - return ret; - if (lval == 0) - return -EINVAL; - - ret = iio_device_claim_direct_mode(indio_dev); - if (ret) - return ret; - - div = st->mclk / (lval * st->f_order * 1024); - if (div < 1 || div > 1023) { - ret = -EINVAL; - goto out; - } - - st->mode &= ~AD7192_MODE_RATE(-1); - st->mode |= AD7192_MODE_RATE(div); - ad_sd_write_reg(&st->sd, AD7192_REG_MODE, 3, st->mode); - -out: - iio_device_release_direct_mode(indio_dev); - - return ret ? 
ret : len; -} - -static IIO_DEV_ATTR_SAMP_FREQ(S_IWUSR | S_IRUGO, - ad7192_read_frequency, - ad7192_write_frequency); - static ssize_t ad7192_show_scale_available(struct device *dev, struct device_attribute *attr, char *buf) @@ -471,7 +420,6 @@ static IIO_DEVICE_ATTR(ac_excitation_en, S_IRUGO | S_IWUSR, AD7192_REG_MODE); static struct attribute *ad7192_attributes[] = { - &iio_dev_attr_sampling_frequency.dev_attr.attr, &iio_dev_attr_in_v_m_v_scale_available.dev_attr.attr, &iio_dev_attr_in_voltage_scale_available.dev_attr.attr, &iio_dev_attr_bridge_switch_en.dev_attr.attr, @@ -484,7 +432,6 @@ static const struct attribute_group ad7192_attribute_group = { }; static struct attribute *ad7195_attributes[] = { - &iio_dev_attr_sampling_frequency.dev_attr.attr, &iio_dev_attr_in_v_m_v_scale_available.dev_attr.attr, &iio_dev_attr_in_voltage_scale_available.dev_attr.attr, &iio_dev_attr_bridge_switch_en.dev_attr.attr, @@ -536,6 +483,10 @@ static int ad7192_read_raw(struct iio_dev *indio_dev, if (chan->type == IIO_TEMP) *val -= 273 * ad7192_get_temp_scale(unipolar); return IIO_VAL_INT; + case IIO_CHAN_INFO_SAMP_FREQ: + *val = st->mclk / + (st->f_order * 1024 * AD7192_MODE_RATE(st->mode)); + return IIO_VAL_INT; } return -EINVAL; @@ -548,7 +499,7 @@ static int ad7192_write_raw(struct iio_dev *indio_dev, long mask) { struct ad7192_state *st = iio_priv(indio_dev); - int ret, i; + int ret, i, div; unsigned int tmp; ret = iio_device_claim_direct_mode(indio_dev); @@ -572,6 +523,22 @@ static int ad7192_write_raw(struct iio_dev *indio_dev, break; } break; + case IIO_CHAN_INFO_SAMP_FREQ: + if (!val) { + ret = -EINVAL; + break; + } + + div = st->mclk / (val * st->f_order * 1024); + if (div < 1 || div > 1023) { + ret = -EINVAL; + break; + } + + st->mode &= ~AD7192_MODE_RATE(-1); + st->mode |= AD7192_MODE_RATE(div); + ad_sd_write_reg(&st->sd, AD7192_REG_MODE, 3, st->mode); + break; default: ret = -EINVAL; } @@ -585,7 +552,14 @@ static int ad7192_write_raw_get_fmt(struct iio_dev *indio_dev, 
struct iio_chan_spec const *chan, long mask) { - return IIO_VAL_INT_PLUS_NANO; + switch (mask) { + case IIO_CHAN_INFO_SCALE: + return IIO_VAL_INT_PLUS_NANO; + case IIO_CHAN_INFO_SAMP_FREQ: + return IIO_VAL_INT; + default: + return -EINVAL; + } } static const struct iio_info ad7192_info = { diff --git a/include/linux/iio/adc/ad_sigma_delta.h b/include/linux/iio/adc/ad_sigma_delta.h index e7fdec4db9da..5ba430cc9a87 100644 --- a/include/linux/iio/adc/ad_sigma_delta.h +++ b/include/linux/iio/adc/ad_sigma_delta.h @@ -136,6 +136,7 @@ int ad_sd_validate_trigger(struct iio_dev *indio_dev, struct iio_trigger *trig); .info_mask_separate = BIT(IIO_CHAN_INFO_RAW) | \ BIT(IIO_CHAN_INFO_OFFSET), \ .info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SCALE), \ + .info_mask_shared_by_all = BIT(IIO_CHAN_INFO_SAMP_FREQ), \ .scan_index = (_si), \ .scan_type = { \ .sign = 'u', \ -- cgit v1.2.3 From 4be0542073a33cc063b6a8f8fb367536e234e7aa Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 29 Aug 2016 08:08:29 +0100 Subject: dma-buf: Introduce fence_get_rcu_safe() This variant of fence_get_rcu() takes an RCU protected pointer to a fence and carefully returns a reference to the fence ensuring that it is not reallocated as it does. 
This is required when mixing fences and SLAB_DESTROY_BY_RCU - although it serves a more pedagogical function atm Signed-off-by: Chris Wilson Cc: Daniel Vetter Cc: Sumit Semwal Cc: linux-media@vger.kernel.org Cc: dri-devel@lists.freedesktop.org Cc: linaro-mm-sig@lists.linaro.org Reviewed-by: Daniel Vetter Signed-off-by: Sumit Semwal Link: http://patchwork.freedesktop.org/patch/msgid/20160829070834.22296-6-chris@chris-wilson.co.uk --- include/linux/fence.h | 56 ++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 51 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fence.h b/include/linux/fence.h index 0d763053f97a..c9c5ba98c302 100644 --- a/include/linux/fence.h +++ b/include/linux/fence.h @@ -182,6 +182,16 @@ void fence_init(struct fence *fence, const struct fence_ops *ops, void fence_release(struct kref *kref); void fence_free(struct fence *fence); +/** + * fence_put - decreases refcount of the fence + * @fence: [in] fence to reduce refcount of + */ +static inline void fence_put(struct fence *fence) +{ + if (fence) + kref_put(&fence->refcount, fence_release); +} + /** * fence_get - increases refcount of the fence * @fence: [in] fence to increase refcount of @@ -210,13 +220,49 @@ static inline struct fence *fence_get_rcu(struct fence *fence) } /** - * fence_put - decreases refcount of the fence - * @fence: [in] fence to reduce refcount of + * fence_get_rcu_safe - acquire a reference to an RCU tracked fence + * @fence: [in] pointer to fence to increase refcount of + * + * Function returns NULL if no refcount could be obtained, or the fence. + * This function handles acquiring a reference to a fence that may be + * reallocated within the RCU grace period (such as with SLAB_DESTROY_BY_RCU), + * so long as the caller is using RCU on the pointer to the fence. + * + * An alternative mechanism is to employ a seqlock to protect a bunch of + * fences, such as used by struct reservation_object. 
When using a seqlock, + * the seqlock must be taken before and checked after a reference to the + * fence is acquired (as shown here). + * + * The caller is required to hold the RCU read lock. */ -static inline void fence_put(struct fence *fence) +static inline struct fence *fence_get_rcu_safe(struct fence * __rcu *fencep) { - if (fence) - kref_put(&fence->refcount, fence_release); + do { + struct fence *fence; + + fence = rcu_dereference(*fencep); + if (!fence || !fence_get_rcu(fence)) + return NULL; + + /* The atomic_inc_not_zero() inside fence_get_rcu() + * provides a full memory barrier upon success (such as now). + * This is paired with the write barrier from assigning + * to the __rcu protected fence pointer so that if that + * pointer still matches the current fence, we know we + * have successfully acquire a reference to it. If it no + * longer matches, we are holding a reference to some other + * reallocated pointer. This is possible if the allocator + * is using a freelist like SLAB_DESTROY_BY_RCU where the + * fence remains valid for the RCU grace period, but it + * may be reallocated. When using such allocators, we are + * responsible for ensuring the reference we get is to + * the right fence, as below. + */ + if (fence == rcu_access_pointer(*fencep)) + return rcu_pointer_handoff(fence); + + fence_put(fence); + } while (1); } int fence_signal(struct fence *fence); -- cgit v1.2.3 From 61e84623ace35ce48975e8f90bbbac7557c43d61 Mon Sep 17 00:00:00 2001 From: Jarod Wilson Date: Fri, 7 Oct 2016 22:04:33 -0400 Subject: net: centralize net_device min/max MTU checking While looking into an MTU issue with sfc, I started noticing that almost every NIC driver with an ndo_change_mtu function implemented almost exactly the same range checks, and in many cases, that was the only practical thing their ndo_change_mtu function was doing. 
Quite a few drivers have either 68, 64, 60 or 46 as their minimum MTU value checked, and then various sizes from 1500 to 65535 for their maximum MTU value. We can remove a whole lot of redundant code here if we simply store min_mtu and max_mtu in net_device, and check against those in net/core/dev.c's dev_set_mtu(). In theory, there should be zero functional change with this patch, it just puts the infrastructure in place. Subsequent patches will attempt to start using said infrastructure, with theoretically zero change in functionality. CC: netdev@vger.kernel.org Signed-off-by: Jarod Wilson Signed-off-by: David S. Miller --- include/linux/netdevice.h | 4 ++++ net/core/dev.c | 13 +++++++++++-- 2 files changed, 15 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 136ae6bbe81e..fbdf923af4d3 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1506,6 +1506,8 @@ enum netdev_priv_flags { * @if_port: Selectable AUI, TP, ... * @dma: DMA channel * @mtu: Interface MTU value + * @min_mtu: Interface Minimum MTU value + * @max_mtu: Interface Maximum MTU value * @type: Interface hardware type * @hard_header_len: Maximum hardware header length. * @@ -1726,6 +1728,8 @@ struct net_device { unsigned char dma; unsigned int mtu; + unsigned int min_mtu; + unsigned int max_mtu; unsigned short type; unsigned short hard_header_len; diff --git a/net/core/dev.c b/net/core/dev.c index f1fe26f66458..f376639e8774 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6499,9 +6499,18 @@ int dev_set_mtu(struct net_device *dev, int new_mtu) if (new_mtu == dev->mtu) return 0; - /* MTU must be positive.
*/ - if (new_mtu < 0) + /* MTU must be positive, and in range */ + if (new_mtu < 0 || new_mtu < dev->min_mtu) { + net_err_ratelimited("%s: Invalid MTU %d requested, hw min %d\n", + dev->name, new_mtu, dev->min_mtu); return -EINVAL; + } + + if (dev->max_mtu > 0 && new_mtu > dev->max_mtu) { + net_err_ratelimited("%s: Invalid MTU %d requested, hw max %d\n", + dev->name, new_mtu, dev->min_mtu); + return -EINVAL; + } if (!netif_device_present(dev)) return -ENODEV; -- cgit v1.2.3 From cf53b1da73bdf940f1523ec5a7d375d7056c759c Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Tue, 11 Oct 2016 13:04:09 -0700 Subject: Revert "net: Add driver helper functions to determine checksum offloadability" This reverts commit 6ae23ad36253a8033c5714c52b691b84456487c5. The code has been in kernel since 4.4 but there are no in tree code that uses. Unused code is broken code, remove it. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/netdevice.h | 78 -------------------------- net/core/dev.c | 136 ---------------------------------------------- 2 files changed, 214 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index fbdf923af4d3..bf341b65ca5e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2653,71 +2653,6 @@ static inline void skb_gro_remcsum_cleanup(struct sk_buff *skb, remcsum_unadjust((__sum16 *)ptr, grc->delta); } -struct skb_csum_offl_spec { - __u16 ipv4_okay:1, - ipv6_okay:1, - encap_okay:1, - ip_options_okay:1, - ext_hdrs_okay:1, - tcp_okay:1, - udp_okay:1, - sctp_okay:1, - vlan_okay:1, - no_encapped_ipv6:1, - no_not_encapped:1; -}; - -bool __skb_csum_offload_chk(struct sk_buff *skb, - const struct skb_csum_offl_spec *spec, - bool *csum_encapped, - bool csum_help); - -static inline bool skb_csum_offload_chk(struct sk_buff *skb, - const struct skb_csum_offl_spec *spec, - bool *csum_encapped, - bool csum_help) -{ - if (skb->ip_summed != CHECKSUM_PARTIAL) - 
return false; - - return __skb_csum_offload_chk(skb, spec, csum_encapped, csum_help); -} - -static inline bool skb_csum_offload_chk_help(struct sk_buff *skb, - const struct skb_csum_offl_spec *spec) -{ - bool csum_encapped; - - return skb_csum_offload_chk(skb, spec, &csum_encapped, true); -} - -static inline bool skb_csum_off_chk_help_cmn(struct sk_buff *skb) -{ - static const struct skb_csum_offl_spec csum_offl_spec = { - .ipv4_okay = 1, - .ip_options_okay = 1, - .ipv6_okay = 1, - .vlan_okay = 1, - .tcp_okay = 1, - .udp_okay = 1, - }; - - return skb_csum_offload_chk_help(skb, &csum_offl_spec); -} - -static inline bool skb_csum_off_chk_help_cmn_v4_only(struct sk_buff *skb) -{ - static const struct skb_csum_offl_spec csum_offl_spec = { - .ipv4_okay = 1, - .ip_options_okay = 1, - .tcp_okay = 1, - .udp_okay = 1, - .vlan_okay = 1, - }; - - return skb_csum_offload_chk_help(skb, &csum_offl_spec); -} - static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, const void *daddr, const void *saddr, @@ -3961,19 +3896,6 @@ static inline bool can_checksum_protocol(netdev_features_t features, } } -/* Map an ethertype into IP protocol if possible */ -static inline int eproto_to_ipproto(int eproto) -{ - switch (eproto) { - case htons(ETH_P_IP): - return IPPROTO_IP; - case htons(ETH_P_IPV6): - return IPPROTO_IPV6; - default: - return -1; - } -} - #ifdef CONFIG_BUG void netdev_rx_csum_fault(struct net_device *dev); #else diff --git a/net/core/dev.c b/net/core/dev.c index f376639e8774..6498cc2ba8f6 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -139,7 +139,6 @@ #include #include #include -#include #include #include "net-sysfs.h" @@ -2492,141 +2491,6 @@ out: } EXPORT_SYMBOL(skb_checksum_help); -/* skb_csum_offload_check - Driver helper function to determine if a device - * with limited checksum offload capabilities is able to offload the checksum - * for a given packet. 
- * - * Arguments: - * skb - sk_buff for the packet in question - * spec - contains the description of what device can offload - * csum_encapped - returns true if the checksum being offloaded is - * encpasulated. That is it is checksum for the transport header - * in the inner headers. - * checksum_help - when set indicates that helper function should - * call skb_checksum_help if offload checks fail - * - * Returns: - * true: Packet has passed the checksum checks and should be offloadable to - * the device (a driver may still need to check for additional - * restrictions of its device) - * false: Checksum is not offloadable. If checksum_help was set then - * skb_checksum_help was called to resolve checksum for non-GSO - * packets and when IP protocol is not SCTP - */ -bool __skb_csum_offload_chk(struct sk_buff *skb, - const struct skb_csum_offl_spec *spec, - bool *csum_encapped, - bool csum_help) -{ - struct iphdr *iph; - struct ipv6hdr *ipv6; - void *nhdr; - int protocol; - u8 ip_proto; - - if (skb->protocol == htons(ETH_P_8021Q) || - skb->protocol == htons(ETH_P_8021AD)) { - if (!spec->vlan_okay) - goto need_help; - } - - /* We check whether the checksum refers to a transport layer checksum in - * the outermost header or an encapsulated transport layer checksum that - * corresponds to the inner headers of the skb. If the checksum is for - * something else in the packet we need help. 
- */ - if (skb_checksum_start_offset(skb) == skb_transport_offset(skb)) { - /* Non-encapsulated checksum */ - protocol = eproto_to_ipproto(vlan_get_protocol(skb)); - nhdr = skb_network_header(skb); - *csum_encapped = false; - if (spec->no_not_encapped) - goto need_help; - } else if (skb->encapsulation && spec->encap_okay && - skb_checksum_start_offset(skb) == - skb_inner_transport_offset(skb)) { - /* Encapsulated checksum */ - *csum_encapped = true; - switch (skb->inner_protocol_type) { - case ENCAP_TYPE_ETHER: - protocol = eproto_to_ipproto(skb->inner_protocol); - break; - case ENCAP_TYPE_IPPROTO: - protocol = skb->inner_protocol; - break; - } - nhdr = skb_inner_network_header(skb); - } else { - goto need_help; - } - - switch (protocol) { - case IPPROTO_IP: - if (!spec->ipv4_okay) - goto need_help; - iph = nhdr; - ip_proto = iph->protocol; - if (iph->ihl != 5 && !spec->ip_options_okay) - goto need_help; - break; - case IPPROTO_IPV6: - if (!spec->ipv6_okay) - goto need_help; - if (spec->no_encapped_ipv6 && *csum_encapped) - goto need_help; - ipv6 = nhdr; - nhdr += sizeof(*ipv6); - ip_proto = ipv6->nexthdr; - break; - default: - goto need_help; - } - -ip_proto_again: - switch (ip_proto) { - case IPPROTO_TCP: - if (!spec->tcp_okay || - skb->csum_offset != offsetof(struct tcphdr, check)) - goto need_help; - break; - case IPPROTO_UDP: - if (!spec->udp_okay || - skb->csum_offset != offsetof(struct udphdr, check)) - goto need_help; - break; - case IPPROTO_SCTP: - if (!spec->sctp_okay || - skb->csum_offset != offsetof(struct sctphdr, checksum)) - goto cant_help; - break; - case NEXTHDR_HOP: - case NEXTHDR_ROUTING: - case NEXTHDR_DEST: { - u8 *opthdr = nhdr; - - if (protocol != IPPROTO_IPV6 || !spec->ext_hdrs_okay) - goto need_help; - - ip_proto = opthdr[0]; - nhdr += (opthdr[1] + 1) << 3; - - goto ip_proto_again; - } - default: - goto need_help; - } - - /* Passed the tests for offloading checksum */ - return true; - -need_help: - if (csum_help && 
!skb_shinfo(skb)->gso_size) - skb_checksum_help(skb); -cant_help: - return false; -} -EXPORT_SYMBOL(__skb_csum_offload_chk); - __be16 skb_network_protocol(struct sk_buff *skb, int *depth) { __be16 type = skb->protocol; -- cgit v1.2.3 From c3aaa403840a5ccd305fb5e73f3cbfac6453b5e5 Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Fri, 14 Oct 2016 05:19:17 -0400 Subject: qed: Pass MAC hints to VFs Some hypervisors can support MAC hints to their VFs. Even though we don't have such a hypervisor API in linux, we add sufficient logic for the VF to be able to receive such hints and set the mac accordingly - as long as the VF has not been set with a MAC already. Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_vf.c | 4 ++-- drivers/net/ethernet/qlogic/qede/qede_main.c | 6 +++++- include/linux/qed/qed_eth_if.h | 2 +- 3 files changed, 8 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/qlogic/qed/qed_vf.c b/drivers/net/ethernet/qlogic/qed/qed_vf.c index abf5bf11f865..f580bf4c97f0 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_vf.c +++ b/drivers/net/ethernet/qlogic/qed/qed_vf.c @@ -1230,8 +1230,8 @@ static void qed_handle_bulletin_change(struct qed_hwfn *hwfn) is_mac_exist = qed_vf_bulletin_get_forced_mac(hwfn, mac, &is_mac_forced); - if (is_mac_exist && is_mac_forced && cookie) - ops->force_mac(cookie, mac); + if (is_mac_exist && cookie) + ops->force_mac(cookie, mac, !!is_mac_forced); /* Always update link configuration according to bulletin */ qed_link_update(hwfn); diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 343038ca047d..9866d952e3e1 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -171,10 +171,14 @@ static struct pci_driver qede_pci_driver = { #endif }; -static void qede_force_mac(void *dev, u8 *mac) +static void qede_force_mac(void *dev, u8 *mac, bool 
forced) { struct qede_dev *edev = dev; + /* MAC hints take effect only if we haven't set one already */ + if (is_valid_ether_addr(edev->ndev->dev_addr) && !forced) + return; + ether_addr_copy(edev->ndev->dev_addr, mac); ether_addr_copy(edev->primary_mac, mac); } diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h index 33c24ebc9b7f..1c779486c30d 100644 --- a/include/linux/qed/qed_eth_if.h +++ b/include/linux/qed/qed_eth_if.h @@ -129,7 +129,7 @@ struct qed_tunn_params { struct qed_eth_cb_ops { struct qed_common_cb_ops common; - void (*force_mac) (void *dev, u8 *mac); + void (*force_mac) (void *dev, u8 *mac, bool forced); }; #ifdef CONFIG_DCB -- cgit v1.2.3 From 7b7e70f979e34ed84d725eab8ea42921ab6f42e3 Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Fri, 14 Oct 2016 05:19:20 -0400 Subject: qed*: Allow unicast filtering Apparently qede fails to set IFF_UNICAST_FLT, and as a result is not actually performing unicast MAC filtering. While we're at it - relax a hard-coded limitation that restricts each interface to using at most 15 unicast MAC addresses before turning promiscuous. Instead utilize the HW resources to their limit. Signed-off-by: Yuval Mintz Signed-off-by: David S. 
Miller --- drivers/net/ethernet/qlogic/qed/qed_l2.c | 12 ++++++++++-- drivers/net/ethernet/qlogic/qede/qede_main.c | 4 +++- include/linux/qed/qed_eth_if.h | 1 + 3 files changed, 14 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c index ddd410a91e13..6b0e22d9fe4c 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c @@ -1652,6 +1652,7 @@ static int qed_fill_eth_dev_info(struct qed_dev *cdev, if (IS_PF(cdev)) { int max_vf_vlan_filters = 0; + int max_vf_mac_filters = 0; if (cdev->int_params.out.int_mode == QED_INT_MODE_MSIX) { for_each_hwfn(cdev, i) @@ -1665,11 +1666,18 @@ static int qed_fill_eth_dev_info(struct qed_dev *cdev, info->num_queues = cdev->num_hwfns; } - if (IS_QED_SRIOV(cdev)) + if (IS_QED_SRIOV(cdev)) { max_vf_vlan_filters = cdev->p_iov_info->total_vfs * QED_ETH_VF_NUM_VLAN_FILTERS; - info->num_vlan_filters = RESC_NUM(&cdev->hwfns[0], QED_VLAN) - + max_vf_mac_filters = cdev->p_iov_info->total_vfs * + QED_ETH_VF_NUM_MAC_FILTERS; + } + info->num_vlan_filters = RESC_NUM(QED_LEADING_HWFN(cdev), + QED_VLAN) - max_vf_vlan_filters; + info->num_mac_filters = RESC_NUM(QED_LEADING_HWFN(cdev), + QED_MAC) - + max_vf_mac_filters; ether_addr_copy(info->port_mac, cdev->hwfns[0].hw_info.hw_mac_addr); diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 6c2b09c255d5..0e483afc2b87 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -2365,6 +2365,8 @@ static void qede_init_ndev(struct qede_dev *edev) qede_set_ethtool_ops(ndev); + ndev->priv_flags = IFF_UNICAST_FLT; + /* user-changeble features */ hw_features = NETIF_F_GRO | NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | @@ -3937,7 +3939,7 @@ static void qede_config_rx_mode(struct net_device *ndev) /* Check for promiscuous */ if ((ndev->flags & 
IFF_PROMISC) || - (uc_count > 15)) { /* @@@TBD resource allocation - 1 */ + (uc_count > edev->dev_info.num_mac_filters - 1)) { accept_flags = QED_FILTER_RX_MODE_TYPE_PROMISC; } else { /* Add MAC filters according to the unicast secondary macs */ diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h index 1c779486c30d..15130805d792 100644 --- a/include/linux/qed/qed_eth_if.h +++ b/include/linux/qed/qed_eth_if.h @@ -23,6 +23,7 @@ struct qed_dev_eth_info { u8 port_mac[ETH_ALEN]; u8 num_vlan_filters; + u16 num_mac_filters; /* Legacy VF - this affects the datapath, so qede has to know */ bool is_legacy; -- cgit v1.2.3 From a6e78b3e1406575323b30b65890ee3c29930fb27 Mon Sep 17 00:00:00 2001 From: Shashank Sharma Date: Mon, 17 Oct 2016 17:34:39 +0530 Subject: video: Add new aspect ratios for HDMI 2.0 HDMI 2.0/CEA-861-F introduces two new aspect ratios: - 64:27 - 256:135 This patch adds enumeration for the new aspect ratios in the existing aspect ratio list. V2: rebase V3: rebase V4: Added r-b from Jose, Ack by Tomi Signed-off-by: Shashank Sharma Reviewed-by: Sean Paul Reviewed-by: Jose Abreu Acked-by: Tomi Valkeinen Cc: Daniel Vetter Cc: Emil Velikov Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/1476705880-15600-4-git-send-email-shashank.sharma@intel.com --- drivers/video/hdmi.c | 4 ++++ include/linux/hdmi.h | 2 ++ 2 files changed, 6 insertions(+) (limited to 'include/linux') diff --git a/drivers/video/hdmi.c b/drivers/video/hdmi.c index 162689227a23..1cf907ecded4 100644 --- a/drivers/video/hdmi.c +++ b/drivers/video/hdmi.c @@ -533,6 +533,10 @@ hdmi_picture_aspect_get_name(enum hdmi_picture_aspect picture_aspect) return "4:3"; case HDMI_PICTURE_ASPECT_16_9: return "16:9"; + case HDMI_PICTURE_ASPECT_64_27: + return "64:27"; + case HDMI_PICTURE_ASPECT_256_135: + return "256:135"; case HDMI_PICTURE_ASPECT_RESERVED: return "Reserved"; } diff --git a/include/linux/hdmi.h b/include/linux/hdmi.h index 
e9744202fa29..edbb4fc674ed 100644 --- a/include/linux/hdmi.h +++ b/include/linux/hdmi.h @@ -78,6 +78,8 @@ enum hdmi_picture_aspect { HDMI_PICTURE_ASPECT_NONE, HDMI_PICTURE_ASPECT_4_3, HDMI_PICTURE_ASPECT_16_9, + HDMI_PICTURE_ASPECT_64_27, + HDMI_PICTURE_ASPECT_256_135, HDMI_PICTURE_ASPECT_RESERVED, }; -- cgit v1.2.3 From 664fcf123a30edf16b47d2ce1f610d654ba917b2 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sun, 16 Oct 2016 19:56:51 +0200 Subject: net: phy: Threaded interrupts allow some simplification The PHY interrupts are now handled in a threaded interrupt handler, which can sleep. The work queue is no longer needed, phy_change() can be called directly. phy_mac_interrupt() still needs to be safe to call in interrupt context, so keep the work queue, and use a helper to call phy_change(). Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/phy/phy.c | 45 +++++++++++++++++++++++++------------------- drivers/net/phy/phy_device.c | 2 +- include/linux/phy.h | 5 +++-- 3 files changed, 30 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 25f2b296aaba..bb673c63c85c 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -664,7 +664,7 @@ static void phy_error(struct phy_device *phydev) * @phy_dat: phy_device pointer * * Description: When a PHY interrupt occurs, the handler disables - * interrupts, and schedules a work task to clear the interrupt. + * interrupts, and uses phy_change to handle the interrupt. */ static irqreturn_t phy_interrupt(int irq, void *phy_dat) { @@ -673,15 +673,10 @@ static irqreturn_t phy_interrupt(int irq, void *phy_dat) if (PHY_HALTED == phydev->state) return IRQ_NONE; /* It can't be ours. */ - /* The MDIO bus is not allowed to be written in interrupt - * context, so we need to disable the irq here. A work - * queue will write the PHY to disable and clear the - * interrupt, and then reenable the irq line. 
- */ disable_irq_nosync(irq); atomic_inc(&phydev->irq_disable); - queue_work(system_power_efficient_wq, &phydev->phy_queue); + phy_change(phydev); return IRQ_HANDLED; } @@ -766,12 +761,6 @@ int phy_stop_interrupts(struct phy_device *phydev) free_irq(phydev->irq, phydev); - /* Cannot call flush_scheduled_work() here as desired because - * of rtnl_lock(), but we do not really care about what would - * be done, except from enable_irq(), so cancel any work - * possibly pending and take care of the matter below. - */ - cancel_work_sync(&phydev->phy_queue); /* If work indeed has been cancelled, disable_irq() will have * been left unbalanced from phy_interrupt() and enable_irq() * has to be called so that other devices on the line work. @@ -784,14 +773,11 @@ int phy_stop_interrupts(struct phy_device *phydev) EXPORT_SYMBOL(phy_stop_interrupts); /** - * phy_change - Scheduled by the phy_interrupt/timer to handle PHY changes - * @work: work_struct that describes the work to be done + * phy_change - Called by the phy_interrupt to handle PHY changes + * @phydev: phy_device struct that interrupted */ -void phy_change(struct work_struct *work) +void phy_change(struct phy_device *phydev) { - struct phy_device *phydev = - container_of(work, struct phy_device, phy_queue); - if (phy_interrupt_is_valid(phydev)) { if (phydev->drv->did_interrupt && !phydev->drv->did_interrupt(phydev)) @@ -832,6 +818,18 @@ phy_err: phy_error(phydev); } +/** + * phy_change_work - Scheduled by the phy_mac_interrupt to handle PHY changes + * @work: work_struct that describes the work to be done + */ +void phy_change_work(struct work_struct *work) +{ + struct phy_device *phydev = + container_of(work, struct phy_device, phy_queue); + + phy_change(phydev); +} + /** * phy_stop - Bring down the PHY link, and stop checking the status * @phydev: target phy_device struct @@ -1116,6 +1114,15 @@ void phy_state_machine(struct work_struct *work) PHY_STATE_TIME * HZ); } +/** + * phy_mac_interrupt - MAC says the link 
has changed + * @phydev: phy_device struct with changed link + * @new_link: Link is Up/Down. + * + * Description: The MAC layer is able to indicate there has been a change + * in the PHY link status. Set the new link status, and trigger the + * state machine via a work queue. + */ void phy_mac_interrupt(struct phy_device *phydev, int new_link) { phydev->link = new_link; diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index e977ba931878..ac440a815353 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -347,7 +347,7 @@ struct phy_device *phy_device_create(struct mii_bus *bus, int addr, int phy_id, mutex_init(&dev->lock); INIT_DELAYED_WORK(&dev->state_queue, phy_state_machine); - INIT_WORK(&dev->phy_queue, phy_change); + INIT_WORK(&dev->phy_queue, phy_change_work); /* Request the appropriate module unconditionally; don't * bother trying to do so only if it isn't already loaded, diff --git a/include/linux/phy.h b/include/linux/phy.h index e25f1830fbcf..c47378c93607 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -343,7 +343,7 @@ struct phy_c45_device_ids { * giving up on the current attempt at acquiring a link * irq: IRQ number of the PHY's interrupt (-1 if none) * phy_timer: The timer for handling the state machine - * phy_queue: A work_queue for the interrupt + * phy_queue: A work_queue for the phy_mac_interrupt * attached_dev: The attached enet driver's device instance ptr * adjust_link: Callback for the enet controller to respond to * changes in the link state. 
@@ -802,7 +802,8 @@ int phy_driver_register(struct phy_driver *new_driver, struct module *owner); int phy_drivers_register(struct phy_driver *new_driver, int n, struct module *owner); void phy_state_machine(struct work_struct *work); -void phy_change(struct work_struct *work); +void phy_change(struct phy_device *phydev); +void phy_change_work(struct work_struct *work); void phy_mac_interrupt(struct phy_device *phydev, int new_link); void phy_start_machine(struct phy_device *phydev); void phy_stop_machine(struct phy_device *phydev); -- cgit v1.2.3 From bb6869b2147817385e0261f928b942f466f74a63 Mon Sep 17 00:00:00 2001 From: Peter Griffin Date: Tue, 18 Oct 2016 10:39:06 +0100 Subject: remoteproc: st_slim_rproc: add a slimcore rproc driver slim core is used as a basis for many IPs in the STi chipsets such as fdma and demux. To avoid duplicating the elf loading code in each device driver a slim rproc driver has been created. This driver is designed to be used by other device drivers such as fdma, or demux whose IP is based around a slim core. The device driver can call slim_rproc_alloc() to allocate a slim rproc and slim_rproc_put() when finished. This driver takes care of ioremapping the slim registers (dmem, imem, slimcore, peripherals), whose offsets and sizes can change between IP's. It also obtains and enables any clocks used by the device. This approach avoids having a double mapping of the registers as slim_rproc does not register its own platform device. It also maps well to device tree abstraction as it allows us to have one dt node for the whole device. All of the generic rproc elf loading code can be reused, and we provide start() stop() hooks to start and stop the slim core once the firmware has been loaded. This has been tested successfully with fdma driver. 
Signed-off-by: Peter Griffin Signed-off-by: Vinod Koul --- drivers/remoteproc/Kconfig | 7 +- drivers/remoteproc/Makefile | 1 + drivers/remoteproc/st_slim_rproc.c | 364 +++++++++++++++++++++++++++++++ include/linux/remoteproc/st_slim_rproc.h | 58 +++++ 4 files changed, 428 insertions(+), 2 deletions(-) create mode 100644 drivers/remoteproc/st_slim_rproc.c create mode 100644 include/linux/remoteproc/st_slim_rproc.h (limited to 'include/linux') diff --git a/drivers/remoteproc/Kconfig b/drivers/remoteproc/Kconfig index f396bfef5d42..9270c8e596f7 100644 --- a/drivers/remoteproc/Kconfig +++ b/drivers/remoteproc/Kconfig @@ -58,7 +58,6 @@ config DA8XX_REMOTEPROC tristate "DA8xx/OMAP-L13x remoteproc support" depends on ARCH_DAVINCI_DA8XX select CMA if MMU - select REMOTEPROC select RPMSG_VIRTIO help Say y here to support DA8xx/OMAP-L13x remote processors via the @@ -99,10 +98,10 @@ config QCOM_WCNSS_PIL tristate "Qualcomm WCNSS Peripheral Image Loader" depends on OF && ARCH_QCOM depends on QCOM_SMEM + depends on REMOTEPROC select QCOM_MDT_LOADER select QCOM_SCM select QCOM_WCNSS_IRIS - select REMOTEPROC help Say y here to support the Peripheral Image Loader for the Qualcomm Wireless Connectivity Subsystem. @@ -116,4 +115,8 @@ config ST_REMOTEPROC processor framework. This can be either built-in or a loadable module. 
+config ST_SLIM_REMOTEPROC + tristate + select REMOTEPROC + endmenu diff --git a/drivers/remoteproc/Makefile b/drivers/remoteproc/Makefile index 6dfb62ed643f..924f0cb25470 100644 --- a/drivers/remoteproc/Makefile +++ b/drivers/remoteproc/Makefile @@ -16,3 +16,4 @@ obj-$(CONFIG_QCOM_Q6V5_PIL) += qcom_q6v5_pil.o obj-$(CONFIG_QCOM_WCNSS_IRIS) += qcom_wcnss_iris.o obj-$(CONFIG_QCOM_WCNSS_PIL) += qcom_wcnss.o obj-$(CONFIG_ST_REMOTEPROC) += st_remoteproc.o +obj-$(CONFIG_ST_SLIM_REMOTEPROC) += st_slim_rproc.o diff --git a/drivers/remoteproc/st_slim_rproc.c b/drivers/remoteproc/st_slim_rproc.c new file mode 100644 index 000000000000..1484e9717946 --- /dev/null +++ b/drivers/remoteproc/st_slim_rproc.c @@ -0,0 +1,364 @@ +/* + * SLIM core rproc driver + * + * Copyright (C) 2016 STMicroelectronics + * + * Author: Peter Griffin + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "remoteproc_internal.h" + +/* SLIM core registers */ +#define SLIM_ID_OFST 0x0 +#define SLIM_VER_OFST 0x4 + +#define SLIM_EN_OFST 0x8 +#define SLIM_EN_RUN BIT(0) + +#define SLIM_CLK_GATE_OFST 0xC +#define SLIM_CLK_GATE_DIS BIT(0) +#define SLIM_CLK_GATE_RESET BIT(2) + +#define SLIM_SLIM_PC_OFST 0x20 + +/* DMEM registers */ +#define SLIM_REV_ID_OFST 0x0 +#define SLIM_REV_ID_MIN_MASK GENMASK(15, 8) +#define SLIM_REV_ID_MIN(id) ((id & SLIM_REV_ID_MIN_MASK) >> 8) +#define SLIM_REV_ID_MAJ_MASK GENMASK(23, 16) +#define SLIM_REV_ID_MAJ(id) ((id & SLIM_REV_ID_MAJ_MASK) >> 16) + + +/* peripherals registers */ +#define SLIM_STBUS_SYNC_OFST 0xF88 +#define SLIM_STBUS_SYNC_DIS BIT(0) + +#define SLIM_INT_SET_OFST 0xFD4 +#define SLIM_INT_CLR_OFST 0xFD8 +#define SLIM_INT_MASK_OFST 0xFDC + +#define SLIM_CMD_CLR_OFST 0xFC8 +#define SLIM_CMD_MASK_OFST 0xFCC + +static const char *mem_names[ST_SLIM_MEM_MAX] = { + [ST_SLIM_DMEM] = "dmem", + [ST_SLIM_IMEM] = "imem", +}; + +static int slim_clk_get(struct st_slim_rproc *slim_rproc, struct device *dev) +{ + int clk, err; + + for (clk = 0; clk < ST_SLIM_MAX_CLK; clk++) { + slim_rproc->clks[clk] = of_clk_get(dev->of_node, clk); + if (IS_ERR(slim_rproc->clks[clk])) { + err = PTR_ERR(slim_rproc->clks[clk]); + if (err == -EPROBE_DEFER) + goto err_put_clks; + slim_rproc->clks[clk] = NULL; + break; + } + } + + return 0; + +err_put_clks: + while (--clk >= 0) + clk_put(slim_rproc->clks[clk]); + + return err; +} + +static void slim_clk_disable(struct st_slim_rproc *slim_rproc) +{ + int clk; + + for (clk = 0; clk < ST_SLIM_MAX_CLK && slim_rproc->clks[clk]; clk++) + clk_disable_unprepare(slim_rproc->clks[clk]); +} + +static int slim_clk_enable(struct st_slim_rproc *slim_rproc) +{ + int clk, ret; + + for (clk = 0; clk < ST_SLIM_MAX_CLK && slim_rproc->clks[clk]; clk++) { + ret = clk_prepare_enable(slim_rproc->clks[clk]); + if (ret) + goto 
err_disable_clks; + } + + return 0; + +err_disable_clks: + while (--clk >= 0) + clk_disable_unprepare(slim_rproc->clks[clk]); + + return ret; +} + +/* + * Remoteproc slim specific device handlers + */ +static int slim_rproc_start(struct rproc *rproc) +{ + struct device *dev = &rproc->dev; + struct st_slim_rproc *slim_rproc = rproc->priv; + unsigned long hw_id, hw_ver, fw_rev; + u32 val; + + /* disable CPU pipeline clock & reset CPU pipeline */ + val = SLIM_CLK_GATE_DIS | SLIM_CLK_GATE_RESET; + writel(val, slim_rproc->slimcore + SLIM_CLK_GATE_OFST); + + /* disable SLIM core STBus sync */ + writel(SLIM_STBUS_SYNC_DIS, slim_rproc->peri + SLIM_STBUS_SYNC_OFST); + + /* enable cpu pipeline clock */ + writel(!SLIM_CLK_GATE_DIS, + slim_rproc->slimcore + SLIM_CLK_GATE_OFST); + + /* clear int & cmd mailbox */ + writel(~0U, slim_rproc->peri + SLIM_INT_CLR_OFST); + writel(~0U, slim_rproc->peri + SLIM_CMD_CLR_OFST); + + /* enable all channels cmd & int */ + writel(~0U, slim_rproc->peri + SLIM_INT_MASK_OFST); + writel(~0U, slim_rproc->peri + SLIM_CMD_MASK_OFST); + + /* enable cpu */ + writel(SLIM_EN_RUN, slim_rproc->slimcore + SLIM_EN_OFST); + + hw_id = readl_relaxed(slim_rproc->slimcore + SLIM_ID_OFST); + hw_ver = readl_relaxed(slim_rproc->slimcore + SLIM_VER_OFST); + + fw_rev = readl(slim_rproc->mem[ST_SLIM_DMEM].cpu_addr + + SLIM_REV_ID_OFST); + + dev_info(dev, "fw rev:%ld.%ld on SLIM %ld.%ld\n", + SLIM_REV_ID_MAJ(fw_rev), SLIM_REV_ID_MIN(fw_rev), + hw_id, hw_ver); + + return 0; +} + +static int slim_rproc_stop(struct rproc *rproc) +{ + struct st_slim_rproc *slim_rproc = rproc->priv; + u32 val; + + /* mask all (cmd & int) channels */ + writel(0UL, slim_rproc->peri + SLIM_INT_MASK_OFST); + writel(0UL, slim_rproc->peri + SLIM_CMD_MASK_OFST); + + /* disable cpu pipeline clock */ + writel(SLIM_CLK_GATE_DIS, slim_rproc->slimcore + SLIM_CLK_GATE_OFST); + + writel(!SLIM_EN_RUN, slim_rproc->slimcore + SLIM_EN_OFST); + + val = readl(slim_rproc->slimcore + SLIM_EN_OFST); + if (val & 
SLIM_EN_RUN) + dev_warn(&rproc->dev, "Failed to disable SLIM"); + + dev_dbg(&rproc->dev, "slim stopped\n"); + + return 0; +} + +static void *slim_rproc_da_to_va(struct rproc *rproc, u64 da, int len) +{ + struct st_slim_rproc *slim_rproc = rproc->priv; + void *va = NULL; + int i; + + for (i = 0; i < ST_SLIM_MEM_MAX; i++) { + if (da != slim_rproc->mem[i].bus_addr) + continue; + + if (len <= slim_rproc->mem[i].size) { + /* __force to make sparse happy with type conversion */ + va = (__force void *)slim_rproc->mem[i].cpu_addr; + break; + } + } + + dev_dbg(&rproc->dev, "da = 0x%llx len = 0x%x va = 0x%p\n", da, len, va); + + return va; +} + +static struct rproc_ops slim_rproc_ops = { + .start = slim_rproc_start, + .stop = slim_rproc_stop, + .da_to_va = slim_rproc_da_to_va, +}; + +/* + * Firmware handler operations: sanity, boot address, load ... + */ + +static struct resource_table empty_rsc_tbl = { + .ver = 1, + .num = 0, +}; + +static struct resource_table *slim_rproc_find_rsc_table(struct rproc *rproc, + const struct firmware *fw, + int *tablesz) +{ + *tablesz = sizeof(empty_rsc_tbl); + return &empty_rsc_tbl; +} + +static struct rproc_fw_ops slim_rproc_fw_ops = { + .find_rsc_table = slim_rproc_find_rsc_table, +}; + +/** + * st_slim_rproc_alloc() - allocate and initialise slim rproc + * @pdev: Pointer to the platform_device struct + * @fw_name: Name of firmware for rproc to use + * + * Function for allocating and initialising a slim rproc for use by + * device drivers whose IP is based around the SLIM core. It + * obtains and enables any clocks required by the SLIM core and also + * ioremaps the various IO. + * + * Returns st_slim_rproc pointer or PTR_ERR() on error. 
+ */ + +struct st_slim_rproc *st_slim_rproc_alloc(struct platform_device *pdev, + char *fw_name) +{ + struct device *dev = &pdev->dev; + struct st_slim_rproc *slim_rproc; + struct device_node *np = dev->of_node; + struct rproc *rproc; + struct resource *res; + int err, i; + const struct rproc_fw_ops *elf_ops; + + if (!fw_name) + return ERR_PTR(-EINVAL); + + if (!of_device_is_compatible(np, "st,slim-rproc")) + return ERR_PTR(-EINVAL); + + rproc = rproc_alloc(dev, np->name, &slim_rproc_ops, + fw_name, sizeof(*slim_rproc)); + if (!rproc) + return ERR_PTR(-ENOMEM); + + rproc->has_iommu = false; + + slim_rproc = rproc->priv; + slim_rproc->rproc = rproc; + + elf_ops = rproc->fw_ops; + /* Use some generic elf ops */ + slim_rproc_fw_ops.load = elf_ops->load; + slim_rproc_fw_ops.sanity_check = elf_ops->sanity_check; + + rproc->fw_ops = &slim_rproc_fw_ops; + + /* get imem and dmem */ + for (i = 0; i < ARRAY_SIZE(mem_names); i++) { + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, + mem_names[i]); + + slim_rproc->mem[i].cpu_addr = devm_ioremap_resource(dev, res); + if (IS_ERR(slim_rproc->mem[i].cpu_addr)) { + dev_err(&pdev->dev, "devm_ioremap_resource failed\n"); + err = PTR_ERR(slim_rproc->mem[i].cpu_addr); + goto err; + } + slim_rproc->mem[i].bus_addr = res->start; + slim_rproc->mem[i].size = resource_size(res); + } + + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "slimcore"); + slim_rproc->slimcore = devm_ioremap_resource(dev, res); + if (IS_ERR(slim_rproc->slimcore)) { + dev_err(&pdev->dev, "failed to ioremap slimcore IO\n"); + err = PTR_ERR(slim_rproc->slimcore); + goto err; + } + + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "peripherals"); + slim_rproc->peri = devm_ioremap_resource(dev, res); + if (IS_ERR(slim_rproc->peri)) { + dev_err(&pdev->dev, "failed to ioremap peripherals IO\n"); + err = PTR_ERR(slim_rproc->peri); + goto err; + } + + err = slim_clk_get(slim_rproc, dev); + if (err) + goto err; + + err = 
slim_clk_enable(slim_rproc); + if (err) { + dev_err(dev, "Failed to enable clocks\n"); + goto err_clk_put; + } + + /* Register as a remoteproc device */ + err = rproc_add(rproc); + if (err) { + dev_err(dev, "registration of slim remoteproc failed\n"); + goto err_clk_dis; + } + + return slim_rproc; + +err_clk_dis: + slim_clk_disable(slim_rproc); +err_clk_put: + for (i = 0; i < ST_SLIM_MAX_CLK && slim_rproc->clks[i]; i++) + clk_put(slim_rproc->clks[i]); +err: + rproc_put(rproc); + return ERR_PTR(err); +} +EXPORT_SYMBOL(st_slim_rproc_alloc); + +/** + * st_slim_rproc_put() - put slim rproc resources + * @slim_rproc: Pointer to the st_slim_rproc struct + * + * Function for calling respective _put() functions on slim_rproc resources. + * + */ +void st_slim_rproc_put(struct st_slim_rproc *slim_rproc) +{ + int clk; + + if (!slim_rproc) + return; + + slim_clk_disable(slim_rproc); + + for (clk = 0; clk < ST_SLIM_MAX_CLK && slim_rproc->clks[clk]; clk++) + clk_put(slim_rproc->clks[clk]); + + rproc_del(slim_rproc->rproc); + rproc_put(slim_rproc->rproc); +} +EXPORT_SYMBOL(st_slim_rproc_put); + +MODULE_AUTHOR("Peter Griffin "); +MODULE_DESCRIPTION("STMicroelectronics SLIM core rproc driver"); +MODULE_LICENSE("GPL v2"); diff --git a/include/linux/remoteproc/st_slim_rproc.h b/include/linux/remoteproc/st_slim_rproc.h new file mode 100644 index 000000000000..4155556fa4b2 --- /dev/null +++ b/include/linux/remoteproc/st_slim_rproc.h @@ -0,0 +1,58 @@ +/* + * SLIM core rproc driver header + * + * Copyright (C) 2016 STMicroelectronics + * + * Author: Peter Griffin + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ */ +#ifndef _ST_REMOTEPROC_SLIM_H +#define _ST_REMOTEPROC_SLIM_H + +#define ST_SLIM_MEM_MAX 2 +#define ST_SLIM_MAX_CLK 4 + +enum { + ST_SLIM_DMEM, + ST_SLIM_IMEM, +}; + +/** + * struct st_slim_mem - slim internal memory structure + * @cpu_addr: MPU virtual address of the memory region + * @bus_addr: Bus address used to access the memory region + * @size: Size of the memory region + */ +struct st_slim_mem { + void __iomem *cpu_addr; + phys_addr_t bus_addr; + size_t size; +}; + +/** + * struct st_slim_rproc - SLIM slim core + * @rproc: rproc handle + * @mem: slim memory information + * @slimcore: slim slimcore regs + * @peri: slim peripheral regs + * @clks: slim clocks + */ +struct st_slim_rproc { + struct rproc *rproc; + struct st_slim_mem mem[ST_SLIM_MEM_MAX]; + void __iomem *slimcore; + void __iomem *peri; + + /* st_slim_rproc private */ + struct clk *clks[ST_SLIM_MAX_CLK]; +}; + +struct st_slim_rproc *st_slim_rproc_alloc(struct platform_device *pdev, + char *fw_name); +void st_slim_rproc_put(struct st_slim_rproc *slim_rproc); + +#endif -- cgit v1.2.3 From 1a3f060c1a47dba4e12ac21ce62b57666b9c4e95 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 17 Oct 2016 19:15:44 -0700 Subject: net: Introduce new api for walking upper and lower devices This patch introduces netdev_walk_all_upper_dev_rcu, netdev_walk_all_lower_dev and netdev_walk_all_lower_dev_rcu. These functions recursively walk the adj_list of devices to determine all upper and lower devices. The functions take a callback function that is invoked for each device in the list. If the callback returns non-0, the walk is terminated and the functions return that code back to callers. v3 - simplified netdev_has_upper_dev_all_rcu and __netdev_has_upper_dev and removed typecast as suggested by Stephen v2 - fixed definition of netdev_next_lower_dev_rcu to mirror the upper_dev version. Signed-off-by: David Ahern Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 17 +++++ net/core/dev.c | 155 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 172 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index bf341b65ca5e..a5902d995907 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3778,6 +3778,14 @@ struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev, updev; \ updev = netdev_all_upper_get_next_dev_rcu(dev, &(iter))) +int netdev_walk_all_upper_dev_rcu(struct net_device *dev, + int (*fn)(struct net_device *upper_dev, + void *data), + void *data); + +bool netdev_has_upper_dev_all_rcu(struct net_device *dev, + struct net_device *upper_dev); + void *netdev_lower_get_next_private(struct net_device *dev, struct list_head **iter); void *netdev_lower_get_next_private_rcu(struct net_device *dev, @@ -3821,6 +3829,15 @@ struct net_device *netdev_all_lower_get_next_rcu(struct net_device *dev, ldev; \ ldev = netdev_all_lower_get_next_rcu(dev, &(iter))) +int netdev_walk_all_lower_dev(struct net_device *dev, + int (*fn)(struct net_device *lower_dev, + void *data), + void *data); +int netdev_walk_all_lower_dev_rcu(struct net_device *dev, + int (*fn)(struct net_device *lower_dev, + void *data), + void *data); + void *netdev_adjacent_get_private(struct list_head *adj_list); void *netdev_lower_get_first_private_rcu(struct net_device *dev); struct net_device *netdev_master_upper_dev_get(struct net_device *dev); diff --git a/net/core/dev.c b/net/core/dev.c index f67fd16615bb..fc48337cfab8 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5155,6 +5155,31 @@ bool netdev_has_upper_dev(struct net_device *dev, } EXPORT_SYMBOL(netdev_has_upper_dev); +/** + * netdev_has_upper_dev_all - Check if device is linked to an upper device + * @dev: device + * @upper_dev: upper device to check + * + * Find out if a device is linked to specified upper device and return true + * in case it is. 
Note that this checks the entire upper device chain. + * The caller must hold rcu lock. + */ + +static int __netdev_has_upper_dev(struct net_device *upper_dev, void *data) +{ + struct net_device *dev = data; + + return upper_dev == dev; +} + +bool netdev_has_upper_dev_all_rcu(struct net_device *dev, + struct net_device *upper_dev) +{ + return !!netdev_walk_all_upper_dev_rcu(dev, __netdev_has_upper_dev, + upper_dev); +} +EXPORT_SYMBOL(netdev_has_upper_dev_all_rcu); + /** * netdev_has_any_upper_dev - Check if device is linked to some device * @dev: device @@ -5255,6 +5280,51 @@ struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev, } EXPORT_SYMBOL(netdev_all_upper_get_next_dev_rcu); +static struct net_device *netdev_next_upper_dev_rcu(struct net_device *dev, + struct list_head **iter) +{ + struct netdev_adjacent *upper; + + WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_rtnl_is_held()); + + upper = list_entry_rcu((*iter)->next, struct netdev_adjacent, list); + + if (&upper->list == &dev->adj_list.upper) + return NULL; + + *iter = &upper->list; + + return upper->dev; +} + +int netdev_walk_all_upper_dev_rcu(struct net_device *dev, + int (*fn)(struct net_device *dev, + void *data), + void *data) +{ + struct net_device *udev; + struct list_head *iter; + int ret; + + for (iter = &dev->adj_list.upper, + udev = netdev_next_upper_dev_rcu(dev, &iter); + udev; + udev = netdev_next_upper_dev_rcu(dev, &iter)) { + /* first is the upper device itself */ + ret = fn(udev, data); + if (ret) + return ret; + + /* then look at all of its upper devices */ + ret = netdev_walk_all_upper_dev_rcu(udev, fn, data); + if (ret) + return ret; + } + + return 0; +} +EXPORT_SYMBOL_GPL(netdev_walk_all_upper_dev_rcu); + /** * netdev_lower_get_next_private - Get the next ->private from the * lower neighbour list @@ -5361,6 +5431,49 @@ struct net_device *netdev_all_lower_get_next(struct net_device *dev, struct list } EXPORT_SYMBOL(netdev_all_lower_get_next); +static struct 
net_device *netdev_next_lower_dev(struct net_device *dev, + struct list_head **iter) +{ + struct netdev_adjacent *lower; + + lower = list_entry(*iter, struct netdev_adjacent, list); + + if (&lower->list == &dev->adj_list.lower) + return NULL; + + *iter = lower->list.next; + + return lower->dev; +} + +int netdev_walk_all_lower_dev(struct net_device *dev, + int (*fn)(struct net_device *dev, + void *data), + void *data) +{ + struct net_device *ldev; + struct list_head *iter; + int ret; + + for (iter = &dev->adj_list.lower, + ldev = netdev_next_lower_dev(dev, &iter); + ldev; + ldev = netdev_next_lower_dev(dev, &iter)) { + /* first is the lower device itself */ + ret = fn(ldev, data); + if (ret) + return ret; + + /* then look at all of its lower devices */ + ret = netdev_walk_all_lower_dev(ldev, fn, data); + if (ret) + return ret; + } + + return 0; +} +EXPORT_SYMBOL_GPL(netdev_walk_all_lower_dev); + /** * netdev_all_lower_get_next_rcu - Get the next device from all * lower neighbour list, RCU variant @@ -5382,6 +5495,48 @@ struct net_device *netdev_all_lower_get_next_rcu(struct net_device *dev, } EXPORT_SYMBOL(netdev_all_lower_get_next_rcu); +static struct net_device *netdev_next_lower_dev_rcu(struct net_device *dev, + struct list_head **iter) +{ + struct netdev_adjacent *lower; + + lower = list_entry_rcu((*iter)->next, struct netdev_adjacent, list); + if (&lower->list == &dev->adj_list.lower) + return NULL; + + *iter = &lower->list; + + return lower->dev; +} + +int netdev_walk_all_lower_dev_rcu(struct net_device *dev, + int (*fn)(struct net_device *dev, + void *data), + void *data) +{ + struct net_device *ldev; + struct list_head *iter; + int ret; + + for (iter = &dev->adj_list.lower, + ldev = netdev_next_lower_dev_rcu(dev, &iter); + ldev; + ldev = netdev_next_lower_dev_rcu(dev, &iter)) { + /* first is the lower device itself */ + ret = fn(ldev, data); + if (ret) + return ret; + + /* then look at all of its lower devices */ + ret = netdev_walk_all_lower_dev_rcu(ldev, 
fn, data); + if (ret) + return ret; + } + + return 0; +} +EXPORT_SYMBOL_GPL(netdev_walk_all_lower_dev_rcu); + /** * netdev_lower_get_first_private_rcu - Get the first ->private from the * lower neighbour list, RCU -- cgit v1.2.3 From f1170fd462c67c4ae2f20734566d94e0f8f62f69 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 17 Oct 2016 19:15:51 -0700 Subject: net: Remove all_adj_list and its references Only direct adjacencies are maintained. All upper or lower devices can be learned via the new walk API which recursively walks the adj_list for upper devices or lower devices. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/linux/netdevice.h | 25 ------ net/core/dev.c | 223 ++++------------------------------------------ 2 files changed, 18 insertions(+), 230 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index a5902d995907..458c87631e7f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1456,7 +1456,6 @@ enum netdev_priv_flags { * @ptype_specific: Device-specific, protocol-specific packet handlers * * @adj_list: Directly linked devices, like slaves for bonding - * @all_adj_list: All linked devices, *including* neighbours * @features: Currently active device features * @hw_features: User-changeable features * @@ -1675,11 +1674,6 @@ struct net_device { struct list_head lower; } adj_list; - struct { - struct list_head upper; - struct list_head lower; - } all_adj_list; - netdev_features_t features; netdev_features_t hw_features; netdev_features_t wanted_features; @@ -3771,13 +3765,6 @@ struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev, updev; \ updev = netdev_upper_get_next_dev_rcu(dev, &(iter))) -/* iterate through upper list, must be called under RCU read lock */ -#define netdev_for_each_all_upper_dev_rcu(dev, updev, iter) \ - for (iter = &(dev)->all_adj_list.upper, \ - updev = netdev_all_upper_get_next_dev_rcu(dev, &(iter)); \ - 
updev; \ - updev = netdev_all_upper_get_next_dev_rcu(dev, &(iter))) - int netdev_walk_all_upper_dev_rcu(struct net_device *dev, int (*fn)(struct net_device *upper_dev, void *data), @@ -3817,18 +3804,6 @@ struct net_device *netdev_all_lower_get_next(struct net_device *dev, struct net_device *netdev_all_lower_get_next_rcu(struct net_device *dev, struct list_head **iter); -#define netdev_for_each_all_lower_dev(dev, ldev, iter) \ - for (iter = (dev)->all_adj_list.lower.next, \ - ldev = netdev_all_lower_get_next(dev, &(iter)); \ - ldev; \ - ldev = netdev_all_lower_get_next(dev, &(iter))) - -#define netdev_for_each_all_lower_dev_rcu(dev, ldev, iter) \ - for (iter = (dev)->all_adj_list.lower.next, \ - ldev = netdev_all_lower_get_next_rcu(dev, &(iter)); \ - ldev; \ - ldev = netdev_all_lower_get_next_rcu(dev, &(iter))) - int netdev_walk_all_lower_dev(struct net_device *dev, int (*fn)(struct net_device *lower_dev, void *data), diff --git a/net/core/dev.c b/net/core/dev.c index fc48337cfab8..a9fe14908b44 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5137,6 +5137,13 @@ static struct netdev_adjacent *__netdev_find_adj(struct net_device *adj_dev, return NULL; } +static int __netdev_has_upper_dev(struct net_device *upper_dev, void *data) +{ + struct net_device *dev = data; + + return upper_dev == dev; +} + /** * netdev_has_upper_dev - Check if device is linked to an upper device * @dev: device @@ -5151,7 +5158,8 @@ bool netdev_has_upper_dev(struct net_device *dev, { ASSERT_RTNL(); - return __netdev_find_adj(upper_dev, &dev->all_adj_list.upper); + return netdev_walk_all_upper_dev_rcu(dev, __netdev_has_upper_dev, + upper_dev); } EXPORT_SYMBOL(netdev_has_upper_dev); @@ -5165,13 +5173,6 @@ EXPORT_SYMBOL(netdev_has_upper_dev); * The caller must hold rcu lock. 
*/ -static int __netdev_has_upper_dev(struct net_device *upper_dev, void *data) -{ - struct net_device *dev = data; - - return upper_dev == dev; -} - bool netdev_has_upper_dev_all_rcu(struct net_device *dev, struct net_device *upper_dev) { @@ -5191,7 +5192,7 @@ static bool netdev_has_any_upper_dev(struct net_device *dev) { ASSERT_RTNL(); - return !list_empty(&dev->all_adj_list.upper); + return !list_empty(&dev->adj_list.upper); } /** @@ -5254,32 +5255,6 @@ struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev, } EXPORT_SYMBOL(netdev_upper_get_next_dev_rcu); -/** - * netdev_all_upper_get_next_dev_rcu - Get the next dev from upper list - * @dev: device - * @iter: list_head ** of the current position - * - * Gets the next device from the dev's upper list, starting from iter - * position. The caller must hold RCU read lock. - */ -struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev, - struct list_head **iter) -{ - struct netdev_adjacent *upper; - - WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_rtnl_is_held()); - - upper = list_entry_rcu((*iter)->next, struct netdev_adjacent, list); - - if (&upper->list == &dev->all_adj_list.upper) - return NULL; - - *iter = &upper->list; - - return upper->dev; -} -EXPORT_SYMBOL(netdev_all_upper_get_next_dev_rcu); - static struct net_device *netdev_next_upper_dev_rcu(struct net_device *dev, struct list_head **iter) { @@ -5406,31 +5381,6 @@ void *netdev_lower_get_next(struct net_device *dev, struct list_head **iter) } EXPORT_SYMBOL(netdev_lower_get_next); -/** - * netdev_all_lower_get_next - Get the next device from all lower neighbour list - * @dev: device - * @iter: list_head ** of the current position - * - * Gets the next netdev_adjacent from the dev's all lower neighbour - * list, starting from iter position. The caller must hold RTNL lock or - * its own locking that guarantees that the neighbour all lower - * list will remain unchanged. 
- */ -struct net_device *netdev_all_lower_get_next(struct net_device *dev, struct list_head **iter) -{ - struct netdev_adjacent *lower; - - lower = list_entry(*iter, struct netdev_adjacent, list); - - if (&lower->list == &dev->all_adj_list.lower) - return NULL; - - *iter = lower->list.next; - - return lower->dev; -} -EXPORT_SYMBOL(netdev_all_lower_get_next); - static struct net_device *netdev_next_lower_dev(struct net_device *dev, struct list_head **iter) { @@ -5474,27 +5424,6 @@ int netdev_walk_all_lower_dev(struct net_device *dev, } EXPORT_SYMBOL_GPL(netdev_walk_all_lower_dev); -/** - * netdev_all_lower_get_next_rcu - Get the next device from all - * lower neighbour list, RCU variant - * @dev: device - * @iter: list_head ** of the current position - * - * Gets the next netdev_adjacent from the dev's all lower neighbour - * list, starting from iter position. The caller must hold RCU read lock. - */ -struct net_device *netdev_all_lower_get_next_rcu(struct net_device *dev, - struct list_head **iter) -{ - struct netdev_adjacent *lower; - - lower = list_first_or_null_rcu(&dev->all_adj_list.lower, - struct netdev_adjacent, list); - - return lower ? 
lower->dev : NULL; -} -EXPORT_SYMBOL(netdev_all_lower_get_next_rcu); - static struct net_device *netdev_next_lower_dev_rcu(struct net_device *dev, struct list_head **iter) { @@ -5722,15 +5651,6 @@ static int __netdev_adjacent_dev_link_lists(struct net_device *dev, return 0; } -static int __netdev_adjacent_dev_link(struct net_device *dev, - struct net_device *upper_dev) -{ - return __netdev_adjacent_dev_link_lists(dev, upper_dev, - &dev->all_adj_list.upper, - &upper_dev->all_adj_list.lower, - NULL, false); -} - static void __netdev_adjacent_dev_unlink_lists(struct net_device *dev, struct net_device *upper_dev, u16 ref_nr, @@ -5741,40 +5661,19 @@ static void __netdev_adjacent_dev_unlink_lists(struct net_device *dev, __netdev_adjacent_dev_remove(upper_dev, dev, ref_nr, down_list); } -static void __netdev_adjacent_dev_unlink(struct net_device *dev, - struct net_device *upper_dev, - u16 ref_nr) -{ - __netdev_adjacent_dev_unlink_lists(dev, upper_dev, ref_nr, - &dev->all_adj_list.upper, - &upper_dev->all_adj_list.lower); -} - static int __netdev_adjacent_dev_link_neighbour(struct net_device *dev, struct net_device *upper_dev, void *private, bool master) { - int ret = __netdev_adjacent_dev_link(dev, upper_dev); - - if (ret) - return ret; - - ret = __netdev_adjacent_dev_link_lists(dev, upper_dev, - &dev->adj_list.upper, - &upper_dev->adj_list.lower, - private, master); - if (ret) { - __netdev_adjacent_dev_unlink(dev, upper_dev, 1); - return ret; - } - - return 0; + return __netdev_adjacent_dev_link_lists(dev, upper_dev, + &dev->adj_list.upper, + &upper_dev->adj_list.lower, + private, master); } static void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev, struct net_device *upper_dev) { - __netdev_adjacent_dev_unlink(dev, upper_dev, 1); __netdev_adjacent_dev_unlink_lists(dev, upper_dev, 1, &dev->adj_list.upper, &upper_dev->adj_list.lower); @@ -5785,7 +5684,6 @@ static int __netdev_upper_dev_link(struct net_device *dev, void *upper_priv, void *upper_info) { 
struct netdev_notifier_changeupper_info changeupper_info; - struct netdev_adjacent *i, *j, *to_i, *to_j; int ret = 0; ASSERT_RTNL(); @@ -5794,10 +5692,10 @@ static int __netdev_upper_dev_link(struct net_device *dev, return -EBUSY; /* To prevent loops, check if dev is not upper device to upper_dev. */ - if (__netdev_find_adj(dev, &upper_dev->all_adj_list.upper)) + if (netdev_has_upper_dev(upper_dev, dev)) return -EBUSY; - if (__netdev_find_adj(upper_dev, &dev->adj_list.upper)) + if (netdev_has_upper_dev(dev, upper_dev)) return -EEXIST; if (master && netdev_master_upper_dev_get(dev)) @@ -5819,80 +5717,15 @@ static int __netdev_upper_dev_link(struct net_device *dev, if (ret) return ret; - /* Now that we linked these devs, make all the upper_dev's - * all_adj_list.upper visible to every dev's all_adj_list.lower an - * versa, and don't forget the devices itself. All of these - * links are non-neighbours. - */ - list_for_each_entry(i, &dev->all_adj_list.lower, list) { - list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) { - pr_debug("Interlinking %s with %s, non-neighbour\n", - i->dev->name, j->dev->name); - ret = __netdev_adjacent_dev_link(i->dev, j->dev); - if (ret) - goto rollback_mesh; - } - } - - /* add dev to every upper_dev's upper device */ - list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) { - pr_debug("linking %s's upper device %s with %s\n", - upper_dev->name, i->dev->name, dev->name); - ret = __netdev_adjacent_dev_link(dev, i->dev); - if (ret) - goto rollback_upper_mesh; - } - - /* add upper_dev to every dev's lower device */ - list_for_each_entry(i, &dev->all_adj_list.lower, list) { - pr_debug("linking %s's lower device %s with %s\n", dev->name, - i->dev->name, upper_dev->name); - ret = __netdev_adjacent_dev_link(i->dev, upper_dev); - if (ret) - goto rollback_lower_mesh; - } - ret = call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev, &changeupper_info.info); ret = notifier_to_errno(ret); if (ret) - goto rollback_lower_mesh; + goto 
rollback; return 0; -rollback_lower_mesh: - to_i = i; - list_for_each_entry(i, &dev->all_adj_list.lower, list) { - if (i == to_i) - break; - __netdev_adjacent_dev_unlink(i->dev, upper_dev, i->ref_nr); - } - - i = NULL; - -rollback_upper_mesh: - to_i = i; - list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) { - if (i == to_i) - break; - __netdev_adjacent_dev_unlink(dev, i->dev, i->ref_nr); - } - - i = j = NULL; - -rollback_mesh: - to_i = i; - to_j = j; - list_for_each_entry(i, &dev->all_adj_list.lower, list) { - list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) { - if (i == to_i && j == to_j) - break; - __netdev_adjacent_dev_unlink(i->dev, j->dev, i->ref_nr); - } - if (i == to_i) - break; - } - +rollback: __netdev_adjacent_dev_unlink_neighbour(dev, upper_dev); return ret; @@ -5949,7 +5782,6 @@ void netdev_upper_dev_unlink(struct net_device *dev, struct net_device *upper_dev) { struct netdev_notifier_changeupper_info changeupper_info; - struct netdev_adjacent *i, *j; ASSERT_RTNL(); changeupper_info.upper_dev = upper_dev; @@ -5961,23 +5793,6 @@ void netdev_upper_dev_unlink(struct net_device *dev, __netdev_adjacent_dev_unlink_neighbour(dev, upper_dev); - /* Here is the tricky part. We must remove all dev's lower - * devices from all upper_dev's upper devices and vice - * versa, to maintain the graph relationship. 
- */ - list_for_each_entry(i, &dev->all_adj_list.lower, list) - list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) - __netdev_adjacent_dev_unlink(i->dev, j->dev, i->ref_nr); - - /* remove also the devices itself from lower/upper device - * list - */ - list_for_each_entry(i, &dev->all_adj_list.lower, list) - __netdev_adjacent_dev_unlink(i->dev, upper_dev, i->ref_nr); - - list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) - __netdev_adjacent_dev_unlink(dev, i->dev, i->ref_nr); - call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev, &changeupper_info.info); } @@ -7679,8 +7494,6 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, INIT_LIST_HEAD(&dev->link_watch_list); INIT_LIST_HEAD(&dev->adj_list.upper); INIT_LIST_HEAD(&dev->adj_list.lower); - INIT_LIST_HEAD(&dev->all_adj_list.upper); - INIT_LIST_HEAD(&dev->all_adj_list.lower); INIT_LIST_HEAD(&dev->ptype_all); INIT_LIST_HEAD(&dev->ptype_specific); #ifdef CONFIG_NET_SCHED -- cgit v1.2.3 From 1f9127caece42514a47011326b83ad93d95cd5d7 Mon Sep 17 00:00:00 2001 From: Zach Brown Date: Mon, 17 Oct 2016 10:49:54 -0500 Subject: net: phy: Create phy_supported_speeds function which lists speeds currently supported by a phydevice phy_supported_speeds provides a means to get a list of all the speeds a phy device currently supports. Signed-off-by: Zach Brown Signed-off-by: David S. Miller --- drivers/net/phy/phy.c | 35 +++++++++++++++++++++++++++++++++++ include/linux/phy.h | 15 +++++++++++++++ 2 files changed, 50 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 8b7659e94057..ee3c793124c7 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -260,6 +260,41 @@ static inline unsigned int phy_find_valid(unsigned int idx, u32 features) return idx < MAX_NUM_SETTINGS ? 
idx : MAX_NUM_SETTINGS - 1; } +/** + * phy_supported_speeds - return all speeds currently supported by a phy device + * @phy: The phy device to return supported speeds of. + * @speeds: buffer to store supported speeds in. + * @size: size of speeds buffer. + * + * Description: Returns the number of supported speeds, and fills the speeds + * buffer with the supported speeds. If speeds buffer is too small to contain + * all currently supported speeds, will return as many speeds as can fit. + */ +unsigned int phy_supported_speeds(struct phy_device *phy, + unsigned int *speeds, + unsigned int size) +{ + unsigned int count = 0; + unsigned int idx = 0; + + while (idx < MAX_NUM_SETTINGS && count < size) { + idx = phy_find_valid(idx, phy->supported); + + if (!(settings[idx].setting & phy->supported)) + break; + + /* Assumes settings are grouped by speed */ + if ((count == 0) || + (speeds[count - 1] != settings[idx].speed)) { + speeds[count] = settings[idx].speed; + count++; + } + idx++; + } + + return count; +} + /** * phy_check_valid - check if there is a valid PHY setting which matches * speed, duplex, and feature mask diff --git a/include/linux/phy.h b/include/linux/phy.h index c47378c93607..4b6c246c63bb 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -84,6 +84,21 @@ typedef enum { PHY_INTERFACE_MODE_MAX, } phy_interface_t; +/** + * phy_supported_speeds - return all speeds currently supported by a phy device + * @phy: The phy device to return supported speeds of. + * @speeds: buffer to store supported speeds in. + * @size: size of speeds buffer. + * + * Description: Returns the number of supported speeds, and + * fills the speeds buffer with the supported speeds. If speeds buffer is + * too small to contain all currently supported speeds, will return as + * many speeds as can fit.
+ */ +unsigned int phy_supported_speeds(struct phy_device *phy, + unsigned int *speeds, + unsigned int size); + /** * It maps 'enum phy_interface_t' found in include/linux/phy.h * into the device tree binding of 'phy-mode', so that Ethernet -- cgit v1.2.3 From 2e0bc452f4721520502575362a9cd3c1248d2337 Mon Sep 17 00:00:00 2001 From: Zach Brown Date: Mon, 17 Oct 2016 10:49:55 -0500 Subject: net: phy: leds: add support for led triggers on phy link state change Create an option CONFIG_LED_TRIGGER_PHY (default n), which will create a set of led triggers for each instantiated PHY device. There is one LED trigger per link-speed, per-phy. The triggers are registered during phy_attach and unregistered during phy_detach. This allows for a user to configure their system to allow a set of LEDs not controlled by the phy to represent link state changes on the phy. LEDS controlled by the phy are unaffected. For example, we have a board where some of the leds in the RJ45 socket are controlled by the phy, but others are not. Using the triggers provided by this patch the leds not controlled by the phy can be configured to show the current speed of the ethernet connection. The leds controlled by the phy are unaffected. Signed-off-by: Josh Cartwright Signed-off-by: Nathan Sullivan Signed-off-by: Zach Brown Signed-off-by: David S. 
Miller --- drivers/net/phy/Kconfig | 13 ++++ drivers/net/phy/Makefile | 1 + drivers/net/phy/phy.c | 1 + drivers/net/phy/phy_device.c | 5 ++ drivers/net/phy/phy_led_triggers.c | 136 +++++++++++++++++++++++++++++++++++++ include/linux/phy.h | 7 ++ include/linux/phy_led_triggers.h | 51 ++++++++++++++ 7 files changed, 214 insertions(+) create mode 100644 drivers/net/phy/phy_led_triggers.c create mode 100644 include/linux/phy_led_triggers.h (limited to 'include/linux') diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig index 2651c8d8de2f..45f68eaf9b79 100644 --- a/drivers/net/phy/Kconfig +++ b/drivers/net/phy/Kconfig @@ -15,6 +15,19 @@ if PHYLIB config SWPHY bool +config LED_TRIGGER_PHY + bool "Support LED triggers for tracking link state" + depends on LEDS_TRIGGERS + ---help--- + Adds support for a set of LED trigger events per-PHY. Link + state change will trigger the events, for consumption by an + LED class driver. There are triggers for each link speed currently + supported by the phy, and are of the form: + <mii bus id>:<phy>:<speed> + + Where speed is in the form: + <Speed in megabits>Mbps or <Speed in gigabits>Gbps + comment "MDIO bus device drivers" config MDIO_BCM_IPROC diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile index e58667d111e7..86d12cd3fbf0 100644 --- a/drivers/net/phy/Makefile +++ b/drivers/net/phy/Makefile @@ -2,6 +2,7 @@ libphy-y := phy.o phy_device.o mdio_bus.o mdio_device.o libphy-$(CONFIG_SWPHY) += swphy.o +libphy-$(CONFIG_LED_TRIGGER_PHY) += phy_led_triggers.o obj-$(CONFIG_PHYLIB) += libphy.o diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index ee3c793124c7..2f94c60d4939 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -946,6 +946,7 @@ EXPORT_SYMBOL(phy_start); static void phy_adjust_link(struct phy_device *phydev) { phydev->adjust_link(phydev->attached_dev); + phy_led_trigger_change_speed(phydev); } /** diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index ac440a815353..49a1c988d29c 100644 ---
a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -916,6 +917,8 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev, else phy_resume(phydev); + phy_led_triggers_register(phydev); + return err; error: @@ -989,6 +992,8 @@ void phy_detach(struct phy_device *phydev) } } + phy_led_triggers_unregister(phydev); + /* * The phydev might go away on the put_device() below, so avoid * a use-after-free bug by reading the underlying bus first. diff --git a/drivers/net/phy/phy_led_triggers.c b/drivers/net/phy/phy_led_triggers.c new file mode 100644 index 000000000000..cda600a1b766 --- /dev/null +++ b/drivers/net/phy/phy_led_triggers.c @@ -0,0 +1,136 @@ +/* Copyright (C) 2016 National Instruments Corp. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ +#include +#include +#include + +static struct phy_led_trigger *phy_speed_to_led_trigger(struct phy_device *phy, + unsigned int speed) +{ + unsigned int i; + + for (i = 0; i < phy->phy_num_led_triggers; i++) { + if (phy->phy_led_triggers[i].speed == speed) + return &phy->phy_led_triggers[i]; + } + return NULL; +} + +void phy_led_trigger_change_speed(struct phy_device *phy) +{ + struct phy_led_trigger *plt; + + if (!phy->link) + goto out_change_speed; + + if (phy->speed == 0) + return; + + plt = phy_speed_to_led_trigger(phy, phy->speed); + if (!plt) { + netdev_alert(phy->attached_dev, + "No phy led trigger registered for speed(%d)\n", + phy->speed); + goto out_change_speed; + } + + if (plt != phy->last_triggered) { + led_trigger_event(&phy->last_triggered->trigger, LED_OFF); + led_trigger_event(&plt->trigger, LED_FULL); + phy->last_triggered = plt; + } + return; + +out_change_speed: + if (phy->last_triggered) { + led_trigger_event(&phy->last_triggered->trigger, + LED_OFF); + phy->last_triggered = NULL; + } +} +EXPORT_SYMBOL_GPL(phy_led_trigger_change_speed); + +static int phy_led_trigger_register(struct phy_device *phy, + struct phy_led_trigger *plt, + unsigned int speed) +{ + char name_suffix[PHY_LED_TRIGGER_SPEED_SUFFIX_SIZE]; + + plt->speed = speed; + + if (speed < SPEED_1000) + snprintf(name_suffix, sizeof(name_suffix), "%dMbps", speed); + else if (speed == SPEED_2500) + snprintf(name_suffix, sizeof(name_suffix), "2.5Gbps"); + else + snprintf(name_suffix, sizeof(name_suffix), "%dGbps", + DIV_ROUND_CLOSEST(speed, 1000)); + + snprintf(plt->name, sizeof(plt->name), PHY_ID_FMT ":%s", + phy->mdio.bus->id, phy->mdio.addr, name_suffix); + plt->trigger.name = plt->name; + + return led_trigger_register(&plt->trigger); +} + +static void phy_led_trigger_unregister(struct phy_led_trigger *plt) +{ + led_trigger_unregister(&plt->trigger); +} + +int phy_led_triggers_register(struct phy_device *phy) +{ + int i, err; + unsigned int speeds[50]; + + phy->phy_num_led_triggers 
= phy_supported_speeds(phy, speeds, + ARRAY_SIZE(speeds)); + if (!phy->phy_num_led_triggers) + return 0; + + phy->phy_led_triggers = devm_kzalloc(&phy->mdio.dev, + sizeof(struct phy_led_trigger) * + phy->phy_num_led_triggers, + GFP_KERNEL); + if (!phy->phy_led_triggers) + return -ENOMEM; + + for (i = 0; i < phy->phy_num_led_triggers; i++) { + err = phy_led_trigger_register(phy, &phy->phy_led_triggers[i], + speeds[i]); + if (err) + goto out_unreg; + } + + phy->last_triggered = NULL; + phy_led_trigger_change_speed(phy); + + return 0; +out_unreg: + while (i--) + phy_led_trigger_unregister(&phy->phy_led_triggers[i]); + devm_kfree(&phy->mdio.dev, phy->phy_led_triggers); + return err; +} +EXPORT_SYMBOL_GPL(phy_led_triggers_register); + +void phy_led_triggers_unregister(struct phy_device *phy) +{ + int i; + + for (i = 0; i < phy->phy_num_led_triggers; i++) + phy_led_trigger_unregister(&phy->phy_led_triggers[i]); + + devm_kfree(&phy->mdio.dev, phy->phy_led_triggers); +} +EXPORT_SYMBOL_GPL(phy_led_triggers_unregister); diff --git a/include/linux/phy.h b/include/linux/phy.h index 4b6c246c63bb..e7e1fd382564 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -25,6 +25,7 @@ #include #include #include +#include #include @@ -420,6 +421,12 @@ struct phy_device { int link_timeout; +#ifdef CONFIG_LED_TRIGGER_PHY + struct phy_led_trigger *phy_led_triggers; + unsigned int phy_num_led_triggers; + struct phy_led_trigger *last_triggered; +#endif + /* * Interrupt number for this PHY * -1 means no interrupt diff --git a/include/linux/phy_led_triggers.h b/include/linux/phy_led_triggers.h new file mode 100644 index 000000000000..a2daea0a37d2 --- /dev/null +++ b/include/linux/phy_led_triggers.h @@ -0,0 +1,51 @@ +/* Copyright (C) 2016 National Instruments Corp. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ +#ifndef __PHY_LED_TRIGGERS +#define __PHY_LED_TRIGGERS + +struct phy_device; + +#ifdef CONFIG_LED_TRIGGER_PHY + +#include + +#define PHY_LED_TRIGGER_SPEED_SUFFIX_SIZE 10 +#define PHY_MII_BUS_ID_SIZE (20 - 3) + +#define PHY_LINK_LED_TRIGGER_NAME_SIZE (PHY_MII_BUS_ID_SIZE + \ + FIELD_SIZEOF(struct mdio_device, addr)+\ + PHY_LED_TRIGGER_SPEED_SUFFIX_SIZE) + +struct phy_led_trigger { + struct led_trigger trigger; + char name[PHY_LINK_LED_TRIGGER_NAME_SIZE]; + unsigned int speed; +}; + + +extern int phy_led_triggers_register(struct phy_device *phy); +extern void phy_led_triggers_unregister(struct phy_device *phy); +extern void phy_led_trigger_change_speed(struct phy_device *phy); + +#else + +static inline int phy_led_triggers_register(struct phy_device *phy) +{ + return 0; +} +static inline void phy_led_triggers_unregister(struct phy_device *phy) { } +static inline void phy_led_trigger_change_speed(struct phy_device *phy) { } + +#endif + +#endif -- cgit v1.2.3 From 797476b88bde2a6001f9552f383f147e58c1a330 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 18 Oct 2016 15:40:29 +0900 Subject: block: Add 'zoned' queue limit Add the zoned queue limit to indicate the zoning model of a block device. Defined values are 0 (BLK_ZONED_NONE) for regular block devices, 1 (BLK_ZONED_HA) for host-aware zone block devices and 2 (BLK_ZONED_HM) for host-managed zone block devices. 
The standards defined drive managed model is not defined here since these block devices do not provide any command for accessing zone information. Drive managed model devices will be reported as BLK_ZONED_NONE. The helper functions blk_queue_zoned_model and bdev_zoned_model return the zoned limit and the functions blk_queue_is_zoned and bdev_is_zoned return a boolean for callers to test if a block device is zoned. The zoned attribute is also exported as a string to applications via sysfs. BLK_ZONED_NONE shows as "none", BLK_ZONED_HA as "host-aware" and BLK_ZONED_HM as "host-managed". Signed-off-by: Damien Le Moal Reviewed-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Reviewed-by: Shaun Tancheff Tested-by: Shaun Tancheff Signed-off-by: Jens Axboe --- Documentation/ABI/testing/sysfs-block | 16 ++++++++++++ block/blk-settings.c | 1 + block/blk-sysfs.c | 18 ++++++++++++++ include/linux/blkdev.h | 47 +++++++++++++++++++++++++++++++++++ 4 files changed, 82 insertions(+) (limited to 'include/linux') diff --git a/Documentation/ABI/testing/sysfs-block b/Documentation/ABI/testing/sysfs-block index 71d184dbb70d..75a5055a722b 100644 --- a/Documentation/ABI/testing/sysfs-block +++ b/Documentation/ABI/testing/sysfs-block @@ -235,3 +235,19 @@ Description: write_same_max_bytes is 0, write same is not supported by the device. +What: /sys/block//queue/zoned +Date: September 2016 +Contact: Damien Le Moal +Description: + zoned indicates if the device is a zoned block device + and the zone model of the device if it is indeed zoned. + The possible values indicated by zoned are "none" for + regular block devices and "host-aware" or "host-managed" + for zoned block devices. The characteristics of + host-aware and host-managed zoned block devices are + described in the ZBC (Zoned Block Commands) and ZAC + (Zoned Device ATA Command Set) standards. These standards + also define the "drive-managed" zone model. 
However, + since drive-managed zoned block devices do not support + zone commands, they will be treated as regular block + devices and zoned will report "none". diff --git a/block/blk-settings.c b/block/blk-settings.c index f679ae122843..b1d5b7fa4d07 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -107,6 +107,7 @@ void blk_set_default_limits(struct queue_limits *lim) lim->io_opt = 0; lim->misaligned = 0; lim->cluster = 1; + lim->zoned = BLK_ZONED_NONE; } EXPORT_SYMBOL(blk_set_default_limits); diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 9cc8d7c5439a..ff9cd9cd35a3 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -257,6 +257,18 @@ QUEUE_SYSFS_BIT_FNS(random, ADD_RANDOM, 0); QUEUE_SYSFS_BIT_FNS(iostats, IO_STAT, 0); #undef QUEUE_SYSFS_BIT_FNS +static ssize_t queue_zoned_show(struct request_queue *q, char *page) +{ + switch (blk_queue_zoned_model(q)) { + case BLK_ZONED_HA: + return sprintf(page, "host-aware\n"); + case BLK_ZONED_HM: + return sprintf(page, "host-managed\n"); + default: + return sprintf(page, "none\n"); + } +} + static ssize_t queue_nomerges_show(struct request_queue *q, char *page) { return queue_var_show((blk_queue_nomerges(q) << 1) | @@ -485,6 +497,11 @@ static struct queue_sysfs_entry queue_nonrot_entry = { .store = queue_store_nonrot, }; +static struct queue_sysfs_entry queue_zoned_entry = { + .attr = {.name = "zoned", .mode = S_IRUGO }, + .show = queue_zoned_show, +}; + static struct queue_sysfs_entry queue_nomerges_entry = { .attr = {.name = "nomerges", .mode = S_IRUGO | S_IWUSR }, .show = queue_nomerges_show, @@ -546,6 +563,7 @@ static struct attribute *default_attrs[] = { &queue_discard_zeroes_data_entry.attr, &queue_write_same_max_entry.attr, &queue_nonrot_entry.attr, + &queue_zoned_entry.attr, &queue_nomerges_entry.attr, &queue_rq_affinity_entry.attr, &queue_iostats_entry.attr, diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c47c358ba052..f19e16bb43d1 100644 --- a/include/linux/blkdev.h 
+++ b/include/linux/blkdev.h @@ -261,6 +261,15 @@ struct blk_queue_tag { #define BLK_SCSI_MAX_CMDS (256) #define BLK_SCSI_CMD_PER_LONG (BLK_SCSI_MAX_CMDS / (sizeof(long) * 8)) +/* + * Zoned block device models (zoned limit). + */ +enum blk_zoned_model { + BLK_ZONED_NONE, /* Regular block device */ + BLK_ZONED_HA, /* Host-aware zoned block device */ + BLK_ZONED_HM, /* Host-managed zoned block device */ +}; + struct queue_limits { unsigned long bounce_pfn; unsigned long seg_boundary_mask; @@ -290,6 +299,7 @@ struct queue_limits { unsigned char cluster; unsigned char discard_zeroes_data; unsigned char raid_partial_stripes_expensive; + enum blk_zoned_model zoned; }; struct request_queue { @@ -627,6 +637,23 @@ static inline unsigned int blk_queue_cluster(struct request_queue *q) return q->limits.cluster; } +static inline enum blk_zoned_model +blk_queue_zoned_model(struct request_queue *q) +{ + return q->limits.zoned; +} + +static inline bool blk_queue_is_zoned(struct request_queue *q) +{ + switch (blk_queue_zoned_model(q)) { + case BLK_ZONED_HA: + case BLK_ZONED_HM: + return true; + default: + return false; + } +} + /* * We regard a request as sync, if either a read or a sync write */ @@ -1354,6 +1381,26 @@ static inline unsigned int bdev_write_same(struct block_device *bdev) return 0; } +static inline enum blk_zoned_model bdev_zoned_model(struct block_device *bdev) +{ + struct request_queue *q = bdev_get_queue(bdev); + + if (q) + return blk_queue_zoned_model(q); + + return BLK_ZONED_NONE; +} + +static inline bool bdev_is_zoned(struct block_device *bdev) +{ + struct request_queue *q = bdev_get_queue(bdev); + + if (q) + return blk_queue_is_zoned(q); + + return false; +} + static inline int queue_dma_alignment(struct request_queue *q) { return q ? 
q->dma_alignment : 511; -- cgit v1.2.3 From 2d253440b5afb128d22ccdae812dde9ba77a2cca Mon Sep 17 00:00:00 2001 From: Shaun Tancheff Date: Tue, 18 Oct 2016 15:40:32 +0900 Subject: block: Define zoned block device operations Define REQ_OP_ZONE_REPORT and REQ_OP_ZONE_RESET for handling zones of host-managed and host-aware zoned block devices. With these two new operations, the total number of operations defined reaches 8 and still fits with the 3 bits definition of REQ_OP_BITS. Signed-off-by: Shaun Tancheff Signed-off-by: Damien Le Moal Reviewed-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- block/blk-core.c | 4 ++++ include/linux/blk_types.h | 2 ++ 2 files changed, 6 insertions(+) (limited to 'include/linux') diff --git a/block/blk-core.c b/block/blk-core.c index 14d7c0740dc0..e4eda5d2aa56 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1941,6 +1941,10 @@ generic_make_request_checks(struct bio *bio) case REQ_OP_WRITE_SAME: if (!bdev_write_same(bio->bi_bdev)) goto not_supported; + case REQ_OP_ZONE_REPORT: + case REQ_OP_ZONE_RESET: + if (!bdev_is_zoned(bio->bi_bdev)) + goto not_supported; break; default: break; diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index cd395ecec99d..dd50dce89a80 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -243,6 +243,8 @@ enum req_op { REQ_OP_SECURE_ERASE, /* request to securely erase sectors */ REQ_OP_WRITE_SAME, /* write same block many times */ REQ_OP_FLUSH, /* request for cache flush */ + REQ_OP_ZONE_REPORT, /* Get zone information */ + REQ_OP_ZONE_RESET, /* Reset a zone write pointer */ }; #define REQ_OP_BITS 3 -- cgit v1.2.3 From 6a0cb1bc106fc07ce0443303bcdb7f7da5131e5c Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 18 Oct 2016 15:40:33 +0900 Subject: block: Implement support for zoned block devices Implement zoned block device zone information reporting and reset.
Zone information is reported as struct blk_zone. This implementation does not differentiate between host-aware and host-managed device models and is valid for both. Two functions are provided: blkdev_report_zones for discovering the zone configuration of a zoned block device, and blkdev_reset_zones for resetting the write pointer of sequential zones. The helper functions blk_queue_zone_size and bdev_zone_size are also provided for, as the names suggest, obtaining the zone size (in 512B sectors) of the zones of the device. Signed-off-by: Hannes Reinecke [Damien: * Removed the zone cache * Implement report zones operation based on earlier proposal by Shaun Tancheff ] Signed-off-by: Damien Le Moal Reviewed-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Reviewed-by: Shaun Tancheff Tested-by: Shaun Tancheff Signed-off-by: Jens Axboe --- block/Kconfig | 8 ++ block/Makefile | 1 + block/blk-zoned.c | 257 ++++++++++++++++++++++++++++++++++++++++++ include/linux/blkdev.h | 31 +++++ include/uapi/linux/Kbuild | 1 + include/uapi/linux/blkzoned.h | 103 +++++++++++++++++ 6 files changed, 401 insertions(+) create mode 100644 block/blk-zoned.c create mode 100644 include/uapi/linux/blkzoned.h (limited to 'include/linux') diff --git a/block/Kconfig b/block/Kconfig index 1d4d624492fc..6b0ad08f0677 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -89,6 +89,14 @@ config BLK_DEV_INTEGRITY T10/SCSI Data Integrity Field or the T13/ATA External Path Protection. If in doubt, say N. +config BLK_DEV_ZONED + bool "Zoned block device support" + ---help--- + Block layer zoned block device support. This option enables + support for ZAC/ZBC host-managed and host-aware zoned block devices. + + Say yes here if you have a ZAC or ZBC storage device.
+ config BLK_DEV_THROTTLING bool "Block layer bio throttling support" depends on BLK_CGROUP=y diff --git a/block/Makefile b/block/Makefile index 36acdd7545be..934dac73fb37 100644 --- a/block/Makefile +++ b/block/Makefile @@ -23,3 +23,4 @@ obj-$(CONFIG_BLOCK_COMPAT) += compat_ioctl.o obj-$(CONFIG_BLK_CMDLINE_PARSER) += cmdline-parser.o obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o blk-integrity.o t10-pi.o obj-$(CONFIG_BLK_MQ_PCI) += blk-mq-pci.o +obj-$(CONFIG_BLK_DEV_ZONED) += blk-zoned.o diff --git a/block/blk-zoned.c b/block/blk-zoned.c new file mode 100644 index 000000000000..1603573f9605 --- /dev/null +++ b/block/blk-zoned.c @@ -0,0 +1,257 @@ +/* + * Zoned block device handling + * + * Copyright (c) 2015, Hannes Reinecke + * Copyright (c) 2015, SUSE Linux GmbH + * + * Copyright (c) 2016, Damien Le Moal + * Copyright (c) 2016, Western Digital + */ + +#include +#include +#include +#include + +static inline sector_t blk_zone_start(struct request_queue *q, + sector_t sector) +{ + sector_t zone_mask = blk_queue_zone_size(q) - 1; + + return sector & ~zone_mask; +} + +/* + * Check that a zone report belongs to the partition. + * If yes, fix its start sector and write pointer, copy it in the + * zone information array and return true. Return false otherwise. 
+ */ +static bool blkdev_report_zone(struct block_device *bdev, + struct blk_zone *rep, + struct blk_zone *zone) +{ + sector_t offset = get_start_sect(bdev); + + if (rep->start < offset) + return false; + + rep->start -= offset; + if (rep->start + rep->len > bdev->bd_part->nr_sects) + return false; + + if (rep->type == BLK_ZONE_TYPE_CONVENTIONAL) + rep->wp = rep->start + rep->len; + else + rep->wp -= offset; + memcpy(zone, rep, sizeof(struct blk_zone)); + + return true; +} + +/** + * blkdev_report_zones - Get zones information + * @bdev: Target block device + * @sector: Sector from which to report zones + * @zones: Array of zone structures where to return the zones information + * @nr_zones: Number of zone structures in the zone array + * @gfp_mask: Memory allocation flags (for bio_alloc) + * + * Description: + * Get zone information starting from the zone containing @sector. + * The number of zone information reported may be less than the number + * requested by @nr_zones. The number of zones actually reported is + * returned in @nr_zones. + */ +int blkdev_report_zones(struct block_device *bdev, + sector_t sector, + struct blk_zone *zones, + unsigned int *nr_zones, + gfp_t gfp_mask) +{ + struct request_queue *q = bdev_get_queue(bdev); + struct blk_zone_report_hdr *hdr; + unsigned int nrz = *nr_zones; + struct page *page; + unsigned int nr_rep; + size_t rep_bytes; + unsigned int nr_pages; + struct bio *bio; + struct bio_vec *bv; + unsigned int i, n, nz; + unsigned int ofst; + void *addr; + int ret = 0; + + if (!q) + return -ENXIO; + + if (!blk_queue_is_zoned(q)) + return -EOPNOTSUPP; + + if (!nrz) + return 0; + + if (sector > bdev->bd_part->nr_sects) { + *nr_zones = 0; + return 0; + } + + /* + * The zone report has a header. So make room for it in the + * payload. Also make sure that the report fits in a single BIO + * that will not be split down the stack. 
+ */ + rep_bytes = sizeof(struct blk_zone_report_hdr) + + sizeof(struct blk_zone) * nrz; + rep_bytes = (rep_bytes + PAGE_SIZE - 1) & PAGE_MASK; + if (rep_bytes > (queue_max_sectors(q) << 9)) + rep_bytes = queue_max_sectors(q) << 9; + + nr_pages = min_t(unsigned int, BIO_MAX_PAGES, + rep_bytes >> PAGE_SHIFT); + nr_pages = min_t(unsigned int, nr_pages, + queue_max_segments(q)); + + bio = bio_alloc(gfp_mask, nr_pages); + if (!bio) + return -ENOMEM; + + bio->bi_bdev = bdev; + bio->bi_iter.bi_sector = blk_zone_start(q, sector); + bio_set_op_attrs(bio, REQ_OP_ZONE_REPORT, 0); + + for (i = 0; i < nr_pages; i++) { + page = alloc_page(gfp_mask); + if (!page) { + ret = -ENOMEM; + goto out; + } + if (!bio_add_page(bio, page, PAGE_SIZE, 0)) { + __free_page(page); + break; + } + } + + if (i == 0) + ret = -ENOMEM; + else + ret = submit_bio_wait(bio); + if (ret) + goto out; + + /* + * Process the report result: skip the header and go through the + * reported zones to fixup and fixup the zone information for + * partitions. At the same time, return the zone information into + * the zone array. 
+ */ + n = 0; + nz = 0; + nr_rep = 0; + bio_for_each_segment_all(bv, bio, i) { + + if (!bv->bv_page) + break; + + addr = kmap_atomic(bv->bv_page); + + /* Get header in the first page */ + ofst = 0; + if (!nr_rep) { + hdr = (struct blk_zone_report_hdr *) addr; + nr_rep = hdr->nr_zones; + ofst = sizeof(struct blk_zone_report_hdr); + } + + /* Fixup and report zones */ + while (ofst < bv->bv_len && + n < nr_rep && nz < nrz) { + if (blkdev_report_zone(bdev, addr + ofst, &zones[nz])) + nz++; + ofst += sizeof(struct blk_zone); + n++; + } + + kunmap_atomic(addr); + + if (n >= nr_rep || nz >= nrz) + break; + + } + +out: + bio_for_each_segment_all(bv, bio, i) + __free_page(bv->bv_page); + bio_put(bio); + + if (ret == 0) + *nr_zones = nz; + + return ret; +} +EXPORT_SYMBOL_GPL(blkdev_report_zones); + +/** + * blkdev_reset_zones - Reset zones write pointer + * @bdev: Target block device + * @sector: Start sector of the first zone to reset + * @nr_sectors: Number of sectors, at least the length of one zone + * @gfp_mask: Memory allocation flags (for bio_alloc) + * + * Description: + * Reset the write pointer of the zones contained in the range + * @sector..@sector+@nr_sectors. Specifying the entire disk sector range + * is valid, but the specified range should not contain conventional zones. 
+ */ +int blkdev_reset_zones(struct block_device *bdev, + sector_t sector, sector_t nr_sectors, + gfp_t gfp_mask) +{ + struct request_queue *q = bdev_get_queue(bdev); + sector_t zone_sectors; + sector_t end_sector = sector + nr_sectors; + struct bio *bio; + int ret; + + if (!q) + return -ENXIO; + + if (!blk_queue_is_zoned(q)) + return -EOPNOTSUPP; + + if (end_sector > bdev->bd_part->nr_sects) + /* Out of range */ + return -EINVAL; + + /* Check alignment (handle eventual smaller last zone) */ + zone_sectors = blk_queue_zone_size(q); + if (sector & (zone_sectors - 1)) + return -EINVAL; + + if ((nr_sectors & (zone_sectors - 1)) && + end_sector != bdev->bd_part->nr_sects) + return -EINVAL; + + while (sector < end_sector) { + + bio = bio_alloc(gfp_mask, 0); + bio->bi_iter.bi_sector = sector; + bio->bi_bdev = bdev; + bio_set_op_attrs(bio, REQ_OP_ZONE_RESET, 0); + + ret = submit_bio_wait(bio); + bio_put(bio); + + if (ret) + return ret; + + sector += zone_sectors; + + /* This may take a while, so be nice to others */ + cond_resched(); + + } + + return 0; +} +EXPORT_SYMBOL_GPL(blkdev_reset_zones); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f19e16bb43d1..252043f7cd2c 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -24,6 +24,7 @@ #include #include #include +#include struct module; struct scsi_ioctl_command; @@ -302,6 +303,21 @@ struct queue_limits { enum blk_zoned_model zoned; }; +#ifdef CONFIG_BLK_DEV_ZONED + +struct blk_zone_report_hdr { + unsigned int nr_zones; + u8 padding[60]; +}; + +extern int blkdev_report_zones(struct block_device *bdev, + sector_t sector, struct blk_zone *zones, + unsigned int *nr_zones, gfp_t gfp_mask); +extern int blkdev_reset_zones(struct block_device *bdev, sector_t sectors, + sector_t nr_sectors, gfp_t gfp_mask); + +#endif /* CONFIG_BLK_DEV_ZONED */ + struct request_queue { /* * Together with queue_head for cacheline sharing @@ -654,6 +670,11 @@ static inline bool blk_queue_is_zoned(struct 
request_queue *q) } } +static inline unsigned int blk_queue_zone_size(struct request_queue *q) +{ + return blk_queue_is_zoned(q) ? q->limits.chunk_sectors : 0; +} + /* * We regard a request as sync, if either a read or a sync write */ @@ -1401,6 +1422,16 @@ static inline bool bdev_is_zoned(struct block_device *bdev) return false; } +static inline unsigned int bdev_zone_size(struct block_device *bdev) +{ + struct request_queue *q = bdev_get_queue(bdev); + + if (q) + return blk_queue_zone_size(q); + + return 0; +} + static inline int queue_dma_alignment(struct request_queue *q) { return q ? q->dma_alignment : 511; diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild index 6965d0909554..b2166f283da9 100644 --- a/include/uapi/linux/Kbuild +++ b/include/uapi/linux/Kbuild @@ -70,6 +70,7 @@ header-y += bfs_fs.h header-y += binfmts.h header-y += blkpg.h header-y += blktrace_api.h +header-y += blkzoned.h header-y += bpf_common.h header-y += bpf_perf_event.h header-y += bpf.h diff --git a/include/uapi/linux/blkzoned.h b/include/uapi/linux/blkzoned.h new file mode 100644 index 000000000000..a3817214b0e0 --- /dev/null +++ b/include/uapi/linux/blkzoned.h @@ -0,0 +1,103 @@ +/* + * Zoned block devices handling. + * + * Copyright (C) 2015 Seagate Technology PLC + * + * Written by: Shaun Tancheff + * + * Modified by: Damien Le Moal + * Copyright (C) 2016 Western Digital + * + * This file is licensed under the terms of the GNU General Public + * License version 2. This program is licensed "as is" without any + * warranty of any kind, whether express or implied. + */ +#ifndef _UAPI_BLKZONED_H +#define _UAPI_BLKZONED_H + +#include + +/** + * enum blk_zone_type - Types of zones allowed in a zoned device. + * + * @BLK_ZONE_TYPE_CONVENTIONAL: The zone has no write pointer and can be writen + * randomly. Zone reset has no effect on the zone. 
+ * @BLK_ZONE_TYPE_SEQWRITE_REQ: The zone must be written sequentially + * @BLK_ZONE_TYPE_SEQWRITE_PREF: The zone can be written non-sequentially + * + * Any other value not defined is reserved and must be considered as invalid. + */ +enum blk_zone_type { + BLK_ZONE_TYPE_CONVENTIONAL = 0x1, + BLK_ZONE_TYPE_SEQWRITE_REQ = 0x2, + BLK_ZONE_TYPE_SEQWRITE_PREF = 0x3, +}; + +/** + * enum blk_zone_cond - Condition [state] of a zone in a zoned device. + * + * @BLK_ZONE_COND_NOT_WP: The zone has no write pointer, it is conventional. + * @BLK_ZONE_COND_EMPTY: The zone is empty. + * @BLK_ZONE_COND_IMP_OPEN: The zone is open, but not explicitly opened. + * @BLK_ZONE_COND_EXP_OPEN: The zones was explicitly opened by an + * OPEN ZONE command. + * @BLK_ZONE_COND_CLOSED: The zone was [explicitly] closed after writing. + * @BLK_ZONE_COND_FULL: The zone is marked as full, possibly by a zone + * FINISH ZONE command. + * @BLK_ZONE_COND_READONLY: The zone is read-only. + * @BLK_ZONE_COND_OFFLINE: The zone is offline (sectors cannot be read/written). + * + * The Zone Condition state machine in the ZBC/ZAC standards maps the above + * deinitions as: + * - ZC1: Empty | BLK_ZONE_EMPTY + * - ZC2: Implicit Open | BLK_ZONE_COND_IMP_OPEN + * - ZC3: Explicit Open | BLK_ZONE_COND_EXP_OPEN + * - ZC4: Closed | BLK_ZONE_CLOSED + * - ZC5: Full | BLK_ZONE_FULL + * - ZC6: Read Only | BLK_ZONE_READONLY + * - ZC7: Offline | BLK_ZONE_OFFLINE + * + * Conditions 0x5 to 0xC are reserved by the current ZBC/ZAC spec and should + * be considered invalid. + */ +enum blk_zone_cond { + BLK_ZONE_COND_NOT_WP = 0x0, + BLK_ZONE_COND_EMPTY = 0x1, + BLK_ZONE_COND_IMP_OPEN = 0x2, + BLK_ZONE_COND_EXP_OPEN = 0x3, + BLK_ZONE_COND_CLOSED = 0x4, + BLK_ZONE_COND_READONLY = 0xD, + BLK_ZONE_COND_FULL = 0xE, + BLK_ZONE_COND_OFFLINE = 0xF, +}; + +/** + * struct blk_zone - Zone descriptor for BLKREPORTZONE ioctl. 
+ * + * @start: Zone start in 512 B sector units + * @len: Zone length in 512 B sector units + * @wp: Zone write pointer location in 512 B sector units + * @type: see enum blk_zone_type for possible values + * @cond: see enum blk_zone_cond for possible values + * @non_seq: Flag indicating that the zone is using non-sequential resources + * (for host-aware zoned block devices only). + * @reset: Flag indicating that a zone reset is recommended. + * @reserved: Padding to 64 B to match the ZBC/ZAC defined zone descriptor size. + * + * start, len and wp use the regular 512 B sector unit, regardless of the + * device logical block size. The overall structure size is 64 B to match the + * ZBC/ZAC defined zone descriptor and allow support for future additional + * zone information. + */ +struct blk_zone { + __u64 start; /* Zone start sector */ + __u64 len; /* Zone length in number of sectors */ + __u64 wp; /* Zone write pointer position */ + __u8 type; /* Zone type */ + __u8 cond; /* Zone condition */ + __u8 non_seq; /* Non-sequential write resources active */ + __u8 reset; /* Reset write pointer recommended */ + __u8 reserved[36]; +}; + +#endif /* _UAPI_BLKZONED_H */ -- cgit v1.2.3 From 3ed05a987e0f63b21e634101e0b460d32f3581c3 Mon Sep 17 00:00:00 2001 From: Shaun Tancheff Date: Tue, 18 Oct 2016 15:40:35 +0900 Subject: blk-zoned: implement ioctls Adds the new BLKREPORTZONE and BLKRESETZONE ioctls for respectively obtaining the zone configuration of a zoned block device and resetting the write pointer of sequential zones of a zoned block device. The BLKREPORTZONE ioctl maps directly to a single call of the function blkdev_report_zones. The zone information result is passed as an array of struct blk_zone identical to the structure used internally for processing the REQ_OP_ZONE_REPORT operation. The BLKRESETZONE ioctl maps to a single call of the blkdev_reset_zones function. 
Signed-off-by: Shaun Tancheff Signed-off-by: Damien Le Moal Reviewed-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- block/blk-zoned.c | 93 +++++++++++++++++++++++++++++++++++++++++++ block/ioctl.c | 4 ++ include/linux/blkdev.h | 21 ++++++++++ include/uapi/linux/blkzoned.h | 40 +++++++++++++++++++ include/uapi/linux/fs.h | 4 ++ 5 files changed, 162 insertions(+) (limited to 'include/linux') diff --git a/block/blk-zoned.c b/block/blk-zoned.c index 1603573f9605..667f95d86695 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -255,3 +255,96 @@ int blkdev_reset_zones(struct block_device *bdev, return 0; } EXPORT_SYMBOL_GPL(blkdev_reset_zones); + +/** + * BLKREPORTZONE ioctl processing. + * Called from blkdev_ioctl. + */ +int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode, + unsigned int cmd, unsigned long arg) +{ + void __user *argp = (void __user *)arg; + struct request_queue *q; + struct blk_zone_report rep; + struct blk_zone *zones; + int ret; + + if (!argp) + return -EINVAL; + + q = bdev_get_queue(bdev); + if (!q) + return -ENXIO; + + if (!blk_queue_is_zoned(q)) + return -ENOTTY; + + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + + if (copy_from_user(&rep, argp, sizeof(struct blk_zone_report))) + return -EFAULT; + + if (!rep.nr_zones) + return -EINVAL; + + zones = kcalloc(rep.nr_zones, sizeof(struct blk_zone), GFP_KERNEL); + if (!zones) + return -ENOMEM; + + ret = blkdev_report_zones(bdev, rep.sector, + zones, &rep.nr_zones, + GFP_KERNEL); + if (ret) + goto out; + + if (copy_to_user(argp, &rep, sizeof(struct blk_zone_report))) { + ret = -EFAULT; + goto out; + } + + if (rep.nr_zones) { + if (copy_to_user(argp + sizeof(struct blk_zone_report), zones, + sizeof(struct blk_zone) * rep.nr_zones)) + ret = -EFAULT; + } + + out: + kfree(zones); + + return ret; +} + +/** + * BLKRESETZONE ioctl processing. + * Called from blkdev_ioctl. 
+ */ +int blkdev_reset_zones_ioctl(struct block_device *bdev, fmode_t mode, + unsigned int cmd, unsigned long arg) +{ + void __user *argp = (void __user *)arg; + struct request_queue *q; + struct blk_zone_range zrange; + + if (!argp) + return -EINVAL; + + q = bdev_get_queue(bdev); + if (!q) + return -ENXIO; + + if (!blk_queue_is_zoned(q)) + return -ENOTTY; + + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + + if (!(mode & FMODE_WRITE)) + return -EBADF; + + if (copy_from_user(&zrange, argp, sizeof(struct blk_zone_range))) + return -EFAULT; + + return blkdev_reset_zones(bdev, zrange.sector, zrange.nr_sectors, + GFP_KERNEL); +} diff --git a/block/ioctl.c b/block/ioctl.c index 755119c3c1b9..f856963204f4 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -519,6 +519,10 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, BLKDEV_DISCARD_SECURE); case BLKZEROOUT: return blk_ioctl_zeroout(bdev, mode, arg); + case BLKREPORTZONE: + return blkdev_report_zones_ioctl(bdev, mode, cmd, arg); + case BLKRESETZONE: + return blkdev_reset_zones_ioctl(bdev, mode, cmd, arg); case HDIO_GETGEO: return blkdev_getgeo(bdev, argp); case BLKRAGET: diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 252043f7cd2c..90097dd8b8ed 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -316,6 +316,27 @@ extern int blkdev_report_zones(struct block_device *bdev, extern int blkdev_reset_zones(struct block_device *bdev, sector_t sectors, sector_t nr_sectors, gfp_t gfp_mask); +extern int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode, + unsigned int cmd, unsigned long arg); +extern int blkdev_reset_zones_ioctl(struct block_device *bdev, fmode_t mode, + unsigned int cmd, unsigned long arg); + +#else /* CONFIG_BLK_DEV_ZONED */ + +static inline int blkdev_report_zones_ioctl(struct block_device *bdev, + fmode_t mode, unsigned int cmd, + unsigned long arg) +{ + return -ENOTTY; +} + +static inline int blkdev_reset_zones_ioctl(struct 
block_device *bdev, + fmode_t mode, unsigned int cmd, + unsigned long arg) +{ + return -ENOTTY; +} + #endif /* CONFIG_BLK_DEV_ZONED */ struct request_queue { diff --git a/include/uapi/linux/blkzoned.h b/include/uapi/linux/blkzoned.h index a3817214b0e0..40d1d7bff537 100644 --- a/include/uapi/linux/blkzoned.h +++ b/include/uapi/linux/blkzoned.h @@ -16,6 +16,7 @@ #define _UAPI_BLKZONED_H #include +#include /** * enum blk_zone_type - Types of zones allowed in a zoned device. @@ -100,4 +101,43 @@ struct blk_zone { __u8 reserved[36]; }; +/** + * struct blk_zone_report - BLKREPORTZONE ioctl request/reply + * + * @sector: starting sector of report + * @nr_zones: IN maximum / OUT actual + * @reserved: padding to 16 byte alignment + * @zones: Space to hold @nr_zones @zones entries on reply. + * + * The array of at most @nr_zones must follow this structure in memory. + */ +struct blk_zone_report { + __u64 sector; + __u32 nr_zones; + __u8 reserved[4]; + struct blk_zone zones[0]; +} __packed; + +/** + * struct blk_zone_range - BLKRESETZONE ioctl request + * @sector: starting sector of the first zone to issue reset write pointer + * @nr_sectors: Total number of sectors of 1 or more zones to reset + */ +struct blk_zone_range { + __u64 sector; + __u64 nr_sectors; +}; + +/** + * Zoned block device ioctl's: + * + * @BLKREPORTZONE: Get zone information. Takes a zone report as argument. + * The zone report will start from the zone containing the + * sector specified in the report request structure. + * @BLKRESETZONE: Reset the write pointer of the zones in the specified + * sector range. The sector range must be zone aligned. 
+ */ +#define BLKREPORTZONE _IOWR(0x12, 130, struct blk_zone_report) +#define BLKRESETZONE _IOW(0x12, 131, struct blk_zone_range) + #endif /* _UAPI_BLKZONED_H */ diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index acb2b6152ba0..c1d11df07b28 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -225,6 +225,10 @@ struct fsxattr { #define BLKSECDISCARD _IO(0x12,125) #define BLKROTATIONAL _IO(0x12,126) #define BLKZEROOUT _IO(0x12,127) +/* + * A jump here: 130-131 are reserved for zoned block devices + * (see uapi/linux/blkzoned.h) + */ #define BMAP_IOCTL 1 /* obsolete - kept for compatibility */ #define FIBMAP _IO(0x00,1) /* bmap access */ -- cgit v1.2.3 From 9a97434215819872b054c3d0c067e5e4fa768b0e Mon Sep 17 00:00:00 2001 From: Robert Jarzmik Date: Mon, 17 Oct 2016 21:45:29 +0200 Subject: ARM: pxa: enhance smc91x platform data Instead of having the smc91x driver relying on machine_is_*() calls, provide this data through platform data, ie. idp, mainstone and stargate. This way, the driver doesn't need anymore machine_is_*() calls, which wouldn't work anymore with a device-tree build. Signed-off-by: Robert Jarzmik Signed-off-by: David S. 
Miller --- arch/arm/mach-pxa/idp.c | 1 + arch/arm/mach-pxa/mainstone.c | 1 + arch/arm/mach-pxa/stargate2.c | 1 + include/linux/smc91x.h | 1 + 4 files changed, 4 insertions(+) (limited to 'include/linux') diff --git a/arch/arm/mach-pxa/idp.c b/arch/arm/mach-pxa/idp.c index 66070acaa888..d1db32b1a2c6 100644 --- a/arch/arm/mach-pxa/idp.c +++ b/arch/arm/mach-pxa/idp.c @@ -85,6 +85,7 @@ static struct resource smc91x_resources[] = { static struct smc91x_platdata smc91x_platdata = { .flags = SMC91X_USE_8BIT | SMC91X_USE_16BIT | SMC91X_USE_32BIT | SMC91X_USE_DMA | SMC91X_NOWAIT, + .pxa_u16_align4 = true, }; static struct platform_device smc91x_device = { diff --git a/arch/arm/mach-pxa/mainstone.c b/arch/arm/mach-pxa/mainstone.c index 40964069a17c..a2d851a3a546 100644 --- a/arch/arm/mach-pxa/mainstone.c +++ b/arch/arm/mach-pxa/mainstone.c @@ -140,6 +140,7 @@ static struct resource smc91x_resources[] = { static struct smc91x_platdata mainstone_smc91x_info = { .flags = SMC91X_USE_8BIT | SMC91X_USE_16BIT | SMC91X_USE_32BIT | SMC91X_NOWAIT | SMC91X_USE_DMA, + .pxa_u16_align4 = true, }; static struct platform_device smc91x_device = { diff --git a/arch/arm/mach-pxa/stargate2.c b/arch/arm/mach-pxa/stargate2.c index 702f4f14b708..7b6610e9dae4 100644 --- a/arch/arm/mach-pxa/stargate2.c +++ b/arch/arm/mach-pxa/stargate2.c @@ -673,6 +673,7 @@ static struct resource smc91x_resources[] = { static struct smc91x_platdata stargate2_smc91x_info = { .flags = SMC91X_USE_8BIT | SMC91X_USE_16BIT | SMC91X_USE_32BIT | SMC91X_NOWAIT | SMC91X_USE_DMA, + .pxa_u16_align4 = true, }; static struct platform_device smc91x_device = { diff --git a/include/linux/smc91x.h b/include/linux/smc91x.h index e302c447e057..129bc674dcf5 100644 --- a/include/linux/smc91x.h +++ b/include/linux/smc91x.h @@ -39,6 +39,7 @@ struct smc91x_platdata { unsigned long flags; unsigned char leda; unsigned char ledb; + bool pxa_u16_align4; /* PXA buggy u16 writes on 4*n+2 addresses */ }; #endif /* __SMC91X_H__ */ -- cgit v1.2.3 
From 0f57dc6ae1ff0c702450083176b657ba37c07363 Mon Sep 17 00:00:00 2001 From: Matt Redfearn Date: Mon, 17 Oct 2016 16:48:58 +0100 Subject: remoteproc: Keep local copy of firmware name Storage of the firmware name was inconsistent, either storing a pointer to a name stored with unknown ownership, or a variable length tacked onto the end of the struct proc allocated in rproc_alloc. In preparation for allowing the firmware of an already allocated struct rproc to be changed, instead always keep a locally maintained copy of the firmware name. Signed-off-by: Matt Redfearn Signed-off-by: Bjorn Andersson --- drivers/remoteproc/remoteproc_core.c | 31 ++++++++++++++++--------------- include/linux/remoteproc.h | 2 +- 2 files changed, 17 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c index c6bfb3496684..ccc2a73e94dd 100644 --- a/drivers/remoteproc/remoteproc_core.c +++ b/drivers/remoteproc/remoteproc_core.c @@ -1273,6 +1273,7 @@ static void rproc_type_release(struct device *dev) if (rproc->index >= 0) ida_simple_remove(&rproc_dev_index, rproc->index); + kfree(rproc->firmware); kfree(rproc); } @@ -1310,31 +1311,31 @@ struct rproc *rproc_alloc(struct device *dev, const char *name, { struct rproc *rproc; char *p, *template = "rproc-%s-fw"; - int name_len = 0; + int name_len; if (!dev || !name || !ops) return NULL; - if (!firmware) + if (!firmware) { /* - * Make room for default firmware name (minus %s plus '\0'). * If the caller didn't pass in a firmware name then - * construct a default name. We're already glomming 'len' - * bytes onto the end of the struct rproc allocation, so do - * a few more for the default firmware name (but only if - * the caller doesn't pass one). + * construct a default name. 
*/ name_len = strlen(name) + strlen(template) - 2 + 1; - - rproc = kzalloc(sizeof(*rproc) + len + name_len, GFP_KERNEL); - if (!rproc) - return NULL; - - if (!firmware) { - p = (char *)rproc + sizeof(struct rproc) + len; + p = kmalloc(name_len, GFP_KERNEL); + if (!p) + return NULL; snprintf(p, name_len, template, name); } else { - p = (char *)firmware; + p = kstrdup(firmware, GFP_KERNEL); + if (!p) + return NULL; + } + + rproc = kzalloc(sizeof(struct rproc) + len, GFP_KERNEL); + if (!rproc) { + kfree(p); + return NULL; } rproc->firmware = p; diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h index 930023b7c825..940e4cf2ac48 100644 --- a/include/linux/remoteproc.h +++ b/include/linux/remoteproc.h @@ -415,7 +415,7 @@ struct rproc { struct list_head node; struct iommu_domain *domain; const char *name; - const char *firmware; + char *firmware; void *priv; const struct rproc_ops *ops; struct device dev; -- cgit v1.2.3 From 57a09bf0a416700676e77102c28f9cfcb48267e0 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 18 Oct 2016 19:51:19 +0200 Subject: bpf: Detect identical PTR_TO_MAP_VALUE_OR_NULL registers A BPF program is required to check the return register of a map_elem_lookup() call before accessing memory. The verifier keeps track of this by converting the type of the result register from PTR_TO_MAP_VALUE_OR_NULL to PTR_TO_MAP_VALUE after a conditional jump ensures safety. This check is currently exclusively performed for the result register 0. 
In the event the compiler reorders instructions, BPF_MOV64_REG instructions may be moved before the conditional jump which causes them to keep their type PTR_TO_MAP_VALUE_OR_NULL to which the verifier objects when the register is accessed: 0: (b7) r1 = 10 1: (7b) *(u64 *)(r10 -8) = r1 2: (bf) r2 = r10 3: (07) r2 += -8 4: (18) r1 = 0x59c00000 6: (85) call 1 7: (bf) r4 = r0 8: (15) if r0 == 0x0 goto pc+1 R0=map_value(ks=8,vs=8) R4=map_value_or_null(ks=8,vs=8) R10=fp 9: (7a) *(u64 *)(r4 +0) = 0 R4 invalid mem access 'map_value_or_null' This commit extends the verifier to keep track of all identical PTR_TO_MAP_VALUE_OR_NULL registers after a map_elem_lookup() by assigning them an ID and then marking them all when the conditional jump is observed. Signed-off-by: Thomas Graf Reviewed-by: Josef Bacik Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf_verifier.h | 2 +- kernel/bpf/verifier.c | 61 +++++++++++++++++------- tools/testing/selftests/bpf/test_verifier.c | 72 +++++++++++++++++++++++++++++ 3 files changed, 118 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 7035b997aaa5..ac5b393ee6b2 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -23,13 +23,13 @@ struct bpf_reg_state { * result in a bad access. 
*/ u64 min_value, max_value; + u32 id; union { /* valid when type == CONST_IMM | PTR_TO_STACK | UNKNOWN_VALUE */ s64 imm; /* valid when type == PTR_TO_PACKET* */ struct { - u32 id; u16 off; u16 range; }; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 99a7e5b388f2..846d7ceaf202 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -212,9 +212,10 @@ static void print_verifier_state(struct bpf_verifier_state *state) else if (t == CONST_PTR_TO_MAP || t == PTR_TO_MAP_VALUE || t == PTR_TO_MAP_VALUE_OR_NULL || t == PTR_TO_MAP_VALUE_ADJ) - verbose("(ks=%d,vs=%d)", + verbose("(ks=%d,vs=%d,id=%u)", reg->map_ptr->key_size, - reg->map_ptr->value_size); + reg->map_ptr->value_size, + reg->id); if (reg->min_value != BPF_REGISTER_MIN_RANGE) verbose(",min_value=%llu", (unsigned long long)reg->min_value); @@ -447,6 +448,7 @@ static void mark_reg_unknown_value(struct bpf_reg_state *regs, u32 regno) { BUG_ON(regno >= MAX_BPF_REG); regs[regno].type = UNKNOWN_VALUE; + regs[regno].id = 0; regs[regno].imm = 0; } @@ -1252,6 +1254,7 @@ static int check_call(struct bpf_verifier_env *env, int func_id) return -EINVAL; } regs[BPF_REG_0].map_ptr = meta.map_ptr; + regs[BPF_REG_0].id = ++env->id_gen; } else { verbose("unknown return type %d of func %d\n", fn->ret_type, func_id); @@ -1644,8 +1647,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) insn->src_reg); return -EACCES; } - regs[insn->dst_reg].type = UNKNOWN_VALUE; - regs[insn->dst_reg].map_ptr = NULL; + mark_reg_unknown_value(regs, insn->dst_reg); } } else { /* case: R = imm @@ -1907,6 +1909,38 @@ static void reg_set_min_max_inv(struct bpf_reg_state *true_reg, check_reg_overflow(true_reg); } +static void mark_map_reg(struct bpf_reg_state *regs, u32 regno, u32 id, + enum bpf_reg_type type) +{ + struct bpf_reg_state *reg = ®s[regno]; + + if (reg->type == PTR_TO_MAP_VALUE_OR_NULL && reg->id == id) { + reg->type = type; + if (type == UNKNOWN_VALUE) + mark_reg_unknown_value(regs, 
regno); + } +} + +/* The logic is similar to find_good_pkt_pointers(), both could eventually + * be folded together at some point. + */ +static void mark_map_regs(struct bpf_verifier_state *state, u32 regno, + enum bpf_reg_type type) +{ + struct bpf_reg_state *regs = state->regs; + int i; + + for (i = 0; i < MAX_BPF_REG; i++) + mark_map_reg(regs, i, regs[regno].id, type); + + for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) { + if (state->stack_slot_type[i] != STACK_SPILL) + continue; + mark_map_reg(state->spilled_regs, i / BPF_REG_SIZE, + regs[regno].id, type); + } +} + static int check_cond_jmp_op(struct bpf_verifier_env *env, struct bpf_insn *insn, int *insn_idx) { @@ -1994,18 +2028,13 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, if (BPF_SRC(insn->code) == BPF_K && insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) && dst_reg->type == PTR_TO_MAP_VALUE_OR_NULL) { - if (opcode == BPF_JEQ) { - /* next fallthrough insn can access memory via - * this register - */ - regs[insn->dst_reg].type = PTR_TO_MAP_VALUE; - /* branch targer cannot access it, since reg == 0 */ - mark_reg_unknown_value(other_branch->regs, - insn->dst_reg); - } else { - other_branch->regs[insn->dst_reg].type = PTR_TO_MAP_VALUE; - mark_reg_unknown_value(regs, insn->dst_reg); - } + /* Mark all identical map registers in each branch as either + * safe or unknown depending R == 0 or R != 0 conditional. + */ + mark_map_regs(this_branch, insn->dst_reg, + opcode == BPF_JEQ ? PTR_TO_MAP_VALUE : UNKNOWN_VALUE); + mark_map_regs(other_branch, insn->dst_reg, + opcode == BPF_JEQ ? 
UNKNOWN_VALUE : PTR_TO_MAP_VALUE); } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGT && dst_reg->type == PTR_TO_PACKET && regs[insn->src_reg].type == PTR_TO_PACKET_END) { diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index ff5df121b2f6..0ef8eaf6cea7 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -2588,6 +2588,78 @@ static struct bpf_test tests[] = { .result_unpriv = REJECT, .result = REJECT, }, + { + "multiple registers share map_lookup_elem result", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, 10), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 4 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS + }, + { + "invalid memory access with multiple map_lookup_elem calls", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, 10), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_MOV64_REG(BPF_REG_8, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 4 }, + .result = REJECT, + .errstr = "R4 !read_ok", + .prog_type = BPF_PROG_TYPE_SCHED_CLS + }, + { + "valid indirect map_lookup_elem access with 2nd lookup in 
branch", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, 10), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_MOV64_REG(BPF_REG_8, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_MOV64_IMM(BPF_REG_2, 10), + BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 0, 3), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 4 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS + }, }; static int probe_filter_length(const struct bpf_insn *fp) -- cgit v1.2.3 From 3ca45a46f8af8c4a92dd8a08eac57787242d5021 Mon Sep 17 00:00:00 2001 From: zijun_hu Date: Fri, 14 Oct 2016 15:12:54 +0800 Subject: percpu: ensure the requested alignment is power of two The percpu allocator expectedly assumes that the requested alignment is power of two but hasn't been verifying the input. If the specified alignment isn't power of two, the allocator can malfunction. Add the sanity check. The following is detailed analysis of the effects of alignments which aren't power of two. The alignment must be even at least since the LSB of a chunk->map element is used as free/in-use flag of an area; besides, the alignment must be a power of 2 too since ALIGN() doesn't work well for other alignment always but is adopted by pcpu_fit_in_area(). IOW, the current allocator only works well for a power of 2 aligned area allocation. See below opposite example for why an odd alignment doesn't work. Let's assume area [16, 36) is free but its previous one is in-use, we want to allocate a @size == 8 and @align == 7 area.
The larger area [16, 36) is split to three areas [16, 21), [21, 29), [29, 36) eventually. However, due to the usage for a chunk->map element, the actual offset of the aim area [21, 29) is 21 but is recorded in relevant element as 20; moreover, the residual tail free area [29, 36) is mistaken as in-use and is lost silently. Unlike macro roundup(), ALIGN(x, a) doesn't work if @a isn't a power of 2; for example, roundup(10, 6) == 12 but ALIGN(10, 6) == 10, and the latter result isn't desired obviously. tj: Code style and patch description updates. Signed-off-by: zijun_hu Suggested-by: Tejun Heo Signed-off-by: Tejun Heo --- include/linux/kernel.h | 1 + mm/percpu.c | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index bc6ed52a39b9..0dc0b21bd164 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -45,6 +45,7 @@ #define REPEAT_BYTE(x) ((~0ul / 0xff) * (x)) +/* @a is a power of 2 value */ #define ALIGN(x, a) __ALIGN_KERNEL((x), (a)) #define __ALIGN_MASK(x, mask) __ALIGN_KERNEL_MASK((x), (mask)) #define PTR_ALIGN(p, a) ((typeof(p))ALIGN((unsigned long)(p), (a))) diff --git a/mm/percpu.c b/mm/percpu.c index 255714302394..99d8abd4c1e7 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -886,7 +886,8 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved, size = ALIGN(size, 2); - if (unlikely(!size || size > PCPU_MIN_UNIT_SIZE || align > PAGE_SIZE)) { + if (unlikely(!size || size > PCPU_MIN_UNIT_SIZE || align > PAGE_SIZE || + !is_power_of_2(align))) { WARN(true, "illegal size (%zu) or align (%zu) for percpu allocation\n", size, align); return NULL; -- cgit v1.2.3 From 5dc8b362a2374d007bc0db649b7ab6a79dd32bda Mon Sep 17 00:00:00 2001 From: Adam Manzanares Date: Mon, 17 Oct 2016 11:27:28 -0700 Subject: block: Add iocontext priority to request Patch adds an association between iocontext ioprio and the ioprio of a request.
This is done to enable request based drivers the ability to act on priority information stored in the request. An example being ATA devices that support command priorities. If the ATA driver discovers that the device supports command priorities and the request has valid priority information indicating the request is high priority, then a high priority command can be sent to the device. This should improve tail latencies for high priority IO on any device that queues requests internally and can make use of the priority information stored in the request. The ioprio of the request is set in blk_rq_set_prio which takes the request and the ioc as arguments. If the ioc is valid in blk_rq_set_prio then the iopriority of the request is set as the iopriority of the ioc. In init_request_from_bio a check is made to see if the ioprio of the bio is valid and if so then the request prio comes from the bio. Signed-off-by: Adam Manzananares Reviewed-by: Jens Axboe Signed-off-by: Tejun Heo --- block/blk-core.c | 4 +++- include/linux/blkdev.h | 14 ++++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/block/blk-core.c b/block/blk-core.c index 14d7c0740dc0..361b1b965d89 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1153,6 +1153,7 @@ static struct request *__get_request(struct request_list *rl, int op, blk_rq_init(q, rq); blk_rq_set_rl(rq, rl); + blk_rq_set_prio(rq, ioc); req_set_op_attrs(rq, op, op_flags | REQ_ALLOCED); /* init elvpriv */ @@ -1656,7 +1657,8 @@ void init_request_from_bio(struct request *req, struct bio *bio) req->errors = 0; req->__sector = bio->bi_iter.bi_sector; - req->ioprio = bio_prio(bio); + if (ioprio_valid(bio_prio(bio))) + req->ioprio = bio_prio(bio); blk_rq_bio_prep(req->q, req, bio); } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c47c358ba052..9a0ceaa1b7e6 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -933,6 +933,20 @@ static inline unsigned int 
blk_rq_count_bios(struct request *rq) return nr_bios; } +/* + * blk_rq_set_prio - associate a request with prio from ioc + * @rq: request of interest + * @ioc: target iocontext + * + * Assocate request prio with ioc prio so request based drivers + * can leverage priority information. + */ +static inline void blk_rq_set_prio(struct request *rq, struct io_context *ioc) +{ + if (ioc) + rq->ioprio = ioc->ioprio; +} + /* * Request issue related functions. */ -- cgit v1.2.3 From 8e061784b51ec4a4efed0deaafb5bd9725bf5b06 Mon Sep 17 00:00:00 2001 From: Adam Manzanares Date: Mon, 17 Oct 2016 11:27:29 -0700 Subject: ata: Enabling ATA Command Priorities This patch checks to see if an ATA device supports NCQ command priorities. If so and the user has specified an iocontext that indicates IO_PRIO_CLASS_RT then we build a tf with a high priority command. This is done to improve the tail latency of commands that are high priority by passing priority to the device. tj: Removed trivial ata_ncq_prio_enabled() and open-coded the test. Signed-off-by: Adam Manzanares Signed-off-by: Tejun Heo --- drivers/ata/libata-core.c | 35 ++++++++++++++++++++++++++++++++++- drivers/ata/libata-scsi.c | 6 +++++- drivers/ata/libata.h | 2 +- include/linux/ata.h | 6 ++++++ include/linux/libata.h | 3 +++ 5 files changed, 49 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 223a770f78f3..8346faf63337 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -739,6 +739,7 @@ u64 ata_tf_read_block(const struct ata_taskfile *tf, struct ata_device *dev) * @n_block: Number of blocks * @tf_flags: RW/FUA etc... * @tag: tag + * @class: IO priority class * * LOCKING: * None. 
@@ -753,7 +754,7 @@ u64 ata_tf_read_block(const struct ata_taskfile *tf, struct ata_device *dev) */ int ata_build_rw_tf(struct ata_taskfile *tf, struct ata_device *dev, u64 block, u32 n_block, unsigned int tf_flags, - unsigned int tag) + unsigned int tag, int class) { tf->flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE; tf->flags |= tf_flags; @@ -785,6 +786,12 @@ int ata_build_rw_tf(struct ata_taskfile *tf, struct ata_device *dev, tf->device = ATA_LBA; if (tf->flags & ATA_TFLAG_FUA) tf->device |= 1 << 7; + + if (dev->flags & ATA_DFLAG_NCQ_PRIO) { + if (class == IOPRIO_CLASS_RT) + tf->hob_nsect |= ATA_PRIO_HIGH << + ATA_SHIFT_PRIO; + } } else if (dev->flags & ATA_DFLAG_LBA) { tf->flags |= ATA_TFLAG_LBA; @@ -2156,6 +2163,30 @@ static void ata_dev_config_ncq_non_data(struct ata_device *dev) } } +static void ata_dev_config_ncq_prio(struct ata_device *dev) +{ + struct ata_port *ap = dev->link->ap; + unsigned int err_mask; + + err_mask = ata_read_log_page(dev, + ATA_LOG_SATA_ID_DEV_DATA, + ATA_LOG_SATA_SETTINGS, + ap->sector_buf, + 1); + if (err_mask) { + ata_dev_dbg(dev, + "failed to get Identify Device data, Emask 0x%x\n", + err_mask); + return; + } + + if (ap->sector_buf[ATA_LOG_NCQ_PRIO_OFFSET] & BIT(3)) + dev->flags |= ATA_DFLAG_NCQ_PRIO; + else + ata_dev_dbg(dev, "SATA page does not support priority\n"); + +} + static int ata_dev_config_ncq(struct ata_device *dev, char *desc, size_t desc_sz) { @@ -2205,6 +2236,8 @@ static int ata_dev_config_ncq(struct ata_device *dev, ata_dev_config_ncq_send_recv(dev); if (ata_id_has_ncq_non_data(dev->id)) ata_dev_config_ncq_non_data(dev); + if (ata_id_has_ncq_prio(dev->id)) + ata_dev_config_ncq_prio(dev); } return 0; diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 9cceb4a875a5..2bccc3c7de48 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -50,6 +50,7 @@ #include #include #include +#include #include "libata.h" #include "libata-transport.h" @@ -1755,6 +1756,8 @@ static unsigned int 
ata_scsi_rw_xlat(struct ata_queued_cmd *qc) { struct scsi_cmnd *scmd = qc->scsicmd; const u8 *cdb = scmd->cmnd; + struct request *rq = scmd->request; + int class = IOPRIO_PRIO_CLASS(req_get_ioprio(rq)); unsigned int tf_flags = 0; u64 block; u32 n_block; @@ -1821,7 +1824,8 @@ static unsigned int ata_scsi_rw_xlat(struct ata_queued_cmd *qc) qc->nbytes = n_block * scmd->device->sector_size; rc = ata_build_rw_tf(&qc->tf, qc->dev, block, n_block, tf_flags, - qc->tag); + qc->tag, class); + if (likely(rc == 0)) return 0; diff --git a/drivers/ata/libata.h b/drivers/ata/libata.h index 3b301a48007c..8f3a5596dd67 100644 --- a/drivers/ata/libata.h +++ b/drivers/ata/libata.h @@ -66,7 +66,7 @@ extern u64 ata_tf_to_lba48(const struct ata_taskfile *tf); extern struct ata_queued_cmd *ata_qc_new_init(struct ata_device *dev, int tag); extern int ata_build_rw_tf(struct ata_taskfile *tf, struct ata_device *dev, u64 block, u32 n_block, unsigned int tf_flags, - unsigned int tag); + unsigned int tag, int class); extern u64 ata_tf_read_block(const struct ata_taskfile *tf, struct ata_device *dev); extern unsigned ata_exec_internal(struct ata_device *dev, diff --git a/include/linux/ata.h b/include/linux/ata.h index fdb180367ba1..af6859b3a93d 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -348,6 +348,7 @@ enum { ATA_LOG_DEVSLP_DETO = 0x01, ATA_LOG_DEVSLP_VALID = 0x07, ATA_LOG_DEVSLP_VALID_MASK = 0x80, + ATA_LOG_NCQ_PRIO_OFFSET = 0x09, /* NCQ send and receive log */ ATA_LOG_NCQ_SEND_RECV_SUBCMDS_OFFSET = 0x00, @@ -940,6 +941,11 @@ static inline bool ata_id_has_ncq_non_data(const u16 *id) return id[ATA_ID_SATA_CAPABILITY_2] & BIT(5); } +static inline bool ata_id_has_ncq_prio(const u16 *id) +{ + return id[ATA_ID_SATA_CAPABILITY] & BIT(12); +} + static inline bool ata_id_has_trim(const u16 *id) { if (ata_id_major_version(id) >= 7 && diff --git a/include/linux/libata.h b/include/linux/libata.h index 616eef4d81ea..90b69a6293a3 100644 --- a/include/linux/libata.h +++ 
b/include/linux/libata.h @@ -166,6 +166,7 @@ enum { ATA_DFLAG_NO_UNLOAD = (1 << 17), /* device doesn't support unload */ ATA_DFLAG_UNLOCK_HPA = (1 << 18), /* unlock HPA */ ATA_DFLAG_NCQ_SEND_RECV = (1 << 19), /* device supports NCQ SEND and RECV */ + ATA_DFLAG_NCQ_PRIO = (1 << 20), /* device supports NCQ priority */ ATA_DFLAG_INIT_MASK = (1 << 24) - 1, ATA_DFLAG_DETACH = (1 << 24), @@ -342,7 +343,9 @@ enum { ATA_SHIFT_PIO = 0, ATA_SHIFT_MWDMA = ATA_SHIFT_PIO + ATA_NR_PIO_MODES, ATA_SHIFT_UDMA = ATA_SHIFT_MWDMA + ATA_NR_MWDMA_MODES, + ATA_SHIFT_PRIO = 6, + ATA_PRIO_HIGH = 2, /* size of buffer to pad xfers ending on unaligned boundaries */ ATA_DMA_PAD_SZ = 4, -- cgit v1.2.3 From 84f95243b5439a20c33837075b88926bfa00c4ec Mon Sep 17 00:00:00 2001 From: Adam Manzanares Date: Mon, 17 Oct 2016 11:27:30 -0700 Subject: ata: ATA Command Priority Disabled By Default Add a sysfs entry to turn on priority information being passed to an ATA device. By default this feature is turned off. This patch depends on ata: Enabling ATA Command Priorities tj: Renamed ncq_prio_on to ncq_prio_enable and removed trivial ata_ncq_prio_on() and open-coded the test.
Signed-off-by: Adam Manzanares Signed-off-by: Tejun Heo --- drivers/ata/libahci.c | 1 + drivers/ata/libata-core.c | 3 ++- drivers/ata/libata-scsi.c | 68 +++++++++++++++++++++++++++++++++++++++++++++++ include/linux/libata.h | 2 ++ 4 files changed, 73 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c index 0d028ead99e8..ee7db3119b18 100644 --- a/drivers/ata/libahci.c +++ b/drivers/ata/libahci.c @@ -140,6 +140,7 @@ EXPORT_SYMBOL_GPL(ahci_shost_attrs); struct device_attribute *ahci_sdev_attrs[] = { &dev_attr_sw_activity, &dev_attr_unload_heads, + &dev_attr_ncq_prio_enable, NULL }; EXPORT_SYMBOL_GPL(ahci_sdev_attrs); diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 8346faf63337..b294339159a4 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -787,7 +787,8 @@ int ata_build_rw_tf(struct ata_taskfile *tf, struct ata_device *dev, if (tf->flags & ATA_TFLAG_FUA) tf->device |= 1 << 7; - if (dev->flags & ATA_DFLAG_NCQ_PRIO) { + if ((dev->flags & ATA_DFLAG_NCQ_PRIO) && + (dev->flags & ATA_DFLAG_NCQ_PRIO_ENABLE)) { if (class == IOPRIO_CLASS_RT) tf->hob_nsect |= ATA_PRIO_HIGH << ATA_SHIFT_PRIO; diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 2bccc3c7de48..87597a3f6149 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -271,6 +271,73 @@ DEVICE_ATTR(unload_heads, S_IRUGO | S_IWUSR, ata_scsi_park_show, ata_scsi_park_store); EXPORT_SYMBOL_GPL(dev_attr_unload_heads); +static ssize_t ata_ncq_prio_enable_show(struct device *device, + struct device_attribute *attr, char *buf) +{ + struct scsi_device *sdev = to_scsi_device(device); + struct ata_port *ap; + struct ata_device *dev; + bool ncq_prio_enable; + int rc = 0; + + ap = ata_shost_to_port(sdev->host); + + spin_lock_irq(ap->lock); + dev = ata_scsi_find_dev(ap, sdev); + if (!dev) { + rc = -ENODEV; + goto unlock; + } + + ncq_prio_enable = dev->flags & ATA_DFLAG_NCQ_PRIO_ENABLE; 
+ +unlock: + spin_unlock_irq(ap->lock); + + return rc ? rc : snprintf(buf, 20, "%u\n", ncq_prio_enable); +} + +static ssize_t ata_ncq_prio_enable_store(struct device *device, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct scsi_device *sdev = to_scsi_device(device); + struct ata_port *ap; + struct ata_device *dev; + long int input; + unsigned long flags; + int rc; + + rc = kstrtol(buf, 10, &input); + if (rc) + return rc; + if ((input < 0) || (input > 1)) + return -EINVAL; + + ap = ata_shost_to_port(sdev->host); + + spin_lock_irqsave(ap->lock, flags); + dev = ata_scsi_find_dev(ap, sdev); + if (unlikely(!dev)) { + rc = -ENODEV; + goto unlock; + } + + if (input) + dev->flags |= ATA_DFLAG_NCQ_PRIO_ENABLE; + else + dev->flags &= ~ATA_DFLAG_NCQ_PRIO_ENABLE; + +unlock: + spin_unlock_irqrestore(ap->lock, flags); + + return rc ? rc : len; +} + +DEVICE_ATTR(ncq_prio_enable, S_IRUGO | S_IWUSR, + ata_ncq_prio_enable_show, ata_ncq_prio_enable_store); +EXPORT_SYMBOL_GPL(dev_attr_ncq_prio_enable); + void ata_scsi_set_sense(struct ata_device *dev, struct scsi_cmnd *cmd, u8 sk, u8 asc, u8 ascq) { @@ -402,6 +469,7 @@ EXPORT_SYMBOL_GPL(dev_attr_sw_activity); struct device_attribute *ata_common_sdev_attrs[] = { &dev_attr_unload_heads, + &dev_attr_ncq_prio_enable, NULL }; EXPORT_SYMBOL_GPL(ata_common_sdev_attrs); diff --git a/include/linux/libata.h b/include/linux/libata.h index 90b69a6293a3..c170be548b7f 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -167,6 +167,7 @@ enum { ATA_DFLAG_UNLOCK_HPA = (1 << 18), /* unlock HPA */ ATA_DFLAG_NCQ_SEND_RECV = (1 << 19), /* device supports NCQ SEND and RECV */ ATA_DFLAG_NCQ_PRIO = (1 << 20), /* device supports NCQ priority */ + ATA_DFLAG_NCQ_PRIO_ENABLE = (1 << 21), /* Priority cmds sent to dev */ ATA_DFLAG_INIT_MASK = (1 << 24) - 1, ATA_DFLAG_DETACH = (1 << 24), @@ -545,6 +546,7 @@ typedef void (*ata_postreset_fn_t)(struct ata_link *link, unsigned int *classes) extern struct device_attribute 
dev_attr_link_power_management_policy; extern struct device_attribute dev_attr_unload_heads; +extern struct device_attribute dev_attr_ncq_prio_enable; extern struct device_attribute dev_attr_em_message_type; extern struct device_attribute dev_attr_em_message; extern struct device_attribute dev_attr_sw_activity; -- cgit v1.2.3 From 3c3fcb45d524feb5d14a14f332e3eec7f2aff8f3 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Wed, 19 Oct 2016 15:10:59 +0100 Subject: sched/fair: Kill the unused 'sched_shares_window_ns' tunable The last user of this tunable was removed in 2012 in commit: 82958366cfea ("sched: Replace update_shares weight distribution with per-entity computation") Delete it since its very existence confuses people. Signed-off-by: Matt Fleming Cc: Dietmar Eggemann Cc: Linus Torvalds Cc: Mike Galbraith Cc: Paul Turner Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20161019141059.26408-1-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar --- include/linux/sched/sysctl.h | 1 - kernel/sched/fair.c | 7 ------- kernel/sysctl.c | 7 ------- 3 files changed, 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index 22db1e63707e..441145351301 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -36,7 +36,6 @@ extern unsigned int sysctl_numa_balancing_scan_size; extern unsigned int sysctl_sched_migration_cost; extern unsigned int sysctl_sched_nr_migrate; extern unsigned int sysctl_sched_time_avg; -extern unsigned int sysctl_sched_shares_window; int sched_proc_update_handler(struct ctl_table *table, int write, void __user *buffer, size_t *length, diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index d941c97dfbc3..79d464a04417 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -93,13 +93,6 @@ unsigned int normalized_sysctl_sched_wakeup_granularity = 1000000UL; const_debug unsigned int sysctl_sched_migration_cost = 500000UL; -/* - * The 
exponential sliding window over which load is averaged for shares - * distribution. - * (default: 10msec) - */ -unsigned int __read_mostly sysctl_sched_shares_window = 10000000UL; - #ifdef CONFIG_CFS_BANDWIDTH /* * Amount of runtime to allocate from global (tg) to local (per-cfs_rq) pool diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 706309f9ed84..739fb17371af 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -347,13 +347,6 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, - { - .procname = "sched_shares_window_ns", - .data = &sysctl_sched_shares_window, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, #ifdef CONFIG_SCHEDSTATS { .procname = "sched_schedstats", -- cgit v1.2.3 From 50066a042da5457ae5b6397425f0a7ca556231e3 Mon Sep 17 00:00:00 2001 From: Jason Gerecke Date: Wed, 19 Oct 2016 18:03:42 -0700 Subject: HID: wacom: generic: Add support for height, tilt, and twist usages The HID standard defines usages that allow digitizers to report the pen's height, tilt, and rotation and which are used by Wacom's new "MobileStudio Pro" devices. Note that 'hidinput_calc_abs_res' expects ABS_Z (historically used by our driver to report twist) to have linear units. To ensure it calculates a resolution with the actually-angular units provided in the HID descriptor we need to lie and tell it we're calculating it for the (rotational) ABS_RZ axis instead.
Signed-off-by: Jason Gerecke Reviewed-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- drivers/hid/wacom_wac.c | 28 ++++++++++++++++++++++++++-- include/linux/hid.h | 3 +++ 2 files changed, 29 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c index 8071c18bf9c2..3f4ba53192c0 100644 --- a/drivers/hid/wacom_wac.c +++ b/drivers/hid/wacom_wac.c @@ -1440,6 +1440,11 @@ static void wacom_map_usage(struct input_dev *input, struct hid_usage *usage, { int fmin = field->logical_minimum; int fmax = field->logical_maximum; + int resolution_code = code; + + if (usage->hid == HID_DG_TWIST) { + resolution_code = ABS_RZ; + } usage->type = type; usage->code = code; @@ -1450,7 +1455,7 @@ static void wacom_map_usage(struct input_dev *input, struct hid_usage *usage, case EV_ABS: input_set_abs_params(input, code, fmin, fmax, fuzz, 0); input_abs_set_res(input, code, - hidinput_calc_abs_res(field, code)); + hidinput_calc_abs_res(field, resolution_code)); break; case EV_KEY: input_set_capability(input, EV_KEY, code); @@ -1475,6 +1480,9 @@ static void wacom_wac_pen_usage_mapping(struct hid_device *hdev, case HID_GD_Y: wacom_map_usage(input, usage, field, EV_ABS, ABS_Y, 4); break; + case HID_GD_Z: + wacom_map_usage(input, usage, field, EV_ABS, ABS_DISTANCE, 0); + break; case HID_DG_TIPPRESSURE: wacom_map_usage(input, usage, field, EV_ABS, ABS_PRESSURE, 0); break; @@ -1485,6 +1493,15 @@ static void wacom_wac_pen_usage_mapping(struct hid_device *hdev, wacom_map_usage(input, usage, field, EV_KEY, BTN_TOOL_RUBBER, 0); break; + case HID_DG_TILT_X: + wacom_map_usage(input, usage, field, EV_ABS, ABS_TILT_X, 0); + break; + case HID_DG_TILT_Y: + wacom_map_usage(input, usage, field, EV_ABS, ABS_TILT_Y, 0); + break; + case HID_DG_TWIST: + wacom_map_usage(input, usage, field, EV_ABS, ABS_Z, 0); + break; case HID_DG_ERASER: case HID_DG_TIPSWITCH: wacom_map_usage(input, usage, field, EV_KEY, BTN_TOUCH, 0); @@ -1508,8 +1525,15 @@ 
static int wacom_wac_pen_event(struct hid_device *hdev, struct hid_field *field, struct wacom_wac *wacom_wac = &wacom->wacom_wac; struct input_dev *input = wacom_wac->pen_input; - /* checking which Tool / tip switch to send */ switch (usage->hid) { + case HID_GD_Z: + /* + * HID_GD_Z "should increase as the control's position is + * moved from high to low", while ABS_DISTANCE instead + * increases in value as the tool moves from low to high. + */ + value = field->logical_maximum - value; + break; case HID_DG_INRANGE: wacom_wac->hid_data.inrange_state = value; return 0; diff --git a/include/linux/hid.h b/include/linux/hid.h index b2ec82712baa..e712101a1670 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -232,6 +232,9 @@ struct hid_item { #define HID_DG_TABLETFUNCTIONKEY 0x000d0039 #define HID_DG_PROGRAMCHANGEKEY 0x000d003a #define HID_DG_INVERT 0x000d003c +#define HID_DG_TILT_X 0x000d003d +#define HID_DG_TILT_Y 0x000d003e +#define HID_DG_TWIST 0x000d0041 #define HID_DG_TIPSWITCH 0x000d0042 #define HID_DG_TIPSWITCH2 0x000d0043 #define HID_DG_BARRELSWITCH 0x000d0044 -- cgit v1.2.3 From 93aab7fa4f8091d8fe2aed7e79a650fc1c084512 Mon Sep 17 00:00:00 2001 From: Jason Gerecke Date: Wed, 19 Oct 2016 18:03:52 -0700 Subject: HID: wacom: generic: Add support for battery status on pen and pad interfaces Adds support for usages that may appear on the pen or pad interface which report the state of the tablet battery. 
Signed-off-by: Jason Gerecke Reviewed-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- drivers/hid/wacom_wac.c | 41 ++++++++++++++++++++++++++++++++++++++++- drivers/hid/wacom_wac.h | 6 ++++++ include/linux/hid.h | 1 + 3 files changed, 47 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c index 70de1fa930cc..f3edecf52c06 100644 --- a/drivers/hid/wacom_wac.c +++ b/drivers/hid/wacom_wac.c @@ -1525,6 +1525,10 @@ static void wacom_wac_pad_usage_mapping(struct hid_device *hdev, unsigned equivalent_usage = wacom_equivalent_usage(usage->hid); switch (equivalent_usage) { + case WACOM_HID_WD_BATTERY_LEVEL: + case WACOM_HID_WD_BATTERY_CHARGING: + features->quirks |= WACOM_QUIRK_BATTERY; + break; case WACOM_HID_WD_ACCELEROMETER_X: __set_bit(INPUT_PROP_ACCELEROMETER, input->propbit); wacom_map_usage(input, usage, field, EV_ABS, ABS_X, 0); @@ -1574,8 +1578,25 @@ static int wacom_wac_pad_event(struct hid_device *hdev, struct hid_field *field, wacom_wac->hid_data.inrange_state |= value; } - if (equivalent_usage != WACOM_HID_WD_TOUCHRINGSTATUS) + switch (equivalent_usage) { + case WACOM_HID_WD_BATTERY_LEVEL: + wacom_wac->hid_data.battery_capacity = value; + wacom_wac->hid_data.bat_connected = 1; + return 0; + + case WACOM_HID_WD_BATTERY_CHARGING: + wacom_wac->hid_data.bat_charging = value; + wacom_wac->hid_data.ps_connected = value; + wacom_wac->hid_data.bat_connected = 1; + return 0; + + case WACOM_HID_WD_TOUCHRINGSTATUS: + return 0; + + default: input_event(input, usage->type, usage->code, value); + break; + } return 0; } @@ -1594,6 +1615,7 @@ static void wacom_wac_pad_report(struct hid_device *hdev, { struct wacom *wacom = hid_get_drvdata(hdev); struct wacom_wac *wacom_wac = &wacom->wacom_wac; + struct wacom_features *features = &wacom_wac->features; struct input_dev *input = wacom_wac->pad_input; bool active = wacom_wac->hid_data.inrange_state != 0; @@ -1604,6 +1626,16 @@ static void 
wacom_wac_pad_report(struct hid_device *hdev, if (wacom_equivalent_usage(report->field[0]->physical) == HID_DG_TABLETFUNCTIONKEY) input_event(input, EV_ABS, ABS_MISC, active ? PAD_DEVICE_ID : 0); + if (features->quirks & WACOM_QUIRK_BATTERY) { + int capacity = wacom_wac->hid_data.battery_capacity; + bool charging = wacom_wac->hid_data.bat_charging; + bool connected = wacom_wac->hid_data.bat_connected; + bool powered = wacom_wac->hid_data.ps_connected; + + wacom_notify_battery(wacom_wac, capacity, charging, + connected, powered); + } + input_sync(input); } @@ -1633,6 +1665,9 @@ static void wacom_wac_pen_usage_mapping(struct hid_device *hdev, case HID_DG_INRANGE: wacom_map_usage(input, usage, field, EV_KEY, BTN_TOOL_PEN, 0); break; + case HID_DG_BATTERYSTRENGTH: + features->quirks |= WACOM_QUIRK_BATTERY; + break; case HID_DG_INVERT: wacom_map_usage(input, usage, field, EV_KEY, BTN_TOOL_RUBBER, 0); @@ -1703,6 +1738,10 @@ static int wacom_wac_pen_event(struct hid_device *hdev, struct hid_field *field, if (!(features->quirks & WACOM_QUIRK_SENSE)) wacom_wac->hid_data.sense_state = value; return 0; + case HID_DG_BATTERYSTRENGTH: + wacom_wac->hid_data.battery_capacity = value; + wacom_wac->hid_data.bat_connected = 1; + break; case HID_DG_INVERT: wacom_wac->hid_data.invert_state = value; return 0; diff --git a/drivers/hid/wacom_wac.h b/drivers/hid/wacom_wac.h index 1f7c4a86d91b..7418c9715d31 100644 --- a/drivers/hid/wacom_wac.h +++ b/drivers/hid/wacom_wac.h @@ -101,6 +101,8 @@ #define WACOM_HID_WD_ACCELEROMETER_X (WACOM_HID_UP_WACOMDIGITIZER | 0x0401) #define WACOM_HID_WD_ACCELEROMETER_Y (WACOM_HID_UP_WACOMDIGITIZER | 0x0402) #define WACOM_HID_WD_ACCELEROMETER_Z (WACOM_HID_UP_WACOMDIGITIZER | 0x0403) +#define WACOM_HID_WD_BATTERY_CHARGING (WACOM_HID_UP_WACOMDIGITIZER | 0x0404) +#define WACOM_HID_WD_BATTERY_LEVEL (WACOM_HID_UP_WACOMDIGITIZER | 0x043b) #define WACOM_HID_WD_EXPRESSKEY00 (WACOM_HID_UP_WACOMDIGITIZER | 0x0910) #define WACOM_HID_WD_BUTTONHOME 
(WACOM_HID_UP_WACOMDIGITIZER | 0x0990) #define WACOM_HID_WD_BUTTONUP (WACOM_HID_UP_WACOMDIGITIZER | 0x0991) @@ -257,6 +259,10 @@ struct hid_data { int last_slot_field; int num_expected; int num_received; + int battery_capacity; + int bat_charging; + int bat_connected; + int ps_connected; }; struct wacom_remote_data { diff --git a/include/linux/hid.h b/include/linux/hid.h index e712101a1670..3baa2f962e48 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -231,6 +231,7 @@ struct hid_item { #define HID_DG_TAP 0x000d0035 #define HID_DG_TABLETFUNCTIONKEY 0x000d0039 #define HID_DG_PROGRAMCHANGEKEY 0x000d003a +#define HID_DG_BATTERYSTRENGTH 0x000d003b #define HID_DG_INVERT 0x000d003c #define HID_DG_TILT_X 0x000d003d #define HID_DG_TILT_Y 0x000d003e -- cgit v1.2.3 From 8b6b4135e4fb2b537f33b811c13f77bee25ca8d3 Mon Sep 17 00:00:00 2001 From: Jarod Wilson Date: Thu, 20 Oct 2016 13:55:19 -0400 Subject: net: use core MTU range checking in WAN drivers - set min/max_mtu in all hdlc drivers, remove hdlc_change_mtu - set max_mtu in lec driver, remove lec_change_mtu - set min/max_mtu in x25_asy driver CC: netdev@vger.kernel.org CC: Krzysztof Halasa CC: Krzysztof Halasa CC: Jan "Yenya" Kasprzak CC: Francois Romieu CC: Kevin Curtis CC: Zhao Qiang Signed-off-by: Jarod Wilson Signed-off-by: David S. 
Miller --- drivers/char/pcmcia/synclink_cs.c | 1 - drivers/net/wan/c101.c | 1 - drivers/net/wan/cosa.c | 1 - drivers/net/wan/dscc4.c | 1 - drivers/net/wan/farsync.c | 1 - drivers/net/wan/fsl_ucc_hdlc.c | 1 - drivers/net/wan/hdlc.c | 11 ++--------- drivers/net/wan/hdlc_fr.c | 3 ++- drivers/net/wan/hostess_sv11.c | 1 - drivers/net/wan/ixp4xx_hss.c | 1 - drivers/net/wan/lmc/lmc_main.c | 1 - drivers/net/wan/n2.c | 1 - drivers/net/wan/pc300too.c | 1 - drivers/net/wan/pci200syn.c | 1 - drivers/net/wan/sealevel.c | 1 - drivers/net/wan/wanxl.c | 1 - drivers/net/wan/x25_asy.c | 5 ++--- drivers/tty/synclink.c | 1 - drivers/tty/synclink_gt.c | 1 - drivers/tty/synclinkmp.c | 1 - include/linux/hdlc.h | 2 -- net/atm/lec.c | 11 +---------- 22 files changed, 7 insertions(+), 42 deletions(-) (limited to 'include/linux') diff --git a/drivers/char/pcmcia/synclink_cs.c b/drivers/char/pcmcia/synclink_cs.c index d28922df01d7..a7dd5f4f2c5a 100644 --- a/drivers/char/pcmcia/synclink_cs.c +++ b/drivers/char/pcmcia/synclink_cs.c @@ -4248,7 +4248,6 @@ static void hdlcdev_rx(MGSLPC_INFO *info, char *buf, int size) static const struct net_device_ops hdlcdev_ops = { .ndo_open = hdlcdev_open, .ndo_stop = hdlcdev_close, - .ndo_change_mtu = hdlc_change_mtu, .ndo_start_xmit = hdlc_start_xmit, .ndo_do_ioctl = hdlcdev_ioctl, .ndo_tx_timeout = hdlcdev_tx_timeout, diff --git a/drivers/net/wan/c101.c b/drivers/net/wan/c101.c index 09a50751763b..2371e078afbb 100644 --- a/drivers/net/wan/c101.c +++ b/drivers/net/wan/c101.c @@ -302,7 +302,6 @@ static void c101_destroy_card(card_t *card) static const struct net_device_ops c101_ops = { .ndo_open = c101_open, .ndo_stop = c101_close, - .ndo_change_mtu = hdlc_change_mtu, .ndo_start_xmit = hdlc_start_xmit, .ndo_do_ioctl = c101_ioctl, }; diff --git a/drivers/net/wan/cosa.c b/drivers/net/wan/cosa.c index b87fe0a01c69..087eb266601f 100644 --- a/drivers/net/wan/cosa.c +++ b/drivers/net/wan/cosa.c @@ -432,7 +432,6 @@ module_exit(cosa_exit); static const struct 
net_device_ops cosa_ops = { .ndo_open = cosa_net_open, .ndo_stop = cosa_net_close, - .ndo_change_mtu = hdlc_change_mtu, .ndo_start_xmit = hdlc_start_xmit, .ndo_do_ioctl = cosa_net_ioctl, .ndo_tx_timeout = cosa_net_timeout, diff --git a/drivers/net/wan/dscc4.c b/drivers/net/wan/dscc4.c index 629225980463..7351e5440ed7 100644 --- a/drivers/net/wan/dscc4.c +++ b/drivers/net/wan/dscc4.c @@ -887,7 +887,6 @@ static inline int dscc4_set_quartz(struct dscc4_dev_priv *dpriv, int hz) static const struct net_device_ops dscc4_ops = { .ndo_open = dscc4_open, .ndo_stop = dscc4_close, - .ndo_change_mtu = hdlc_change_mtu, .ndo_start_xmit = hdlc_start_xmit, .ndo_do_ioctl = dscc4_ioctl, .ndo_tx_timeout = dscc4_tx_timeout, diff --git a/drivers/net/wan/farsync.c b/drivers/net/wan/farsync.c index 3c9cbf908ec7..03696d35ee9c 100644 --- a/drivers/net/wan/farsync.c +++ b/drivers/net/wan/farsync.c @@ -2394,7 +2394,6 @@ fst_init_card(struct fst_card_info *card) static const struct net_device_ops fst_ops = { .ndo_open = fst_open, .ndo_stop = fst_close, - .ndo_change_mtu = hdlc_change_mtu, .ndo_start_xmit = hdlc_start_xmit, .ndo_do_ioctl = fst_ioctl, .ndo_tx_timeout = fst_tx_timeout, diff --git a/drivers/net/wan/fsl_ucc_hdlc.c b/drivers/net/wan/fsl_ucc_hdlc.c index 65647533b401..e38ce4da3efb 100644 --- a/drivers/net/wan/fsl_ucc_hdlc.c +++ b/drivers/net/wan/fsl_ucc_hdlc.c @@ -992,7 +992,6 @@ static const struct dev_pm_ops uhdlc_pm_ops = { static const struct net_device_ops uhdlc_ops = { .ndo_open = uhdlc_open, .ndo_stop = uhdlc_close, - .ndo_change_mtu = hdlc_change_mtu, .ndo_start_xmit = hdlc_start_xmit, .ndo_do_ioctl = uhdlc_ioctl, }; diff --git a/drivers/net/wan/hdlc.c b/drivers/net/wan/hdlc.c index 9bd4aa8083ce..7221a53b8b14 100644 --- a/drivers/net/wan/hdlc.c +++ b/drivers/net/wan/hdlc.c @@ -46,14 +46,6 @@ static const char* version = "HDLC support module revision 1.22"; static struct hdlc_proto *first_proto; -int hdlc_change_mtu(struct net_device *dev, int new_mtu) -{ - if ((new_mtu < 68) 
|| (new_mtu > HDLC_MAX_MTU)) - return -EINVAL; - dev->mtu = new_mtu; - return 0; -} - static int hdlc_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *p, struct net_device *orig_dev) { @@ -237,6 +229,8 @@ static void hdlc_setup_dev(struct net_device *dev) dev->flags = IFF_POINTOPOINT | IFF_NOARP; dev->priv_flags = IFF_WAN_HDLC; dev->mtu = HDLC_MAX_MTU; + dev->min_mtu = 68; + dev->max_mtu = HDLC_MAX_MTU; dev->type = ARPHRD_RAWHDLC; dev->hard_header_len = 16; dev->addr_len = 0; @@ -353,7 +347,6 @@ MODULE_AUTHOR("Krzysztof Halasa "); MODULE_DESCRIPTION("HDLC support module"); MODULE_LICENSE("GPL v2"); -EXPORT_SYMBOL(hdlc_change_mtu); EXPORT_SYMBOL(hdlc_start_xmit); EXPORT_SYMBOL(hdlc_open); EXPORT_SYMBOL(hdlc_close); diff --git a/drivers/net/wan/hdlc_fr.c b/drivers/net/wan/hdlc_fr.c index b6e0cfb095d3..eb915281197e 100644 --- a/drivers/net/wan/hdlc_fr.c +++ b/drivers/net/wan/hdlc_fr.c @@ -1053,7 +1053,6 @@ static void pvc_setup(struct net_device *dev) static const struct net_device_ops pvc_ops = { .ndo_open = pvc_open, .ndo_stop = pvc_close, - .ndo_change_mtu = hdlc_change_mtu, .ndo_start_xmit = pvc_xmit, .ndo_do_ioctl = pvc_ioctl, }; @@ -1096,6 +1095,8 @@ static int fr_add_pvc(struct net_device *frad, unsigned int dlci, int type) } dev->netdev_ops = &pvc_ops; dev->mtu = HDLC_MAX_MTU; + dev->min_mtu = 68; + dev->max_mtu = HDLC_MAX_MTU; dev->priv_flags |= IFF_NO_QUEUE; dev->ml_priv = pvc; diff --git a/drivers/net/wan/hostess_sv11.c b/drivers/net/wan/hostess_sv11.c index 3d741663fd67..dd6bb3364ad2 100644 --- a/drivers/net/wan/hostess_sv11.c +++ b/drivers/net/wan/hostess_sv11.c @@ -180,7 +180,6 @@ static int hostess_attach(struct net_device *dev, unsigned short encoding, static const struct net_device_ops hostess_ops = { .ndo_open = hostess_open, .ndo_stop = hostess_close, - .ndo_change_mtu = hdlc_change_mtu, .ndo_start_xmit = hdlc_start_xmit, .ndo_do_ioctl = hostess_ioctl, }; diff --git a/drivers/net/wan/ixp4xx_hss.c b/drivers/net/wan/ixp4xx_hss.c 
index e7bbdb7af53a..6a505c26a3e7 100644 --- a/drivers/net/wan/ixp4xx_hss.c +++ b/drivers/net/wan/ixp4xx_hss.c @@ -1321,7 +1321,6 @@ static int hss_hdlc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) static const struct net_device_ops hss_hdlc_ops = { .ndo_open = hss_hdlc_open, .ndo_stop = hss_hdlc_close, - .ndo_change_mtu = hdlc_change_mtu, .ndo_start_xmit = hdlc_start_xmit, .ndo_do_ioctl = hss_hdlc_ioctl, }; diff --git a/drivers/net/wan/lmc/lmc_main.c b/drivers/net/wan/lmc/lmc_main.c index 299140c04556..001b7796740d 100644 --- a/drivers/net/wan/lmc/lmc_main.c +++ b/drivers/net/wan/lmc/lmc_main.c @@ -808,7 +808,6 @@ static int lmc_attach(struct net_device *dev, unsigned short encoding, static const struct net_device_ops lmc_ops = { .ndo_open = lmc_open, .ndo_stop = lmc_close, - .ndo_change_mtu = hdlc_change_mtu, .ndo_start_xmit = hdlc_start_xmit, .ndo_do_ioctl = lmc_ioctl, .ndo_tx_timeout = lmc_driver_timeout, diff --git a/drivers/net/wan/n2.c b/drivers/net/wan/n2.c index 315bf09d6a20..c8f4517db3a0 100644 --- a/drivers/net/wan/n2.c +++ b/drivers/net/wan/n2.c @@ -330,7 +330,6 @@ static void n2_destroy_card(card_t *card) static const struct net_device_ops n2_ops = { .ndo_open = n2_open, .ndo_stop = n2_close, - .ndo_change_mtu = hdlc_change_mtu, .ndo_start_xmit = hdlc_start_xmit, .ndo_do_ioctl = n2_ioctl, }; diff --git a/drivers/net/wan/pc300too.c b/drivers/net/wan/pc300too.c index db363856e0b5..e1dd1ec18d64 100644 --- a/drivers/net/wan/pc300too.c +++ b/drivers/net/wan/pc300too.c @@ -291,7 +291,6 @@ static void pc300_pci_remove_one(struct pci_dev *pdev) static const struct net_device_ops pc300_ops = { .ndo_open = pc300_open, .ndo_stop = pc300_close, - .ndo_change_mtu = hdlc_change_mtu, .ndo_start_xmit = hdlc_start_xmit, .ndo_do_ioctl = pc300_ioctl, }; diff --git a/drivers/net/wan/pci200syn.c b/drivers/net/wan/pci200syn.c index e8455621390e..4e437c599e9a 100644 --- a/drivers/net/wan/pci200syn.c +++ b/drivers/net/wan/pci200syn.c @@ -270,7 +270,6 @@ static 
void pci200_pci_remove_one(struct pci_dev *pdev) static const struct net_device_ops pci200_ops = { .ndo_open = pci200_open, .ndo_stop = pci200_close, - .ndo_change_mtu = hdlc_change_mtu, .ndo_start_xmit = hdlc_start_xmit, .ndo_do_ioctl = pci200_ioctl, }; diff --git a/drivers/net/wan/sealevel.c b/drivers/net/wan/sealevel.c index 27860b4f5908..fbb5aa2c4d8f 100644 --- a/drivers/net/wan/sealevel.c +++ b/drivers/net/wan/sealevel.c @@ -174,7 +174,6 @@ static int sealevel_attach(struct net_device *dev, unsigned short encoding, static const struct net_device_ops sealevel_ops = { .ndo_open = sealevel_open, .ndo_stop = sealevel_close, - .ndo_change_mtu = hdlc_change_mtu, .ndo_start_xmit = hdlc_start_xmit, .ndo_do_ioctl = sealevel_ioctl, }; diff --git a/drivers/net/wan/wanxl.c b/drivers/net/wan/wanxl.c index a20d688d2595..0c7317520ed3 100644 --- a/drivers/net/wan/wanxl.c +++ b/drivers/net/wan/wanxl.c @@ -551,7 +551,6 @@ static void wanxl_pci_remove_one(struct pci_dev *pdev) static const struct net_device_ops wanxl_ops = { .ndo_open = wanxl_open, .ndo_stop = wanxl_close, - .ndo_change_mtu = hdlc_change_mtu, .ndo_start_xmit = hdlc_start_xmit, .ndo_do_ioctl = wanxl_ioctl, .ndo_get_stats = wanxl_get_stats, diff --git a/drivers/net/wan/x25_asy.c b/drivers/net/wan/x25_asy.c index 1bc5e93d2a34..878b05d06fc7 100644 --- a/drivers/net/wan/x25_asy.c +++ b/drivers/net/wan/x25_asy.c @@ -124,9 +124,6 @@ static int x25_asy_change_mtu(struct net_device *dev, int newmtu) unsigned char *xbuff, *rbuff; int len; - if (newmtu > 65534) - return -EINVAL; - len = 2 * newmtu; xbuff = kmalloc(len + 4, GFP_ATOMIC); rbuff = kmalloc(len + 4, GFP_ATOMIC); @@ -751,6 +748,8 @@ static void x25_asy_setup(struct net_device *dev) */ dev->mtu = SL_MTU; + dev->min_mtu = 0; + dev->max_mtu = 65534; dev->netdev_ops = &x25_asy_netdev_ops; dev->watchdog_timeo = HZ*20; dev->hard_header_len = 0; diff --git a/drivers/tty/synclink.c b/drivers/tty/synclink.c index c13e27ecb0b7..415885c56435 100644 --- 
a/drivers/tty/synclink.c +++ b/drivers/tty/synclink.c @@ -7973,7 +7973,6 @@ static void hdlcdev_rx(struct mgsl_struct *info, char *buf, int size) static const struct net_device_ops hdlcdev_ops = { .ndo_open = hdlcdev_open, .ndo_stop = hdlcdev_close, - .ndo_change_mtu = hdlc_change_mtu, .ndo_start_xmit = hdlc_start_xmit, .ndo_do_ioctl = hdlcdev_ioctl, .ndo_tx_timeout = hdlcdev_tx_timeout, diff --git a/drivers/tty/synclink_gt.c b/drivers/tty/synclink_gt.c index 7aca2d4670e4..8267bcf2405e 100644 --- a/drivers/tty/synclink_gt.c +++ b/drivers/tty/synclink_gt.c @@ -1768,7 +1768,6 @@ static void hdlcdev_rx(struct slgt_info *info, char *buf, int size) static const struct net_device_ops hdlcdev_ops = { .ndo_open = hdlcdev_open, .ndo_stop = hdlcdev_close, - .ndo_change_mtu = hdlc_change_mtu, .ndo_start_xmit = hdlc_start_xmit, .ndo_do_ioctl = hdlcdev_ioctl, .ndo_tx_timeout = hdlcdev_tx_timeout, diff --git a/drivers/tty/synclinkmp.c b/drivers/tty/synclinkmp.c index dec156586de1..d66620f7eaa3 100644 --- a/drivers/tty/synclinkmp.c +++ b/drivers/tty/synclinkmp.c @@ -1887,7 +1887,6 @@ static void hdlcdev_rx(SLMP_INFO *info, char *buf, int size) static const struct net_device_ops hdlcdev_ops = { .ndo_open = hdlcdev_open, .ndo_stop = hdlcdev_close, - .ndo_change_mtu = hdlc_change_mtu, .ndo_start_xmit = hdlc_start_xmit, .ndo_do_ioctl = hdlcdev_ioctl, .ndo_tx_timeout = hdlcdev_tx_timeout, diff --git a/include/linux/hdlc.h b/include/linux/hdlc.h index e31bcd4c7859..97585d9679f3 100644 --- a/include/linux/hdlc.h +++ b/include/linux/hdlc.h @@ -93,8 +93,6 @@ static __inline__ void debug_frame(const struct sk_buff *skb) int hdlc_open(struct net_device *dev); /* Must be called by hardware driver when HDLC device is being closed */ void hdlc_close(struct net_device *dev); -/* May be used by hardware driver */ -int hdlc_change_mtu(struct net_device *dev, int new_mtu); /* Must be pointed to by hw driver's dev->netdev_ops->ndo_start_xmit */ netdev_tx_t hdlc_start_xmit(struct sk_buff *skb, 
struct net_device *dev); diff --git a/net/atm/lec.c b/net/atm/lec.c index 5d2693826afb..779b3fa6052d 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -544,15 +544,6 @@ send_to_lecd(struct lec_priv *priv, atmlec_msg_type type, return 0; } -/* shamelessly stolen from drivers/net/net_init.c */ -static int lec_change_mtu(struct net_device *dev, int new_mtu) -{ - if ((new_mtu < 68) || (new_mtu > 18190)) - return -EINVAL; - dev->mtu = new_mtu; - return 0; -} - static void lec_set_multicast_list(struct net_device *dev) { /* @@ -565,7 +556,6 @@ static const struct net_device_ops lec_netdev_ops = { .ndo_open = lec_open, .ndo_stop = lec_close, .ndo_start_xmit = lec_start_xmit, - .ndo_change_mtu = lec_change_mtu, .ndo_tx_timeout = lec_tx_timeout, .ndo_set_rx_mode = lec_set_multicast_list, }; @@ -742,6 +732,7 @@ static int lecd_attach(struct atm_vcc *vcc, int arg) if (!dev_lec[i]) return -ENOMEM; dev_lec[i]->netdev_ops = &lec_netdev_ops; + dev_lec[i]->max_mtu = 18190; snprintf(dev_lec[i]->name, IFNAMSIZ, "lec%d", i); if (register_netdev(dev_lec[i])) { free_netdev(dev_lec[i]); -- cgit v1.2.3 From b3e3893e1253692c3d2b8e8ebd5a26183ed30443 Mon Sep 17 00:00:00 2001 From: Jarod Wilson Date: Thu, 20 Oct 2016 13:55:22 -0400 Subject: net: use core MTU range checking in misc drivers firewire-net: - set min/max_mtu - remove fwnet_change_mtu nes: - set max_mtu - clean up nes_netdev_change_mtu xpnet: - set min/max_mtu - remove xpnet_dev_change_mtu hippi: - set min/max_mtu - remove hippi_change_mtu batman-adv: - set max_mtu - remove batadv_interface_change_mtu - initialization is a little async, not 100% certain that max_mtu is set in the optimal place, don't have hardware to test with rionet: - set min/max_mtu - remove rionet_change_mtu slip: - set min/max_mtu - streamline sl_change_mtu um/net_kern: - remove pointless ndo_change_mtu hsi/clients/ssi_protocol: - use core MTU range checking - remove now redundant ssip_pn_set_mtu ipoib: - set a default max MTU value - Note: ipoib's actual max 
MTU can vary, depending on if the device is in connected mode or not, so we'll just set the max_mtu value to the max possible, and let the ndo_change_mtu function continue to validate any new MTU change requests with checks for CM or not. Note that ipoib has no min_mtu set, and thus, the network core's mtu > 0 check is the only lower bounds here. mptlan: - use net core MTU range checking - remove now redundant mpt_lan_change_mtu fddi: - min_mtu = 21, max_mtu = 4470 - remove now redundant fddi_change_mtu (including export) fjes: - min_mtu = 8192, max_mtu = 65536 - The max_mtu value is actually one over IP_MAX_MTU here, but the idea is to get past the core net MTU range checks so fjes_change_mtu can validate a new MTU against what it supports (see fjes_support_mtu in fjes_hw.c) hsr: - min_mtu = 0 (calls ether_setup, max_mtu is 1500) f_phonet: - min_mtu = 6, max_mtu = 65541 u_ether: - min_mtu = 14, max_mtu = 15412 phonet/pep-gprs: - min_mtu = 576, max_mtu = 65530 - remove redundant gprs_set_mtu CC: netdev@vger.kernel.org CC: linux-rdma@vger.kernel.org CC: Stefan Richter CC: Faisal Latif CC: linux-rdma@vger.kernel.org CC: Cliff Whickman CC: Robin Holt CC: Jes Sorensen CC: Marek Lindner CC: Simon Wunderlich CC: Antonio Quartulli CC: Sathya Prakash CC: Chaitra P B CC: Suganath Prabu Subramani CC: MPT-FusionLinux.pdl@broadcom.com CC: Sebastian Reichel CC: Felipe Balbi CC: Arvid Brodin CC: Remi Denis-Courmont Signed-off-by: Jarod Wilson Signed-off-by: David S. 
Miller --- arch/um/drivers/net_kern.c | 8 -------- drivers/firewire/net.c | 18 ++++-------------- drivers/hsi/clients/ssi_protocol.c | 14 ++++---------- drivers/infiniband/hw/nes/nes.c | 1 - drivers/infiniband/hw/nes/nes.h | 4 ++-- drivers/infiniband/hw/nes/nes_nic.c | 10 +++------- drivers/infiniband/ulp/ipoib/ipoib_main.c | 1 + drivers/message/fusion/mptlan.c | 15 ++++----------- drivers/misc/sgi-xp/xpnet.c | 21 ++++----------------- drivers/net/fddi/skfp/skfddi.c | 1 - drivers/net/fjes/fjes_main.c | 2 ++ drivers/net/hippi/rrunner.c | 1 - drivers/net/rionet.c | 15 +++------------ drivers/net/slip/slip.c | 11 +++++------ drivers/usb/gadget/function/f_phonet.c | 11 ++--------- drivers/usb/gadget/function/u_ether.c | 14 ++++---------- include/linux/fddidevice.h | 1 - include/linux/hippidevice.h | 1 - net/802/fddi.c | 11 ++--------- net/802/hippi.c | 14 ++------------ net/batman-adv/soft-interface.c | 13 +------------ net/hsr/hsr_device.c | 1 + net/phonet/pep-gprs.c | 12 ++---------- 23 files changed, 46 insertions(+), 154 deletions(-) (limited to 'include/linux') diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c index 2cd5b6874c7b..1669240c7a25 100644 --- a/arch/um/drivers/net_kern.c +++ b/arch/um/drivers/net_kern.c @@ -256,13 +256,6 @@ static void uml_net_tx_timeout(struct net_device *dev) netif_wake_queue(dev); } -static int uml_net_change_mtu(struct net_device *dev, int new_mtu) -{ - dev->mtu = new_mtu; - - return 0; -} - #ifdef CONFIG_NET_POLL_CONTROLLER static void uml_net_poll_controller(struct net_device *dev) { @@ -374,7 +367,6 @@ static const struct net_device_ops uml_netdev_ops = { .ndo_set_rx_mode = uml_net_set_multicast_list, .ndo_tx_timeout = uml_net_tx_timeout, .ndo_set_mac_address = eth_mac_addr, - .ndo_change_mtu = uml_net_change_mtu, .ndo_validate_addr = eth_validate_addr, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = uml_net_poll_controller, diff --git a/drivers/firewire/net.c b/drivers/firewire/net.c index 
309311b1faae..8430222151fc 100644 --- a/drivers/firewire/net.c +++ b/drivers/firewire/net.c @@ -1349,15 +1349,6 @@ static netdev_tx_t fwnet_tx(struct sk_buff *skb, struct net_device *net) return NETDEV_TX_OK; } -static int fwnet_change_mtu(struct net_device *net, int new_mtu) -{ - if (new_mtu < 68) - return -EINVAL; - - net->mtu = new_mtu; - return 0; -} - static const struct ethtool_ops fwnet_ethtool_ops = { .get_link = ethtool_op_get_link, }; @@ -1366,7 +1357,6 @@ static const struct net_device_ops fwnet_netdev_ops = { .ndo_open = fwnet_open, .ndo_stop = fwnet_stop, .ndo_start_xmit = fwnet_tx, - .ndo_change_mtu = fwnet_change_mtu, }; static void fwnet_init_dev(struct net_device *net) @@ -1435,7 +1425,6 @@ static int fwnet_probe(struct fw_unit *unit, struct net_device *net; bool allocated_netdev = false; struct fwnet_device *dev; - unsigned max_mtu; int ret; union fwnet_hwaddr *ha; @@ -1478,9 +1467,10 @@ static int fwnet_probe(struct fw_unit *unit, * Use the RFC 2734 default 1500 octets or the maximum payload * as initial MTU */ - max_mtu = (1 << (card->max_receive + 1)) - - sizeof(struct rfc2734_header) - IEEE1394_GASP_HDR_SIZE; - net->mtu = min(1500U, max_mtu); + net->max_mtu = (1 << (card->max_receive + 1)) + - sizeof(struct rfc2734_header) - IEEE1394_GASP_HDR_SIZE; + net->mtu = min(1500U, net->max_mtu); + net->min_mtu = ETH_MIN_MTU; /* Set our hardware address while we're at it */ ha = (union fwnet_hwaddr *)net->dev_addr; diff --git a/drivers/hsi/clients/ssi_protocol.c b/drivers/hsi/clients/ssi_protocol.c index 6031cd146556..7ef819680acd 100644 --- a/drivers/hsi/clients/ssi_protocol.c +++ b/drivers/hsi/clients/ssi_protocol.c @@ -960,15 +960,6 @@ static int ssip_pn_stop(struct net_device *dev) return 0; } -static int ssip_pn_set_mtu(struct net_device *dev, int new_mtu) -{ - if (new_mtu > SSIP_MAX_MTU || new_mtu < PHONET_MIN_MTU) - return -EINVAL; - dev->mtu = new_mtu; - - return 0; -} - static void ssip_xmit_work(struct work_struct *work) { struct ssi_protocol 
*ssi = @@ -1060,7 +1051,6 @@ static const struct net_device_ops ssip_pn_ops = { .ndo_open = ssip_pn_open, .ndo_stop = ssip_pn_stop, .ndo_start_xmit = ssip_pn_xmit, - .ndo_change_mtu = ssip_pn_set_mtu, }; static void ssip_pn_setup(struct net_device *dev) @@ -1136,6 +1126,10 @@ static int ssi_protocol_probe(struct device *dev) goto out1; } + /* MTU range: 6 - 65535 */ + ssi->netdev->min_mtu = PHONET_MIN_MTU; + ssi->netdev->max_mtu = SSIP_MAX_MTU; + SET_NETDEV_DEV(ssi->netdev, dev); netif_carrier_off(ssi->netdev); err = register_netdev(ssi->netdev); diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c index 35cbb17bec12..2baa45a8e401 100644 --- a/drivers/infiniband/hw/nes/nes.c +++ b/drivers/infiniband/hw/nes/nes.c @@ -65,7 +65,6 @@ MODULE_DESCRIPTION("NetEffect RNIC Low-level iWARP Driver"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_VERSION(DRV_VERSION); -int max_mtu = 9000; int interrupt_mod_interval = 0; /* Interoperability */ diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h index e7430c9254d3..85acd0843b50 100644 --- a/drivers/infiniband/hw/nes/nes.h +++ b/drivers/infiniband/hw/nes/nes.h @@ -83,6 +83,8 @@ #define NES_FIRST_QPN 64 #define NES_SW_CONTEXT_ALIGN 1024 +#define NES_MAX_MTU 9000 + #define NES_NIC_MAX_NICS 16 #define NES_MAX_ARP_TABLE_SIZE 4096 @@ -169,8 +171,6 @@ do { \ #include "nes_cm.h" #include "nes_mgt.h" -extern int max_mtu; -#define max_frame_len (max_mtu+ETH_HLEN) extern int interrupt_mod_interval; extern int nes_if_count; extern int mpa_version; diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c index 2b27d1351cf7..7f8597d6738b 100644 --- a/drivers/infiniband/hw/nes/nes_nic.c +++ b/drivers/infiniband/hw/nes/nes_nic.c @@ -981,20 +981,16 @@ static int nes_netdev_change_mtu(struct net_device *netdev, int new_mtu) { struct nes_vnic *nesvnic = netdev_priv(netdev); struct nes_device *nesdev = nesvnic->nesdev; - int ret = 0; u8 jumbomode = 0; u32 nic_active; u32 
nic_active_bit; u32 uc_all_active; u32 mc_all_active; - if ((new_mtu < ETH_ZLEN) || (new_mtu > max_mtu)) - return -EINVAL; - netdev->mtu = new_mtu; nesvnic->max_frame_size = new_mtu + VLAN_ETH_HLEN; - if (netdev->mtu > 1500) { + if (netdev->mtu > ETH_DATA_LEN) { jumbomode=1; } nes_nic_init_timer_defaults(nesdev, jumbomode); @@ -1020,7 +1016,7 @@ static int nes_netdev_change_mtu(struct net_device *netdev, int new_mtu) nes_write_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL, nic_active); } - return ret; + return 0; } @@ -1658,7 +1654,7 @@ struct net_device *nes_netdev_init(struct nes_device *nesdev, netdev->watchdog_timeo = NES_TX_TIMEOUT; netdev->irq = nesdev->pcidev->irq; - netdev->mtu = ETH_DATA_LEN; + netdev->max_mtu = NES_MAX_MTU; netdev->hard_header_len = ETH_HLEN; netdev->addr_len = ETH_ALEN; netdev->type = ARPHRD_ETHER; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index cc059218c962..ae5d7cd100a5 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -2017,6 +2017,7 @@ static struct net_device *ipoib_add_port(const char *format, /* MTU will be reset when mcast join happens */ priv->dev->mtu = IPOIB_UD_MTU(priv->max_ib_mtu); priv->mcast_mtu = priv->admin_mtu = priv->dev->mtu; + priv->dev->max_mtu = IPOIB_CM_MTU; priv->dev->neigh_priv_len = sizeof(struct ipoib_neigh); diff --git a/drivers/message/fusion/mptlan.c b/drivers/message/fusion/mptlan.c index 6955c9e22d57..55dd71bbdc2a 100644 --- a/drivers/message/fusion/mptlan.c +++ b/drivers/message/fusion/mptlan.c @@ -548,16 +548,6 @@ mpt_lan_close(struct net_device *dev) return 0; } -/*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -static int -mpt_lan_change_mtu(struct net_device *dev, int new_mtu) -{ - if ((new_mtu < MPT_LAN_MIN_MTU) || (new_mtu > MPT_LAN_MAX_MTU)) - return -EINVAL; - dev->mtu = new_mtu; - return 0; -} - 
/*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ /* Tx timeout handler. */ static void @@ -1304,7 +1294,6 @@ static const struct net_device_ops mpt_netdev_ops = { .ndo_open = mpt_lan_open, .ndo_stop = mpt_lan_close, .ndo_start_xmit = mpt_lan_sdu_send, - .ndo_change_mtu = mpt_lan_change_mtu, .ndo_tx_timeout = mpt_lan_tx_timeout, }; @@ -1375,6 +1364,10 @@ mpt_register_lan_device (MPT_ADAPTER *mpt_dev, int pnum) dev->netdev_ops = &mpt_netdev_ops; dev->watchdog_timeo = MPT_LAN_TX_TIMEOUT; + /* MTU range: 96 - 65280 */ + dev->min_mtu = MPT_LAN_MIN_MTU; + dev->max_mtu = MPT_LAN_MAX_MTU; + dlprintk((KERN_INFO MYNAM ": Finished registering dev " "and setting initial values\n")); diff --git a/drivers/misc/sgi-xp/xpnet.c b/drivers/misc/sgi-xp/xpnet.c index 557f9782c53c..0c26eaf5f62b 100644 --- a/drivers/misc/sgi-xp/xpnet.c +++ b/drivers/misc/sgi-xp/xpnet.c @@ -118,6 +118,8 @@ static DEFINE_SPINLOCK(xpnet_broadcast_lock); * now, the default is 64KB. */ #define XPNET_MAX_MTU (0x800000UL - L1_CACHE_BYTES) +/* 68 comes from min TCP+IP+MAC header */ +#define XPNET_MIN_MTU 68 /* 32KB has been determined to be the ideal */ #define XPNET_DEF_MTU (0x8000UL) @@ -330,22 +332,6 @@ xpnet_dev_stop(struct net_device *dev) return 0; } -static int -xpnet_dev_change_mtu(struct net_device *dev, int new_mtu) -{ - /* 68 comes from min TCP+IP+MAC header */ - if ((new_mtu < 68) || (new_mtu > XPNET_MAX_MTU)) { - dev_err(xpnet, "ifconfig %s mtu %d failed; value must be " - "between 68 and %ld\n", dev->name, new_mtu, - XPNET_MAX_MTU); - return -EINVAL; - } - - dev->mtu = new_mtu; - dev_dbg(xpnet, "ifconfig %s mtu set to %d\n", dev->name, new_mtu); - return 0; -} - /* * Notification that the other end has received the message and * DMA'd the skb information. 
At this point, they are done with @@ -519,7 +505,6 @@ static const struct net_device_ops xpnet_netdev_ops = { .ndo_open = xpnet_dev_open, .ndo_stop = xpnet_dev_stop, .ndo_start_xmit = xpnet_dev_hard_start_xmit, - .ndo_change_mtu = xpnet_dev_change_mtu, .ndo_tx_timeout = xpnet_dev_tx_timeout, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, @@ -555,6 +540,8 @@ xpnet_init(void) xpnet_device->netdev_ops = &xpnet_netdev_ops; xpnet_device->mtu = XPNET_DEF_MTU; + xpnet_device->min_mtu = XPNET_MIN_MTU; + xpnet_device->max_mtu = XPNET_MAX_MTU; /* * Multicast assumes the LSB of the first octet is set for multicast diff --git a/drivers/net/fddi/skfp/skfddi.c b/drivers/net/fddi/skfp/skfddi.c index 51acc6d86e91..3a639180e4a0 100644 --- a/drivers/net/fddi/skfp/skfddi.c +++ b/drivers/net/fddi/skfp/skfddi.c @@ -166,7 +166,6 @@ static const struct net_device_ops skfp_netdev_ops = { .ndo_stop = skfp_close, .ndo_start_xmit = skfp_send_pkt, .ndo_get_stats = skfp_ctl_get_stats, - .ndo_change_mtu = fddi_change_mtu, .ndo_set_rx_mode = skfp_ctl_set_multicast_list, .ndo_set_mac_address = skfp_ctl_set_mac_address, .ndo_do_ioctl = skfp_ioctl, diff --git a/drivers/net/fjes/fjes_main.c b/drivers/net/fjes/fjes_main.c index f36eb4ad40b7..b77e4ecf3cf2 100644 --- a/drivers/net/fjes/fjes_main.c +++ b/drivers/net/fjes/fjes_main.c @@ -1316,6 +1316,8 @@ static void fjes_netdev_setup(struct net_device *netdev) netdev->netdev_ops = &fjes_netdev_ops; fjes_set_ethtool_ops(netdev); netdev->mtu = fjes_support_mtu[3]; + netdev->min_mtu = fjes_support_mtu[0]; + netdev->max_mtu = fjes_support_mtu[3]; netdev->flags |= IFF_BROADCAST; netdev->features |= NETIF_F_HW_CSUM | NETIF_F_HW_VLAN_CTAG_FILTER; } diff --git a/drivers/net/hippi/rrunner.c b/drivers/net/hippi/rrunner.c index 95c0b45a68fb..f5a9728b89f3 100644 --- a/drivers/net/hippi/rrunner.c +++ b/drivers/net/hippi/rrunner.c @@ -68,7 +68,6 @@ static const struct net_device_ops rr_netdev_ops = { .ndo_stop = rr_close, .ndo_do_ioctl = 
rr_ioctl, .ndo_start_xmit = rr_start_xmit, - .ndo_change_mtu = hippi_change_mtu, .ndo_set_mac_address = hippi_mac_addr, }; diff --git a/drivers/net/rionet.c b/drivers/net/rionet.c index a31f4610b493..300bb1479b3a 100644 --- a/drivers/net/rionet.c +++ b/drivers/net/rionet.c @@ -466,17 +466,6 @@ static void rionet_set_msglevel(struct net_device *ndev, u32 value) rnet->msg_enable = value; } -static int rionet_change_mtu(struct net_device *ndev, int new_mtu) -{ - if ((new_mtu < 68) || (new_mtu > RIONET_MAX_MTU)) { - printk(KERN_ERR "%s: Invalid MTU size %d\n", - ndev->name, new_mtu); - return -EINVAL; - } - ndev->mtu = new_mtu; - return 0; -} - static const struct ethtool_ops rionet_ethtool_ops = { .get_drvinfo = rionet_get_drvinfo, .get_msglevel = rionet_get_msglevel, @@ -488,7 +477,6 @@ static const struct net_device_ops rionet_netdev_ops = { .ndo_open = rionet_open, .ndo_stop = rionet_close, .ndo_start_xmit = rionet_start_xmit, - .ndo_change_mtu = rionet_change_mtu, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, }; @@ -525,6 +513,9 @@ static int rionet_setup_netdev(struct rio_mport *mport, struct net_device *ndev) ndev->netdev_ops = &rionet_netdev_ops; ndev->mtu = RIONET_MAX_MTU; + /* MTU range: 68 - 4082 */ + ndev->min_mtu = ETH_MIN_MTU; + ndev->max_mtu = RIONET_MAX_MTU; ndev->features = NETIF_F_LLTX; SET_NETDEV_DEV(ndev, &mport->dev); ndev->ethtool_ops = &rionet_ethtool_ops; diff --git a/drivers/net/slip/slip.c b/drivers/net/slip/slip.c index 9ed6d1c1ee45..7e933d8ff811 100644 --- a/drivers/net/slip/slip.c +++ b/drivers/net/slip/slip.c @@ -561,12 +561,7 @@ static int sl_change_mtu(struct net_device *dev, int new_mtu) { struct slip *sl = netdev_priv(dev); - if (new_mtu < 68 || new_mtu > 65534) - return -EINVAL; - - if (new_mtu != dev->mtu) - return sl_realloc_bufs(sl, new_mtu); - return 0; + return sl_realloc_bufs(sl, new_mtu); } /* Netdevice get statistics request */ @@ -663,6 +658,10 @@ static void sl_setup(struct net_device *dev) 
dev->addr_len = 0; dev->tx_queue_len = 10; + /* MTU range: 68 - 65534 */ + dev->min_mtu = 68; + dev->max_mtu = 65534; + /* New-style flags. */ dev->flags = IFF_NOARP|IFF_POINTOPOINT|IFF_MULTICAST; } diff --git a/drivers/usb/gadget/function/f_phonet.c b/drivers/usb/gadget/function/f_phonet.c index 0473d619d5bf..b4058f0000e4 100644 --- a/drivers/usb/gadget/function/f_phonet.c +++ b/drivers/usb/gadget/function/f_phonet.c @@ -261,19 +261,10 @@ out: return NETDEV_TX_OK; } -static int pn_net_mtu(struct net_device *dev, int new_mtu) -{ - if ((new_mtu < PHONET_MIN_MTU) || (new_mtu > PHONET_MAX_MTU)) - return -EINVAL; - dev->mtu = new_mtu; - return 0; -} - static const struct net_device_ops pn_netdev_ops = { .ndo_open = pn_net_open, .ndo_stop = pn_net_close, .ndo_start_xmit = pn_net_xmit, - .ndo_change_mtu = pn_net_mtu, }; static void pn_net_setup(struct net_device *dev) @@ -282,6 +273,8 @@ static void pn_net_setup(struct net_device *dev) dev->type = ARPHRD_PHONET; dev->flags = IFF_POINTOPOINT | IFF_NOARP; dev->mtu = PHONET_DEV_MTU; + dev->min_mtu = PHONET_MIN_MTU; + dev->max_mtu = PHONET_MAX_MTU; dev->hard_header_len = 1; dev->dev_addr[0] = PN_MEDIA_USB; dev->addr_len = 1; diff --git a/drivers/usb/gadget/function/u_ether.c b/drivers/usb/gadget/function/u_ether.c index 9c8c9ed1dc9e..39a6df1e2ded 100644 --- a/drivers/usb/gadget/function/u_ether.c +++ b/drivers/usb/gadget/function/u_ether.c @@ -142,15 +142,6 @@ static inline int qlen(struct usb_gadget *gadget, unsigned qmult) /* NETWORK DRIVER HOOKUP (to the layer above this driver) */ -static int ueth_change_mtu(struct net_device *net, int new_mtu) -{ - if (new_mtu <= ETH_HLEN || new_mtu > GETHER_MAX_ETH_FRAME_LEN) - return -ERANGE; - net->mtu = new_mtu; - - return 0; -} - static void eth_get_drvinfo(struct net_device *net, struct ethtool_drvinfo *p) { struct eth_dev *dev = netdev_priv(net); @@ -736,7 +727,6 @@ static const struct net_device_ops eth_netdev_ops = { .ndo_open = eth_open, .ndo_stop = eth_stop, .ndo_start_xmit = 
eth_start_xmit, - .ndo_change_mtu = ueth_change_mtu, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, }; @@ -799,6 +789,10 @@ struct eth_dev *gether_setup_name(struct usb_gadget *g, net->ethtool_ops = &ops; + /* MTU range: 14 - 15412 */ + net->min_mtu = ETH_HLEN; + net->max_mtu = GETHER_MAX_ETH_FRAME_LEN; + dev->gadget = g; SET_NETDEV_DEV(net, &g->dev); SET_NETDEV_DEVTYPE(net, &gadget_type); diff --git a/include/linux/fddidevice.h b/include/linux/fddidevice.h index 9a79f0106da1..32c22cfb238b 100644 --- a/include/linux/fddidevice.h +++ b/include/linux/fddidevice.h @@ -26,7 +26,6 @@ #ifdef __KERNEL__ __be16 fddi_type_trans(struct sk_buff *skb, struct net_device *dev); -int fddi_change_mtu(struct net_device *dev, int new_mtu); struct net_device *alloc_fddidev(int sizeof_priv); #endif diff --git a/include/linux/hippidevice.h b/include/linux/hippidevice.h index 8ec23fb0b412..402f99e328d4 100644 --- a/include/linux/hippidevice.h +++ b/include/linux/hippidevice.h @@ -32,7 +32,6 @@ struct hippi_cb { }; __be16 hippi_type_trans(struct sk_buff *skb, struct net_device *dev); -int hippi_change_mtu(struct net_device *dev, int new_mtu); int hippi_mac_addr(struct net_device *dev, void *p); int hippi_neigh_setup_dev(struct net_device *dev, struct neigh_parms *p); struct net_device *alloc_hippi_dev(int sizeof_priv); diff --git a/net/802/fddi.c b/net/802/fddi.c index 7d3a0af954e8..6356623fc238 100644 --- a/net/802/fddi.c +++ b/net/802/fddi.c @@ -141,15 +141,6 @@ __be16 fddi_type_trans(struct sk_buff *skb, struct net_device *dev) EXPORT_SYMBOL(fddi_type_trans); -int fddi_change_mtu(struct net_device *dev, int new_mtu) -{ - if ((new_mtu < FDDI_K_SNAP_HLEN) || (new_mtu > FDDI_K_SNAP_DLEN)) - return -EINVAL; - dev->mtu = new_mtu; - return 0; -} -EXPORT_SYMBOL(fddi_change_mtu); - static const struct header_ops fddi_header_ops = { .create = fddi_header, }; @@ -161,6 +152,8 @@ static void fddi_setup(struct net_device *dev) dev->type = ARPHRD_FDDI; 
dev->hard_header_len = FDDI_K_SNAP_HLEN+3; /* Assume 802.2 SNAP hdr len + 3 pad bytes */ dev->mtu = FDDI_K_SNAP_DLEN; /* Assume max payload of 802.2 SNAP frame */ + dev->min_mtu = FDDI_K_SNAP_HLEN; + dev->max_mtu = FDDI_K_SNAP_DLEN; dev->addr_len = FDDI_K_ALEN; dev->tx_queue_len = 100; /* Long queues on FDDI */ dev->flags = IFF_BROADCAST | IFF_MULTICAST; diff --git a/net/802/hippi.c b/net/802/hippi.c index ade1a52cdcff..5e4427beab2b 100644 --- a/net/802/hippi.c +++ b/net/802/hippi.c @@ -116,18 +116,6 @@ __be16 hippi_type_trans(struct sk_buff *skb, struct net_device *dev) EXPORT_SYMBOL(hippi_type_trans); -int hippi_change_mtu(struct net_device *dev, int new_mtu) -{ - /* - * HIPPI's got these nice large MTUs. - */ - if ((new_mtu < 68) || (new_mtu > 65280)) - return -EINVAL; - dev->mtu = new_mtu; - return 0; -} -EXPORT_SYMBOL(hippi_change_mtu); - /* * For HIPPI we will actually use the lower 4 bytes of the hardware * address as the I-FIELD rather than the actual hardware address. @@ -174,6 +162,8 @@ static void hippi_setup(struct net_device *dev) dev->type = ARPHRD_HIPPI; dev->hard_header_len = HIPPI_HLEN; dev->mtu = 65280; + dev->min_mtu = 68; + dev->max_mtu = 65280; dev->addr_len = HIPPI_ALEN; dev->tx_queue_len = 25 /* 5 */; memset(dev->broadcast, 0xFF, HIPPI_ALEN); diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 49e16b6e0ba3..112679d64be5 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -158,17 +158,6 @@ static int batadv_interface_set_mac_addr(struct net_device *dev, void *p) return 0; } -static int batadv_interface_change_mtu(struct net_device *dev, int new_mtu) -{ - /* check ranges */ - if ((new_mtu < 68) || (new_mtu > batadv_hardif_min_mtu(dev))) - return -EINVAL; - - dev->mtu = new_mtu; - - return 0; -} - /** * batadv_interface_set_rx_mode - set the rx mode of a device * @dev: registered network device to modify @@ -920,7 +909,6 @@ static const struct net_device_ops batadv_netdev_ops = { 
.ndo_vlan_rx_add_vid = batadv_interface_add_vid, .ndo_vlan_rx_kill_vid = batadv_interface_kill_vid, .ndo_set_mac_address = batadv_interface_set_mac_addr, - .ndo_change_mtu = batadv_interface_change_mtu, .ndo_set_rx_mode = batadv_interface_set_rx_mode, .ndo_start_xmit = batadv_interface_tx, .ndo_validate_addr = eth_validate_addr, @@ -987,6 +975,7 @@ struct net_device *batadv_softif_create(struct net *net, const char *name) dev_net_set(soft_iface, net); soft_iface->rtnl_link_ops = &batadv_link_ops; + soft_iface->max_mtu = batadv_hardif_min_mtu(soft_iface); ret = register_netdevice(soft_iface); if (ret < 0) { diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index 16737cd8dae8..fc65b145f6e7 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -398,6 +398,7 @@ void hsr_dev_setup(struct net_device *dev) random_ether_addr(dev->dev_addr); ether_setup(dev); + dev->min_mtu = 0; dev->header_ops = &hsr_header_ops; dev->netdev_ops = &hsr_device_ops; SET_NETDEV_DEVTYPE(dev, &hsr_type); diff --git a/net/phonet/pep-gprs.c b/net/phonet/pep-gprs.c index fa8237fdc57b..21c28b51be94 100644 --- a/net/phonet/pep-gprs.c +++ b/net/phonet/pep-gprs.c @@ -217,20 +217,10 @@ static netdev_tx_t gprs_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_OK; } -static int gprs_set_mtu(struct net_device *dev, int new_mtu) -{ - if ((new_mtu < 576) || (new_mtu > (PHONET_MAX_MTU - 11))) - return -EINVAL; - - dev->mtu = new_mtu; - return 0; -} - static const struct net_device_ops gprs_netdev_ops = { .ndo_open = gprs_open, .ndo_stop = gprs_close, .ndo_start_xmit = gprs_xmit, - .ndo_change_mtu = gprs_set_mtu, }; static void gprs_setup(struct net_device *dev) @@ -239,6 +229,8 @@ static void gprs_setup(struct net_device *dev) dev->type = ARPHRD_PHONET_PIPE; dev->flags = IFF_POINTOPOINT | IFF_NOARP; dev->mtu = GPRS_DEFAULT_MTU; + dev->min_mtu = 576; + dev->max_mtu = (PHONET_MAX_MTU - 11); dev->hard_header_len = 0; dev->addr_len = 0; dev->tx_queue_len = 10; -- cgit v1.2.3 From 
e5f1b245870d59be0e6cc3b33edf5406a3b59648 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Wed, 5 Oct 2016 09:33:12 +0200 Subject: cpuidle: governors: Remove remaining old module code The governor's code uses try_module_get() and module_put() to refcount the governor's module. But the governors are not compiled as modules. The refcount does not prevent switching the governor or unloading a module, as they aren't compiled as modules. The code is pointless, so remove it. Signed-off-by: Daniel Lezcano Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/governor.c | 4 ---- drivers/cpuidle/governors/ladder.c | 2 -- drivers/cpuidle/governors/menu.c | 2 -- include/linux/cpuidle.h | 2 -- 4 files changed, 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/cpuidle/governor.c b/drivers/cpuidle/governor.c index fb9f511cca23..4e78263e34a4 100644 --- a/drivers/cpuidle/governor.c +++ b/drivers/cpuidle/governor.c @@ -9,7 +9,6 @@ */ #include -#include #include #include "cpuidle.h" @@ -53,14 +52,11 @@ int cpuidle_switch_governor(struct cpuidle_governor *gov) if (cpuidle_curr_governor) { list_for_each_entry(dev, &cpuidle_detected_devices, device_list) cpuidle_disable_device(dev); - module_put(cpuidle_curr_governor->owner); } cpuidle_curr_governor = gov; if (gov) { - if (!try_module_get(cpuidle_curr_governor->owner)) - return -EINVAL; list_for_each_entry(dev, &cpuidle_detected_devices, device_list) cpuidle_enable_device(dev); cpuidle_install_idle_handler(); diff --git a/drivers/cpuidle/governors/ladder.c b/drivers/cpuidle/governors/ladder.c index 63bd5a403e22..fe8f08948fcb 100644 --- a/drivers/cpuidle/governors/ladder.c +++ b/drivers/cpuidle/governors/ladder.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include @@ -177,7 +176,6 @@ static struct cpuidle_governor ladder_governor = { .enable = ladder_enable_device, .select = ladder_select_state, .reflect = ladder_reflect, - .owner = THIS_MODULE, }; /** diff --git a/drivers/cpuidle/governors/menu.c 
b/drivers/cpuidle/governors/menu.c index 03d38c291de6..d9b5b9398a0f 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -19,7 +19,6 @@ #include #include #include -#include /* * Please note when changing the tuning values: @@ -484,7 +483,6 @@ static struct cpuidle_governor menu_governor = { .enable = menu_enable_device, .select = menu_select, .reflect = menu_reflect, - .owner = THIS_MODULE, }; /** diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index bb31373c3478..15deea449edc 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -235,8 +235,6 @@ struct cpuidle_governor { int (*select) (struct cpuidle_driver *drv, struct cpuidle_device *dev); void (*reflect) (struct cpuidle_device *dev, int index); - - struct module *owner; }; #ifdef CONFIG_CPU_IDLE -- cgit v1.2.3 From 62006c1702b3b1be0c0726949e0ee0ea2326be9c Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Mon, 17 Oct 2016 20:16:58 +0200 Subject: PM / Runtime: Remove the exported function pm_children_suspended() The exported function pm_children_suspended() has only one caller, which is the runtime PM internal function, rpm_check_suspend_allowed(). Let's clean-up this code, by removing pm_children_suspended() altogether and instead do the one-liner check directly in rpm_check_suspend_allowed(). Signed-off-by: Ulf Hansson Reviewed-by: Linus Walleij Signed-off-by: Rafael J. 
Wysocki --- drivers/base/power/runtime.c | 3 ++- include/linux/pm_runtime.h | 7 ------- 2 files changed, 2 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index 82a081ea4317..53b427dfc403 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -241,7 +241,8 @@ static int rpm_check_suspend_allowed(struct device *dev) retval = -EACCES; else if (atomic_read(&dev->power.usage_count) > 0) retval = -EAGAIN; - else if (!pm_children_suspended(dev)) + else if (!dev->power.ignore_children && + atomic_read(&dev->power.child_count)) retval = -EBUSY; /* Pending resume requests take precedence over suspends. */ diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 2e14d2667b6c..61ea5666c94c 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -61,12 +61,6 @@ static inline void pm_suspend_ignore_children(struct device *dev, bool enable) dev->power.ignore_children = enable; } -static inline bool pm_children_suspended(struct device *dev) -{ - return dev->power.ignore_children - || !atomic_read(&dev->power.child_count); -} - static inline void pm_runtime_get_noresume(struct device *dev) { atomic_inc(&dev->power.usage_count); @@ -162,7 +156,6 @@ static inline void pm_runtime_allow(struct device *dev) {} static inline void pm_runtime_forbid(struct device *dev) {} static inline void pm_suspend_ignore_children(struct device *dev, bool enable) {} -static inline bool pm_children_suspended(struct device *dev) { return false; } static inline void pm_runtime_get_noresume(struct device *dev) {} static inline void pm_runtime_put_noidle(struct device *dev) {} static inline bool device_run_wake(struct device *dev) { return false; } -- cgit v1.2.3 From b1a60995a684f2b6052cda640b0704361ab40089 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Mon, 17 Oct 2016 20:17:00 +0200 Subject: PM / Runtime: Convert pm_runtime_set_suspended() to return an int 
Because pm_runtime_set_suspended() invokes __pm_runtime_set_status(), which can fail, pm_runtime_set_suspended() can also fail. Instead of hiding a potential error, let's propagate it by converting pm_runtime_set_suspended() from a void to return an int. In this way users are able to check the error code and act accordingly. Signed-off-by: Ulf Hansson Reviewed-by: Linus Walleij Signed-off-by: Rafael J. Wysocki --- include/linux/pm_runtime.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 61ea5666c94c..4957fc185ea9 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -258,9 +258,9 @@ static inline int pm_runtime_set_active(struct device *dev) return __pm_runtime_set_status(dev, RPM_ACTIVE); } -static inline void pm_runtime_set_suspended(struct device *dev) +static inline int pm_runtime_set_suspended(struct device *dev) { - __pm_runtime_set_status(dev, RPM_SUSPENDED); + return __pm_runtime_set_status(dev, RPM_SUSPENDED); } static inline void pm_runtime_disable(struct device *dev) -- cgit v1.2.3 From 59d65b73a23cee48e6f3e44686f199d79b7ee854 Mon Sep 17 00:00:00 2001 From: Lina Iyer Date: Fri, 14 Oct 2016 10:47:49 -0700 Subject: PM / Domains: Make genpd state allocation dynamic Allow PM Domain states to be defined dynamically by the drivers. This removes the limitation on the maximum number of states possible for a domain. Suggested-by: Ulf Hansson Signed-off-by: Lina Iyer Acked-by: Ulf Hansson Reviewed-by: Kevin Hilman Signed-off-by: Rafael J. 
Wysocki --- arch/arm/mach-imx/gpc.c | 17 ++++++++++------- drivers/base/power/domain.c | 35 +++++++++++++++++++++++------------ include/linux/pm_domain.h | 5 ++--- 3 files changed, 35 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-imx/gpc.c b/arch/arm/mach-imx/gpc.c index 0df062d8b2c9..57a410bbb6a2 100644 --- a/arch/arm/mach-imx/gpc.c +++ b/arch/arm/mach-imx/gpc.c @@ -380,13 +380,6 @@ static struct pu_domain imx6q_pu_domain = { .name = "PU", .power_off = imx6q_pm_pu_power_off, .power_on = imx6q_pm_pu_power_on, - .states = { - [0] = { - .power_off_latency_ns = 25000, - .power_on_latency_ns = 2000000, - }, - }, - .state_count = 1, }, }; @@ -430,6 +423,16 @@ static int imx_gpc_genpd_init(struct device *dev, struct regulator *pu_reg) if (!IS_ENABLED(CONFIG_PM_GENERIC_DOMAINS)) return 0; + imx6q_pu_domain.base.states = devm_kzalloc(dev, + sizeof(*imx6q_pu_domain.base.states), + GFP_KERNEL); + if (!imx6q_pu_domain.base.states) + return -ENOMEM; + + imx6q_pu_domain.base.states[0].power_off_latency_ns = 25000; + imx6q_pu_domain.base.states[0].power_on_latency_ns = 2000000; + imx6q_pu_domain.base.state_count = 1; + pm_genpd_init(&imx6q_pu_domain.base, NULL, false); return of_genpd_add_provider_onecell(dev->of_node, &imx_gpc_onecell_data); diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index e023066e4215..37ab7f1ef178 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -1282,6 +1282,21 @@ out: } EXPORT_SYMBOL_GPL(pm_genpd_remove_subdomain); +static int genpd_set_default_power_state(struct generic_pm_domain *genpd) +{ + struct genpd_power_state *state; + + state = kzalloc(sizeof(*state), GFP_KERNEL); + if (!state) + return -ENOMEM; + + genpd->states = state; + genpd->state_count = 1; + genpd->free = state; + + return 0; +} + /** * pm_genpd_init - Initialize a generic I/O PM domain object. * @genpd: PM domain object to initialize. 
@@ -1293,6 +1308,8 @@ EXPORT_SYMBOL_GPL(pm_genpd_remove_subdomain); int pm_genpd_init(struct generic_pm_domain *genpd, struct dev_power_governor *gov, bool is_off) { + int ret; + if (IS_ERR_OR_NULL(genpd)) return -EINVAL; @@ -1325,19 +1342,12 @@ int pm_genpd_init(struct generic_pm_domain *genpd, genpd->dev_ops.start = pm_clk_resume; } - if (genpd->state_idx >= GENPD_MAX_NUM_STATES) { - pr_warn("Initial state index out of bounds.\n"); - genpd->state_idx = GENPD_MAX_NUM_STATES - 1; - } - - if (genpd->state_count > GENPD_MAX_NUM_STATES) { - pr_warn("Limiting states to %d\n", GENPD_MAX_NUM_STATES); - genpd->state_count = GENPD_MAX_NUM_STATES; - } - /* Use only one "off" state if there were no states declared */ - if (genpd->state_count == 0) - genpd->state_count = 1; + if (genpd->state_count == 0) { + ret = genpd_set_default_power_state(genpd); + if (ret) + return ret; + } mutex_lock(&gpd_list_lock); list_add(&genpd->gpd_list_node, &gpd_list); @@ -1377,6 +1387,7 @@ static int genpd_remove(struct generic_pm_domain *genpd) list_del(&genpd->gpd_list_node); mutex_unlock(&genpd->lock); cancel_work_sync(&genpd->power_off_work); + kfree(genpd->free); pr_debug("%s: removed %s\n", __func__, genpd->name); return 0; diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index a09fe5c009c8..de1d8f331b03 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -19,8 +19,6 @@ /* Defines used for the flags field in the struct generic_pm_domain */ #define GENPD_FLAG_PM_CLK (1U << 0) /* PM domain uses PM clk */ -#define GENPD_MAX_NUM_STATES 8 /* Number of possible low power states */ - enum gpd_status { GPD_STATE_ACTIVE = 0, /* PM domain is active */ GPD_STATE_POWER_OFF, /* PM domain is off */ @@ -70,9 +68,10 @@ struct generic_pm_domain { void (*detach_dev)(struct generic_pm_domain *domain, struct device *dev); unsigned int flags; /* Bit field of configs for genpd */ - struct genpd_power_state states[GENPD_MAX_NUM_STATES]; + struct genpd_power_state 
*states; unsigned int state_count; /* number of states */ unsigned int state_idx; /* state that genpd will go to when off */ + void *free; /* Free the state that was allocated for default */ }; -- cgit v1.2.3 From 405f7226014093a2809f27ba32a8230e770ac876 Mon Sep 17 00:00:00 2001 From: Lina Iyer Date: Fri, 14 Oct 2016 10:47:50 -0700 Subject: PM / Domains: Add residency property to genpd states Residency of a domain's idle state indicates the minimum idle time for the domain's idle state to be beneficial for power. Add the parameter to the state node. Future patches will use the residency value in the genpd governor to determine if it is worthwhile to enter an idle state. Signed-off-by: Lina Iyer Acked-by: Ulf Hansson Reviewed-by: Kevin Hilman Signed-off-by: Rafael J. Wysocki --- include/linux/pm_domain.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index de1d8f331b03..f4492eb71701 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -38,6 +38,7 @@ struct gpd_dev_ops { struct genpd_power_state { s64 power_off_latency_ns; s64 power_on_latency_ns; + s64 residency_ns; }; struct generic_pm_domain { -- cgit v1.2.3 From 30f604283e05d34cb10108c7ba017e5f4fc9d62c Mon Sep 17 00:00:00 2001 From: Lina Iyer Date: Fri, 14 Oct 2016 10:47:51 -0700 Subject: PM / Domains: Allow domain power states to be read from DT This patch allows domains to define idle states in the DT. SoCs can define domain idle states in DT using the "domain-idle-states" property of the domain provider. Add an API to read the idle states from DT that can be set in the genpd object. This patch is based on the original patch by Marc Titinger. Signed-off-by: Marc Titinger Signed-off-by: Ulf Hansson Signed-off-by: Lina Iyer Reviewed-by: Kevin Hilman Signed-off-by: Rafael J. 
Wysocki --- drivers/base/power/domain.c | 94 +++++++++++++++++++++++++++++++++++++++++++++ include/linux/pm_domain.h | 8 ++++ 2 files changed, 102 insertions(+) (limited to 'include/linux') diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 37ab7f1ef178..9af75ba0472a 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -1916,6 +1916,100 @@ out: return ret ? -EPROBE_DEFER : 0; } EXPORT_SYMBOL_GPL(genpd_dev_pm_attach); + +static const struct of_device_id idle_state_match[] = { + { .compatible = "arm,idle-state", }, + { } +}; + +static int genpd_parse_state(struct genpd_power_state *genpd_state, + struct device_node *state_node) +{ + int err; + u32 residency; + u32 entry_latency, exit_latency; + const struct of_device_id *match_id; + + match_id = of_match_node(idle_state_match, state_node); + if (!match_id) + return -EINVAL; + + err = of_property_read_u32(state_node, "entry-latency-us", + &entry_latency); + if (err) { + pr_debug(" * %s missing entry-latency-us property\n", + state_node->full_name); + return -EINVAL; + } + + err = of_property_read_u32(state_node, "exit-latency-us", + &exit_latency); + if (err) { + pr_debug(" * %s missing exit-latency-us property\n", + state_node->full_name); + return -EINVAL; + } + + err = of_property_read_u32(state_node, "min-residency-us", &residency); + if (!err) + genpd_state->residency_ns = 1000 * residency; + + genpd_state->power_on_latency_ns = 1000 * exit_latency; + genpd_state->power_off_latency_ns = 1000 * entry_latency; + + return 0; +} + +/** + * of_genpd_parse_idle_states: Return array of idle states for the genpd. + * + * @dn: The genpd device node + * @states: The pointer to which the state array will be saved. + * @n: The count of elements in the array returned from this function. + * + * Returns the device states parsed from the OF node. The memory for the states + * is allocated by this function and is the responsibility of the caller to + * free the memory after use. 
+ */ +int of_genpd_parse_idle_states(struct device_node *dn, + struct genpd_power_state **states, int *n) +{ + struct genpd_power_state *st; + struct device_node *np; + int i = 0; + int err, ret; + int count; + struct of_phandle_iterator it; + + count = of_count_phandle_with_args(dn, "domain-idle-states", NULL); + if (!count) + return -EINVAL; + + st = kcalloc(count, sizeof(*st), GFP_KERNEL); + if (!st) + return -ENOMEM; + + /* Loop over the phandles until all the requested entry is found */ + of_for_each_phandle(&it, err, dn, "domain-idle-states", NULL, 0) { + np = it.node; + ret = genpd_parse_state(&st[i++], np); + if (ret) { + pr_err + ("Parsing idle state node %s failed with err %d\n", + np->full_name, ret); + of_node_put(np); + kfree(st); + return ret; + } + } + + *n = count; + *states = st; + + return 0; +} +EXPORT_SYMBOL_GPL(of_genpd_parse_idle_states); + #endif /* CONFIG_PM_GENERIC_DOMAINS_OF */ diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index f4492eb71701..b4894969fbec 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -205,6 +205,8 @@ extern int of_genpd_add_device(struct of_phandle_args *args, extern int of_genpd_add_subdomain(struct of_phandle_args *parent, struct of_phandle_args *new_subdomain); extern struct generic_pm_domain *of_genpd_remove_last(struct device_node *np); +extern int of_genpd_parse_idle_states(struct device_node *dn, + struct genpd_power_state **states, int *n); int genpd_dev_pm_attach(struct device *dev); #else /* !CONFIG_PM_GENERIC_DOMAINS_OF */ @@ -234,6 +236,12 @@ static inline int of_genpd_add_subdomain(struct of_phandle_args *parent, return -ENODEV; } +static inline int of_genpd_parse_idle_states(struct device_node *dn, + struct genpd_power_state **states, int *n) +{ + return -ENODEV; +} + static inline int genpd_dev_pm_attach(struct device *dev) { return -ENODEV; -- cgit v1.2.3 From 0c9b694a8a7d4853318c4f2ce315afa2bd3664b6 Mon Sep 17 00:00:00 2001 From: Lina Iyer Date: Fri, 14 Oct 
2016 10:47:52 -0700 Subject: PM / Domains: Save the fwnode in genpd_power_state Save the fwnode for the genpd state in the state node. PM Domain clients may use the fwnode to read in the platform specific domain state properties and associate them with the state. Signed-off-by: Lina Iyer Acked-by: Ulf Hansson Reviewed-by: Kevin Hilman Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain.c | 1 + include/linux/pm_domain.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 9af75ba0472a..1a6073aaca0e 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -1956,6 +1956,7 @@ static int genpd_parse_state(struct genpd_power_state *genpd_state, genpd_state->power_on_latency_ns = 1000 * exit_latency; genpd_state->power_off_latency_ns = 1000 * entry_latency; + genpd_state->fwnode = &state_node->fwnode; return 0; } diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index b4894969fbec..6a8988166899 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -39,6 +39,7 @@ struct genpd_power_state { s64 power_off_latency_ns; s64 power_on_latency_ns; s64 residency_ns; + struct fwnode_handle *fwnode; }; struct generic_pm_domain { -- cgit v1.2.3 From 35241d12f750d2f1556a9c85f175ce7044716881 Mon Sep 17 00:00:00 2001 From: Lina Iyer Date: Fri, 14 Oct 2016 10:47:54 -0700 Subject: PM / Domains: Abstract genpd locking Abstract genpd lock/unlock calls, in preparation for domain specific locks added in the following patches. Signed-off-by: Lina Iyer Signed-off-by: Ulf Hansson Reviewed-by: Kevin Hilman Signed-off-by: Rafael J. 
Wysocki --- drivers/base/power/domain.c | 121 +++++++++++++++++++++++++++++--------------- include/linux/pm_domain.h | 5 +- 2 files changed, 85 insertions(+), 41 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 1a6073aaca0e..4194012cdf86 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -39,6 +39,46 @@ static LIST_HEAD(gpd_list); static DEFINE_MUTEX(gpd_list_lock); +struct genpd_lock_ops { + void (*lock)(struct generic_pm_domain *genpd); + void (*lock_nested)(struct generic_pm_domain *genpd, int depth); + int (*lock_interruptible)(struct generic_pm_domain *genpd); + void (*unlock)(struct generic_pm_domain *genpd); +}; + +static void genpd_lock_mtx(struct generic_pm_domain *genpd) +{ + mutex_lock(&genpd->mlock); +} + +static void genpd_lock_nested_mtx(struct generic_pm_domain *genpd, + int depth) +{ + mutex_lock_nested(&genpd->mlock, depth); +} + +static int genpd_lock_interruptible_mtx(struct generic_pm_domain *genpd) +{ + return mutex_lock_interruptible(&genpd->mlock); +} + +static void genpd_unlock_mtx(struct generic_pm_domain *genpd) +{ + return mutex_unlock(&genpd->mlock); +} + +static const struct genpd_lock_ops genpd_mtx_ops = { + .lock = genpd_lock_mtx, + .lock_nested = genpd_lock_nested_mtx, + .lock_interruptible = genpd_lock_interruptible_mtx, + .unlock = genpd_unlock_mtx, +}; + +#define genpd_lock(p) p->lock_ops->lock(p) +#define genpd_lock_nested(p, d) p->lock_ops->lock_nested(p, d) +#define genpd_lock_interruptible(p) p->lock_ops->lock_interruptible(p) +#define genpd_unlock(p) p->lock_ops->unlock(p) + /* * Get the generic PM domain for a particular struct device. 
* This validates the struct device pointer, the PM domain pointer, @@ -200,9 +240,9 @@ static int genpd_poweron(struct generic_pm_domain *genpd, unsigned int depth) genpd_sd_counter_inc(master); - mutex_lock_nested(&master->lock, depth + 1); + genpd_lock_nested(master, depth + 1); ret = genpd_poweron(master, depth + 1); - mutex_unlock(&master->lock); + genpd_unlock(master); if (ret) { genpd_sd_counter_dec(master); @@ -255,9 +295,9 @@ static int genpd_dev_pm_qos_notifier(struct notifier_block *nb, spin_unlock_irq(&dev->power.lock); if (!IS_ERR(genpd)) { - mutex_lock(&genpd->lock); + genpd_lock(genpd); genpd->max_off_time_changed = true; - mutex_unlock(&genpd->lock); + genpd_unlock(genpd); } dev = dev->parent; @@ -354,9 +394,9 @@ static void genpd_power_off_work_fn(struct work_struct *work) genpd = container_of(work, struct generic_pm_domain, power_off_work); - mutex_lock(&genpd->lock); + genpd_lock(genpd); genpd_poweroff(genpd, true); - mutex_unlock(&genpd->lock); + genpd_unlock(genpd); } /** @@ -472,9 +512,9 @@ static int genpd_runtime_suspend(struct device *dev) if (dev->power.irq_safe) return 0; - mutex_lock(&genpd->lock); + genpd_lock(genpd); genpd_poweroff(genpd, false); - mutex_unlock(&genpd->lock); + genpd_unlock(genpd); return 0; } @@ -509,9 +549,9 @@ static int genpd_runtime_resume(struct device *dev) goto out; } - mutex_lock(&genpd->lock); + genpd_lock(genpd); ret = genpd_poweron(genpd, 0); - mutex_unlock(&genpd->lock); + genpd_unlock(genpd); if (ret) return ret; @@ -547,9 +587,9 @@ err_stop: genpd_stop_dev(genpd, dev); err_poweroff: if (!dev->power.irq_safe) { - mutex_lock(&genpd->lock); + genpd_lock(genpd); genpd_poweroff(genpd, 0); - mutex_unlock(&genpd->lock); + genpd_unlock(genpd); } return ret; @@ -732,20 +772,20 @@ static int pm_genpd_prepare(struct device *dev) if (resume_needed(dev, genpd)) pm_runtime_resume(dev); - mutex_lock(&genpd->lock); + genpd_lock(genpd); if (genpd->prepared_count++ == 0) genpd->suspended_count = 0; - 
mutex_unlock(&genpd->lock); + genpd_unlock(genpd); ret = pm_generic_prepare(dev); if (ret) { - mutex_lock(&genpd->lock); + genpd_lock(genpd); genpd->prepared_count--; - mutex_unlock(&genpd->lock); + genpd_unlock(genpd); } return ret; @@ -936,13 +976,13 @@ static void pm_genpd_complete(struct device *dev) pm_generic_complete(dev); - mutex_lock(&genpd->lock); + genpd_lock(genpd); genpd->prepared_count--; if (!genpd->prepared_count) genpd_queue_power_off_work(genpd); - mutex_unlock(&genpd->lock); + genpd_unlock(genpd); } /** @@ -1071,7 +1111,7 @@ static int genpd_add_device(struct generic_pm_domain *genpd, struct device *dev, if (IS_ERR(gpd_data)) return PTR_ERR(gpd_data); - mutex_lock(&genpd->lock); + genpd_lock(genpd); if (genpd->prepared_count > 0) { ret = -EAGAIN; @@ -1088,7 +1128,7 @@ static int genpd_add_device(struct generic_pm_domain *genpd, struct device *dev, list_add_tail(&gpd_data->base.list_node, &genpd->dev_list); out: - mutex_unlock(&genpd->lock); + genpd_unlock(genpd); if (ret) genpd_free_dev_data(dev, gpd_data); @@ -1130,7 +1170,7 @@ static int genpd_remove_device(struct generic_pm_domain *genpd, gpd_data = to_gpd_data(pdd); dev_pm_qos_remove_notifier(dev, &gpd_data->nb); - mutex_lock(&genpd->lock); + genpd_lock(genpd); if (genpd->prepared_count > 0) { ret = -EAGAIN; @@ -1145,14 +1185,14 @@ static int genpd_remove_device(struct generic_pm_domain *genpd, list_del_init(&pdd->list_node); - mutex_unlock(&genpd->lock); + genpd_unlock(genpd); genpd_free_dev_data(dev, gpd_data); return 0; out: - mutex_unlock(&genpd->lock); + genpd_unlock(genpd); dev_pm_qos_add_notifier(dev, &gpd_data->nb); return ret; @@ -1187,8 +1227,8 @@ static int genpd_add_subdomain(struct generic_pm_domain *genpd, if (!link) return -ENOMEM; - mutex_lock(&subdomain->lock); - mutex_lock_nested(&genpd->lock, SINGLE_DEPTH_NESTING); + genpd_lock(subdomain); + genpd_lock_nested(genpd, SINGLE_DEPTH_NESTING); if (genpd->status == GPD_STATE_POWER_OFF && subdomain->status != GPD_STATE_POWER_OFF) 
{ @@ -1211,8 +1251,8 @@ static int genpd_add_subdomain(struct generic_pm_domain *genpd, genpd_sd_counter_inc(genpd); out: - mutex_unlock(&genpd->lock); - mutex_unlock(&subdomain->lock); + genpd_unlock(genpd); + genpd_unlock(subdomain); if (ret) kfree(link); return ret; @@ -1250,8 +1290,8 @@ int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd, if (IS_ERR_OR_NULL(genpd) || IS_ERR_OR_NULL(subdomain)) return -EINVAL; - mutex_lock(&subdomain->lock); - mutex_lock_nested(&genpd->lock, SINGLE_DEPTH_NESTING); + genpd_lock(subdomain); + genpd_lock_nested(genpd, SINGLE_DEPTH_NESTING); if (!list_empty(&subdomain->master_links) || subdomain->device_count) { pr_warn("%s: unable to remove subdomain %s\n", genpd->name, @@ -1275,8 +1315,8 @@ int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd, } out: - mutex_unlock(&genpd->lock); - mutex_unlock(&subdomain->lock); + genpd_unlock(genpd); + genpd_unlock(subdomain); return ret; } @@ -1316,7 +1356,8 @@ int pm_genpd_init(struct generic_pm_domain *genpd, INIT_LIST_HEAD(&genpd->master_links); INIT_LIST_HEAD(&genpd->slave_links); INIT_LIST_HEAD(&genpd->dev_list); - mutex_init(&genpd->lock); + mutex_init(&genpd->mlock); + genpd->lock_ops = &genpd_mtx_ops; genpd->gov = gov; INIT_WORK(&genpd->power_off_work, genpd_power_off_work_fn); atomic_set(&genpd->sd_count, 0); @@ -1364,16 +1405,16 @@ static int genpd_remove(struct generic_pm_domain *genpd) if (IS_ERR_OR_NULL(genpd)) return -EINVAL; - mutex_lock(&genpd->lock); + genpd_lock(genpd); if (genpd->has_provider) { - mutex_unlock(&genpd->lock); + genpd_unlock(genpd); pr_err("Provider present, unable to remove %s\n", genpd->name); return -EBUSY; } if (!list_empty(&genpd->master_links) || genpd->device_count) { - mutex_unlock(&genpd->lock); + genpd_unlock(genpd); pr_err("%s: unable to remove %s\n", __func__, genpd->name); return -EBUSY; } @@ -1385,7 +1426,7 @@ static int genpd_remove(struct generic_pm_domain *genpd) } list_del(&genpd->gpd_list_node); - 
mutex_unlock(&genpd->lock); + genpd_unlock(genpd); cancel_work_sync(&genpd->power_off_work); kfree(genpd->free); pr_debug("%s: removed %s\n", __func__, genpd->name); @@ -1909,9 +1950,9 @@ int genpd_dev_pm_attach(struct device *dev) dev->pm_domain->detach = genpd_dev_pm_detach; dev->pm_domain->sync = genpd_dev_pm_sync; - mutex_lock(&pd->lock); + genpd_lock(pd); ret = genpd_poweron(pd, 0); - mutex_unlock(&pd->lock); + genpd_unlock(pd); out: return ret ? -EPROBE_DEFER : 0; } @@ -2064,7 +2105,7 @@ static int pm_genpd_summary_one(struct seq_file *s, char state[16]; int ret; - ret = mutex_lock_interruptible(&genpd->lock); + ret = genpd_lock_interruptible(genpd); if (ret) return -ERESTARTSYS; @@ -2101,7 +2142,7 @@ static int pm_genpd_summary_one(struct seq_file *s, seq_puts(s, "\n"); exit: - mutex_unlock(&genpd->lock); + genpd_unlock(genpd); return 0; } diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 6a8988166899..811b968eb740 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -42,13 +42,14 @@ struct genpd_power_state { struct fwnode_handle *fwnode; }; +struct genpd_lock_ops; + struct generic_pm_domain { struct dev_pm_domain domain; /* PM domain operations */ struct list_head gpd_list_node; /* Node in the global PM domains list */ struct list_head master_links; /* Links with PM domain as a master */ struct list_head slave_links; /* Links with PM domain as a slave */ struct list_head dev_list; /* List of devices */ - struct mutex lock; struct dev_power_governor *gov; struct work_struct power_off_work; struct fwnode_handle *provider; /* Identity of the domain provider */ @@ -74,6 +75,8 @@ struct generic_pm_domain { unsigned int state_count; /* number of states */ unsigned int state_idx; /* state that genpd will go to when off */ void *free; /* Free the state that was allocated for default */ + const struct genpd_lock_ops *lock_ops; + struct mutex mlock; }; -- cgit v1.2.3 From d716f4798ff8c65ace4a6ab291f9a4ff265df4ba Mon Sep 17 
00:00:00 2001 From: Lina Iyer Date: Fri, 14 Oct 2016 10:47:55 -0700 Subject: PM / Domains: Support IRQ safe PM domains Generic Power Domains currently support turning on/off only in process context. This prevents the usage of PM domains for domains that could be powered on/off in a context where IRQs are disabled. Many such domains exist today and do not get powered off when the IRQ safe devices in that domain are powered off, because of this limitation. However, not all domains can operate in IRQ safe contexts. Genpd, therefore, has to support both cases where the domain may or may not operate in IRQ safe contexts. Configuring genpd to use an appropriate lock for that domain would allow domains that have IRQ safe devices to runtime suspend and resume in atomic context. To achieve domain specific locking, set the domain's ->flags to GENPD_FLAG_IRQ_SAFE while defining the domain. This indicates that genpd should use a spinlock instead of a mutex for locking the domain. Locking is abstracted through genpd_lock() and genpd_unlock() functions that use the flag to determine the appropriate lock to be used for that domain. Domains that have lower latency to suspend and resume and can operate with IRQs disabled may now be able to save power when the component devices and sub-domains are idle at runtime. The restriction this imposes on the domain hierarchy is that non-IRQ safe domains may not have IRQ-safe subdomains, but IRQ safe domains may have IRQ safe and non-IRQ safe subdomains and devices. Signed-off-by: Lina Iyer Acked-by: Ulf Hansson Reviewed-by: Kevin Hilman Signed-off-by: Rafael J. 
Wysocki --- drivers/base/power/domain.c | 111 ++++++++++++++++++++++++++++++++++++++++---- include/linux/pm_domain.h | 10 +++- 2 files changed, 110 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 4194012cdf86..aac656a889dc 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -74,11 +74,70 @@ static const struct genpd_lock_ops genpd_mtx_ops = { .unlock = genpd_unlock_mtx, }; +static void genpd_lock_spin(struct generic_pm_domain *genpd) + __acquires(&genpd->slock) +{ + unsigned long flags; + + spin_lock_irqsave(&genpd->slock, flags); + genpd->lock_flags = flags; +} + +static void genpd_lock_nested_spin(struct generic_pm_domain *genpd, + int depth) + __acquires(&genpd->slock) +{ + unsigned long flags; + + spin_lock_irqsave_nested(&genpd->slock, flags, depth); + genpd->lock_flags = flags; +} + +static int genpd_lock_interruptible_spin(struct generic_pm_domain *genpd) + __acquires(&genpd->slock) +{ + unsigned long flags; + + spin_lock_irqsave(&genpd->slock, flags); + genpd->lock_flags = flags; + return 0; +} + +static void genpd_unlock_spin(struct generic_pm_domain *genpd) + __releases(&genpd->slock) +{ + spin_unlock_irqrestore(&genpd->slock, genpd->lock_flags); +} + +static const struct genpd_lock_ops genpd_spin_ops = { + .lock = genpd_lock_spin, + .lock_nested = genpd_lock_nested_spin, + .lock_interruptible = genpd_lock_interruptible_spin, + .unlock = genpd_unlock_spin, +}; + #define genpd_lock(p) p->lock_ops->lock(p) #define genpd_lock_nested(p, d) p->lock_ops->lock_nested(p, d) #define genpd_lock_interruptible(p) p->lock_ops->lock_interruptible(p) #define genpd_unlock(p) p->lock_ops->unlock(p) +#define genpd_is_irq_safe(genpd) (genpd->flags & GENPD_FLAG_IRQ_SAFE) + +static inline bool irq_safe_dev_in_no_sleep_domain(struct device *dev, + struct generic_pm_domain *genpd) +{ + bool ret; + + ret = pm_runtime_is_irq_safe(dev) && !genpd_is_irq_safe(genpd); + 
+ /* Warn once for each IRQ safe dev in no sleep domain */ + if (ret) + dev_warn_once(dev, "PM domain %s will not be powered off\n", + genpd->name); + + return ret; +} + /* * Get the generic PM domain for a particular struct device. * This validates the struct device pointer, the PM domain pointer, @@ -343,7 +402,12 @@ static int genpd_poweroff(struct generic_pm_domain *genpd, bool is_async) if (stat > PM_QOS_FLAGS_NONE) return -EBUSY; - if (!pm_runtime_suspended(pdd->dev) || pdd->dev->power.irq_safe) + /* + * Do not allow PM domain to be powered off, when an IRQ safe + * device is part of a non-IRQ safe domain. + */ + if (!pm_runtime_suspended(pdd->dev) || + irq_safe_dev_in_no_sleep_domain(pdd->dev, genpd)) not_suspended++; } @@ -506,10 +570,10 @@ static int genpd_runtime_suspend(struct device *dev) } /* - * If power.irq_safe is set, this routine will be run with interrupts - * off, so it can't use mutexes. + * If power.irq_safe is set, this routine may be run with + * IRQs disabled, so suspend only if the PM domain also is irq_safe. */ - if (dev->power.irq_safe) + if (irq_safe_dev_in_no_sleep_domain(dev, genpd)) return 0; genpd_lock(genpd); @@ -543,8 +607,11 @@ static int genpd_runtime_resume(struct device *dev) if (IS_ERR(genpd)) return -EINVAL; - /* If power.irq_safe, the PM domain is never powered off. */ - if (dev->power.irq_safe) { + /* + * As we don't power off a non IRQ safe domain, which holds + * an IRQ safe device, we don't need to restore power to it. 
+ */ + if (irq_safe_dev_in_no_sleep_domain(dev, genpd)) { timed = false; goto out; } @@ -586,7 +653,8 @@ static int genpd_runtime_resume(struct device *dev) err_stop: genpd_stop_dev(genpd, dev); err_poweroff: - if (!dev->power.irq_safe) { + if (!pm_runtime_is_irq_safe(dev) || + (pm_runtime_is_irq_safe(dev) && genpd_is_irq_safe(genpd))) { genpd_lock(genpd); genpd_poweroff(genpd, 0); genpd_unlock(genpd); @@ -1223,6 +1291,17 @@ static int genpd_add_subdomain(struct generic_pm_domain *genpd, || genpd == subdomain) return -EINVAL; + /* + * If the domain can be powered on/off in an IRQ safe + * context, ensure that the subdomain can also be + * powered on/off in that context. + */ + if (!genpd_is_irq_safe(genpd) && genpd_is_irq_safe(subdomain)) { + WARN("Parent %s of subdomain %s must be IRQ safe\n", + genpd->name, subdomain->name); + return -EINVAL; + } + link = kzalloc(sizeof(*link), GFP_KERNEL); if (!link) return -ENOMEM; @@ -1337,6 +1416,17 @@ static int genpd_set_default_power_state(struct generic_pm_domain *genpd) return 0; } +static void genpd_lock_init(struct generic_pm_domain *genpd) +{ + if (genpd->flags & GENPD_FLAG_IRQ_SAFE) { + spin_lock_init(&genpd->slock); + genpd->lock_ops = &genpd_spin_ops; + } else { + mutex_init(&genpd->mlock); + genpd->lock_ops = &genpd_mtx_ops; + } +} + /** * pm_genpd_init - Initialize a generic I/O PM domain object. * @genpd: PM domain object to initialize. 
@@ -1356,8 +1446,7 @@ int pm_genpd_init(struct generic_pm_domain *genpd, INIT_LIST_HEAD(&genpd->master_links); INIT_LIST_HEAD(&genpd->slave_links); INIT_LIST_HEAD(&genpd->dev_list); - mutex_init(&genpd->mlock); - genpd->lock_ops = &genpd_mtx_ops; + genpd_lock_init(genpd); genpd->gov = gov; INIT_WORK(&genpd->power_off_work, genpd_power_off_work_fn); atomic_set(&genpd->sd_count, 0); @@ -2131,7 +2220,9 @@ static int pm_genpd_summary_one(struct seq_file *s, } list_for_each_entry(pm_data, &genpd->dev_list, list_node) { - kobj_path = kobject_get_path(&pm_data->dev->kobj, GFP_KERNEL); + kobj_path = kobject_get_path(&pm_data->dev->kobj, + genpd_is_irq_safe(genpd) ? + GFP_ATOMIC : GFP_KERNEL); if (kobj_path == NULL) continue; diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 811b968eb740..81ece61075df 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -15,9 +15,11 @@ #include #include #include +#include /* Defines used for the flags field in the struct generic_pm_domain */ #define GENPD_FLAG_PM_CLK (1U << 0) /* PM domain uses PM clk */ +#define GENPD_FLAG_IRQ_SAFE (1U << 1) /* PM domain operates in atomic */ enum gpd_status { GPD_STATE_ACTIVE = 0, /* PM domain is active */ @@ -76,7 +78,13 @@ struct generic_pm_domain { unsigned int state_idx; /* state that genpd will go to when off */ void *free; /* Free the state that was allocated for default */ const struct genpd_lock_ops *lock_ops; - struct mutex mlock; + union { + struct mutex mlock; + struct { + spinlock_t slock; + unsigned long lock_flags; + }; + }; }; -- cgit v1.2.3 From 2d0e30c30f84d08dc16f0f2af41f1b8a85f0755e Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 21 Oct 2016 12:46:33 +0200 Subject: bpf: add helper for retrieving current numa node id Use case is mainly for soreuseport to select sockets for the local numa node, but since generic, lets also add this for other networking and tracing program types. 
Suggested-by: Eric Dumazet Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/bpf.h | 1 + include/uapi/linux/bpf.h | 6 ++++++ kernel/bpf/core.c | 1 + kernel/bpf/helpers.c | 12 ++++++++++++ kernel/trace/bpf_trace.c | 2 ++ net/core/filter.c | 2 ++ 6 files changed, 24 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index c201017b5730..edcd96ded8aa 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -319,6 +319,7 @@ extern const struct bpf_func_proto bpf_map_delete_elem_proto; extern const struct bpf_func_proto bpf_get_prandom_u32_proto; extern const struct bpf_func_proto bpf_get_smp_processor_id_proto; +extern const struct bpf_func_proto bpf_get_numa_node_id_proto; extern const struct bpf_func_proto bpf_tail_call_proto; extern const struct bpf_func_proto bpf_ktime_get_ns_proto; extern const struct bpf_func_proto bpf_get_current_pid_tgid_proto; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index f09c70b97eca..374ef582ae18 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -426,6 +426,12 @@ enum bpf_func_id { */ BPF_FUNC_set_hash_invalid, + /** + * bpf_get_numa_node_id() + * Returns the id of the current NUMA node. 
+ */ + BPF_FUNC_get_numa_node_id, + __BPF_FUNC_MAX_ID, }; diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index aa6d98154106..82a04143368e 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1043,6 +1043,7 @@ const struct bpf_func_proto bpf_map_delete_elem_proto __weak; const struct bpf_func_proto bpf_get_prandom_u32_proto __weak; const struct bpf_func_proto bpf_get_smp_processor_id_proto __weak; +const struct bpf_func_proto bpf_get_numa_node_id_proto __weak; const struct bpf_func_proto bpf_ktime_get_ns_proto __weak; const struct bpf_func_proto bpf_get_current_pid_tgid_proto __weak; diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 39918402e6e9..045cbe673356 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -92,6 +93,17 @@ const struct bpf_func_proto bpf_get_smp_processor_id_proto = { .ret_type = RET_INTEGER, }; +BPF_CALL_0(bpf_get_numa_node_id) +{ + return numa_node_id(); +} + +const struct bpf_func_proto bpf_get_numa_node_id_proto = { + .func = bpf_get_numa_node_id, + .gpl_only = false, + .ret_type = RET_INTEGER, +}; + BPF_CALL_0(bpf_ktime_get_ns) { /* NMI safe access to clock monotonic */ diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 5dcb99281259..fa77311dadb2 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -422,6 +422,8 @@ static const struct bpf_func_proto *tracing_func_proto(enum bpf_func_id func_id) return bpf_get_trace_printk_proto(); case BPF_FUNC_get_smp_processor_id: return &bpf_get_smp_processor_id_proto; + case BPF_FUNC_get_numa_node_id: + return &bpf_get_numa_node_id_proto; case BPF_FUNC_perf_event_read: return &bpf_perf_event_read_proto; case BPF_FUNC_probe_write_user: diff --git a/net/core/filter.c b/net/core/filter.c index 00351cdf7d0c..cd9e2ba66b0e 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2492,6 +2492,8 @@ sk_filter_func_proto(enum bpf_func_id func_id) return 
&bpf_get_prandom_u32_proto; case BPF_FUNC_get_smp_processor_id: return &bpf_get_raw_smp_processor_id_proto; + case BPF_FUNC_get_numa_node_id: + return &bpf_get_numa_node_id_proto; case BPF_FUNC_tail_call: return &bpf_tail_call_proto; case BPF_FUNC_ktime_get_ns: -- cgit v1.2.3 From 304887041d953b6692c0d4a9f8fafb252d32e9a0 Mon Sep 17 00:00:00 2001 From: Vadim Pasternak Date: Thu, 20 Oct 2016 16:28:01 +0000 Subject: platform/x86: Introduce support for Mellanox hotplug driver Enable system support for the Mellanox Technologies hotplug platform driver, which provides support for the following Mellanox basic systems: "msx6710", "msx6720", "msb7700", "msn2700", "msx1410", "msn2410", "msb7800", "msn2740", "msn2100", as well as a number of derivative systems based on the above basic types. This driver handles hot-plug events for the power supply units, power cables and fans for the above systems. The Kconfig currently controlling compilation of this code is: drivers/platform/x86:config MLX_CPLD_PLATFORM tristate "Mellanox platform hotplug driver support" Signed-off-by: Vadim Pasternak Signed-off-by: Darren Hart --- MAINTAINERS | 7 + drivers/platform/x86/Kconfig | 11 + drivers/platform/x86/Makefile | 1 + drivers/platform/x86/mlxcpld-hotplug.c | 515 ++++++++++++++++++++++++++ include/linux/platform_data/mlxcpld-hotplug.h | 99 +++++ 5 files changed, 633 insertions(+) create mode 100644 drivers/platform/x86/mlxcpld-hotplug.c create mode 100644 include/linux/platform_data/mlxcpld-hotplug.h (limited to 'include/linux') diff --git a/MAINTAINERS b/MAINTAINERS index 1cd38a7e0064..3e30399e715f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7881,6 +7881,13 @@ L: platform-driver-x86@vger.kernel.org S: Supported F: arch/x86/platform/mellanox/mlx-platform.c +MELLANOX MLX CPLD HOTPLUG DRIVER +M: Vadim Pasternak +L: platform-driver-x86@vger.kernel.org +S: Supported +F: drivers/platform/x86/mlxcpld-hotplug.c +F: include/linux/platform_data/mlxcpld-hotplug.h + SOFT-ROCE DRIVER (rxe) M: Moni Shoua L: 
linux-rdma@vger.kernel.org diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig index b8a21d7b25d4..185376901d9c 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig @@ -1027,4 +1027,15 @@ config INTEL_TELEMETRY used to get various SoC events and parameters directly via debugfs files. Various tools may use this interface for SoC state monitoring. + +config MLX_CPLD_PLATFORM + tristate "Mellanox platform hotplug driver support" + default n + depends on MLX_PLATFORM + select HWMON + select I2C + ---help--- + This driver handles hot-plug events for the power suppliers, power + cables and fans on the wide range Mellanox IB and Ethernet systems. + endif # X86_PLATFORM_DEVICES diff --git a/drivers/platform/x86/Makefile b/drivers/platform/x86/Makefile index 2efa86d2a1a7..1f06b6339cf7 100644 --- a/drivers/platform/x86/Makefile +++ b/drivers/platform/x86/Makefile @@ -71,3 +71,4 @@ obj-$(CONFIG_INTEL_TELEMETRY) += intel_telemetry_core.o \ intel_telemetry_pltdrv.o \ intel_telemetry_debugfs.o obj-$(CONFIG_INTEL_PMC_CORE) += intel_pmc_core.o +obj-$(CONFIG_MLX_CPLD_PLATFORM) += mlxcpld-hotplug.o diff --git a/drivers/platform/x86/mlxcpld-hotplug.c b/drivers/platform/x86/mlxcpld-hotplug.c new file mode 100644 index 000000000000..aff3686b3b37 --- /dev/null +++ b/drivers/platform/x86/mlxcpld-hotplug.c @@ -0,0 +1,515 @@ +/* + * drivers/platform/x86/mlxcpld-hotplug.c + * Copyright (c) 2016 Mellanox Technologies. All rights reserved. + * Copyright (c) 2016 Vadim Pasternak + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Offset of event and mask registers from status register */ +#define MLXCPLD_HOTPLUG_EVENT_OFF 1 +#define MLXCPLD_HOTPLUG_MASK_OFF 2 +#define MLXCPLD_HOTPLUG_AGGR_MASK_OFF 1 + +#define MLXCPLD_HOTPLUG_ATTRS_NUM 8 + +/** + * enum mlxcpld_hotplug_attr_type - sysfs attributes for hotplug events: + * @MLXCPLD_HOTPLUG_ATTR_TYPE_PSU: power supply unit attribute; + * @MLXCPLD_HOTPLUG_ATTR_TYPE_PWR: power cable attribute; + * @MLXCPLD_HOTPLUG_ATTR_TYPE_FAN: FAN drawer attribute; + */ +enum mlxcpld_hotplug_attr_type { + MLXCPLD_HOTPLUG_ATTR_TYPE_PSU, + MLXCPLD_HOTPLUG_ATTR_TYPE_PWR, + MLXCPLD_HOTPLUG_ATTR_TYPE_FAN, +}; + +/** + * struct mlxcpld_hotplug_priv_data - platform private data: + * @irq: platform interrupt number; + * @pdev: platform device; + * @plat: platform data; + * @hwmon: hwmon device; + * @mlxcpld_hotplug_attr: sysfs attributes array; + * @mlxcpld_hotplug_dev_attr: sysfs sensor device attribute array; + * @group: sysfs attribute group; + * @groups: list of sysfs attribute group for hwmon registration; + * @dwork: delayed work template; + * @lock: spin lock; + * @aggr_cache: last value of aggregation register status; + * @psu_cache: last value of PSU register status; + * @pwr_cache: last value of power register status; + * @fan_cache: last value of FAN register status; + */ +struct mlxcpld_hotplug_priv_data { + int irq; + struct platform_device *pdev; + struct mlxcpld_hotplug_platform_data *plat; + struct device *hwmon; + struct attribute *mlxcpld_hotplug_attr[MLXCPLD_HOTPLUG_ATTRS_NUM + 1]; + struct sensor_device_attribute_2 + mlxcpld_hotplug_dev_attr[MLXCPLD_HOTPLUG_ATTRS_NUM]; + struct attribute_group group; + const struct attribute_group *groups[2]; + struct delayed_work dwork; + spinlock_t lock; + u8 aggr_cache; + u8 psu_cache; + u8 pwr_cache; + u8 fan_cache; +}; + +static ssize_t mlxcpld_hotplug_attr_show(struct 
device *dev, + struct device_attribute *attr, + char *buf) +{ + struct platform_device *pdev = to_platform_device(dev); + struct mlxcpld_hotplug_priv_data *priv = platform_get_drvdata(pdev); + int index = to_sensor_dev_attr_2(attr)->index; + int nr = to_sensor_dev_attr_2(attr)->nr; + u8 reg_val = 0; + + switch (nr) { + case MLXCPLD_HOTPLUG_ATTR_TYPE_PSU: + /* Bit = 0 : PSU is present. */ + reg_val = !!!(inb(priv->plat->psu_reg_offset) & BIT(index)); + break; + + case MLXCPLD_HOTPLUG_ATTR_TYPE_PWR: + /* Bit = 1 : power cable is attached. */ + reg_val = !!(inb(priv->plat->pwr_reg_offset) & BIT(index % + priv->plat->pwr_count)); + break; + + case MLXCPLD_HOTPLUG_ATTR_TYPE_FAN: + /* Bit = 0 : FAN is present. */ + reg_val = !!!(inb(priv->plat->fan_reg_offset) & BIT(index % + priv->plat->fan_count)); + break; + } + + return sprintf(buf, "%u\n", reg_val); +} + +#define PRIV_ATTR(i) priv->mlxcpld_hotplug_attr[i] +#define PRIV_DEV_ATTR(i) priv->mlxcpld_hotplug_dev_attr[i] +static int mlxcpld_hotplug_attr_init(struct mlxcpld_hotplug_priv_data *priv) +{ + int num_attrs = priv->plat->psu_count + priv->plat->pwr_count + + priv->plat->fan_count; + int i; + + priv->group.attrs = devm_kzalloc(&priv->pdev->dev, num_attrs * + sizeof(struct attribute *), + GFP_KERNEL); + if (!priv->group.attrs) + return -ENOMEM; + + for (i = 0; i < num_attrs; i++) { + PRIV_ATTR(i) = &PRIV_DEV_ATTR(i).dev_attr.attr; + + if (i < priv->plat->psu_count) { + PRIV_ATTR(i)->name = devm_kasprintf(&priv->pdev->dev, + GFP_KERNEL, "psu%u", i + 1); + PRIV_DEV_ATTR(i).nr = MLXCPLD_HOTPLUG_ATTR_TYPE_PSU; + } else if (i < priv->plat->psu_count + priv->plat->pwr_count) { + PRIV_ATTR(i)->name = devm_kasprintf(&priv->pdev->dev, + GFP_KERNEL, "pwr%u", i % + priv->plat->pwr_count + 1); + PRIV_DEV_ATTR(i).nr = MLXCPLD_HOTPLUG_ATTR_TYPE_PWR; + } else { + PRIV_ATTR(i)->name = devm_kasprintf(&priv->pdev->dev, + GFP_KERNEL, "fan%u", i % + priv->plat->fan_count + 1); + PRIV_DEV_ATTR(i).nr = MLXCPLD_HOTPLUG_ATTR_TYPE_FAN; + } 
+ + if (!PRIV_ATTR(i)->name) { + dev_err(&priv->pdev->dev, "Memory allocation failed for sysfs attribute %d.\n", + i + 1); + return -ENOMEM; + } + + PRIV_DEV_ATTR(i).dev_attr.attr.name = PRIV_ATTR(i)->name; + PRIV_DEV_ATTR(i).dev_attr.attr.mode = S_IRUGO; + PRIV_DEV_ATTR(i).dev_attr.show = mlxcpld_hotplug_attr_show; + PRIV_DEV_ATTR(i).index = i; + sysfs_attr_init(&PRIV_DEV_ATTR(i).dev_attr.attr); + } + + priv->group.attrs = priv->mlxcpld_hotplug_attr; + priv->groups[0] = &priv->group; + priv->groups[1] = NULL; + + return 0; +} + +static int mlxcpld_hotplug_device_create(struct device *dev, + struct mlxcpld_hotplug_device *item) +{ + item->adapter = i2c_get_adapter(item->bus); + if (!item->adapter) { + dev_err(dev, "Failed to get adapter for bus %d\n", + item->bus); + return -EFAULT; + } + + item->client = i2c_new_device(item->adapter, &item->brdinfo); + if (!item->client) { + dev_err(dev, "Failed to create client %s at bus %d at addr 0x%02x\n", + item->brdinfo.type, item->bus, item->brdinfo.addr); + i2c_put_adapter(item->adapter); + item->adapter = NULL; + return -EFAULT; + } + + return 0; +} + +static void mlxcpld_hotplug_device_destroy(struct mlxcpld_hotplug_device *item) +{ + if (item->client) { + i2c_unregister_device(item->client); + item->client = NULL; + } + + if (item->adapter) { + i2c_put_adapter(item->adapter); + item->adapter = NULL; + } +} + +static inline void +mlxcpld_hotplug_work_helper(struct device *dev, + struct mlxcpld_hotplug_device *item, u8 is_inverse, + u16 offset, u8 mask, u8 *cache) +{ + u8 val, asserted; + int bit; + + /* Mask event. */ + outb(0, offset + MLXCPLD_HOTPLUG_MASK_OFF); + /* Read status. */ + val = inb(offset) & mask; + asserted = *cache ^ val; + *cache = val; + + /* + * Validate if item related to received signal type is valid. + * It should never happen, excepted the situation when some + * piece of hardware is broken. In such situation just produce + * error message and return. 
Caller must continue to handle the + * signals from other devices if any. + */ + if (unlikely(!item)) { + dev_err(dev, "False signal is received: register at offset 0x%02x, mask 0x%02x.\n", + offset, mask); + return; + } + + for_each_set_bit(bit, (unsigned long *)&asserted, 8) { + if (val & BIT(bit)) { + if (is_inverse) + mlxcpld_hotplug_device_destroy(item + bit); + else + mlxcpld_hotplug_device_create(dev, item + bit); + } else { + if (is_inverse) + mlxcpld_hotplug_device_create(dev, item + bit); + else + mlxcpld_hotplug_device_destroy(item + bit); + } + } + + /* Acknowledge event. */ + outb(0, offset + MLXCPLD_HOTPLUG_EVENT_OFF); + /* Unmask event. */ + outb(mask, offset + MLXCPLD_HOTPLUG_MASK_OFF); +} + +/* + * mlxcpld_hotplug_work_handler - performs traversing of CPLD interrupt + * registers according to the below hierarchy schema: + * + * Aggregation registers (status/mask) + * PSU registers: *---* + * *-----------------* | | + * |status/event/mask|----->| * | + * *-----------------* | | + * Power registers: | | + * *-----------------* | | + * |status/event/mask|----->| * |---> CPU + * *-----------------* | | + * FAN registers: + * *-----------------* | | + * |status/event/mask|----->| * | + * *-----------------* | | + * *---* + * In case some system changed are detected: FAN in/out, PSU in/out, power + * cable attached/detached, relevant device is created or destroyed. + */ +static void mlxcpld_hotplug_work_handler(struct work_struct *work) +{ + struct mlxcpld_hotplug_priv_data *priv = container_of(work, + struct mlxcpld_hotplug_priv_data, dwork.work); + u8 val, aggr_asserted; + unsigned long flags; + + /* Mask aggregation event. */ + outb(0, priv->plat->top_aggr_offset + MLXCPLD_HOTPLUG_AGGR_MASK_OFF); + /* Read aggregation status. */ + val = inb(priv->plat->top_aggr_offset) & priv->plat->top_aggr_mask; + aggr_asserted = priv->aggr_cache ^ val; + priv->aggr_cache = val; + + /* Handle PSU configuration changes. 
*/ + if (aggr_asserted & priv->plat->top_aggr_psu_mask) + mlxcpld_hotplug_work_helper(&priv->pdev->dev, priv->plat->psu, + 1, priv->plat->psu_reg_offset, + priv->plat->psu_mask, + &priv->psu_cache); + + /* Handle power cable configuration changes. */ + if (aggr_asserted & priv->plat->top_aggr_pwr_mask) + mlxcpld_hotplug_work_helper(&priv->pdev->dev, priv->plat->pwr, + 0, priv->plat->pwr_reg_offset, + priv->plat->pwr_mask, + &priv->pwr_cache); + + /* Handle FAN configuration changes. */ + if (aggr_asserted & priv->plat->top_aggr_fan_mask) + mlxcpld_hotplug_work_helper(&priv->pdev->dev, priv->plat->fan, + 1, priv->plat->fan_reg_offset, + priv->plat->fan_mask, + &priv->fan_cache); + + if (aggr_asserted) { + spin_lock_irqsave(&priv->lock, flags); + + /* + * It is possible, that some signals have been inserted, while + * interrupt has been masked by mlxcpld_hotplug_work_handler. + * In this case such signals will be missed. In order to handle + * these signals delayed work is canceled and work task + * re-scheduled for immediate execution. It allows to handle + * missed signals, if any. In other case work handler just + * validates that no new signals have been received during + * masking. + */ + cancel_delayed_work(&priv->dwork); + schedule_delayed_work(&priv->dwork, 0); + + spin_unlock_irqrestore(&priv->lock, flags); + + return; + } + + /* Unmask aggregation event (no need acknowledge). */ + outb(priv->plat->top_aggr_mask, priv->plat->top_aggr_offset + + MLXCPLD_HOTPLUG_AGGR_MASK_OFF); +} + +static void mlxcpld_hotplug_set_irq(struct mlxcpld_hotplug_priv_data *priv) +{ + /* Clear psu presense event. */ + outb(0, priv->plat->psu_reg_offset + MLXCPLD_HOTPLUG_EVENT_OFF); + /* Set psu initial status as mask and unmask psu event. */ + priv->psu_cache = priv->plat->psu_mask; + outb(priv->plat->psu_mask, priv->plat->psu_reg_offset + + MLXCPLD_HOTPLUG_MASK_OFF); + + /* Clear power cable event. 
*/ + outb(0, priv->plat->pwr_reg_offset + MLXCPLD_HOTPLUG_EVENT_OFF); + /* Keep power initial status as zero and unmask power event. */ + outb(priv->plat->pwr_mask, priv->plat->pwr_reg_offset + + MLXCPLD_HOTPLUG_MASK_OFF); + + /* Clear fan presense event. */ + outb(0, priv->plat->fan_reg_offset + MLXCPLD_HOTPLUG_EVENT_OFF); + /* Set fan initial status as mask and unmask fan event. */ + priv->fan_cache = priv->plat->fan_mask; + outb(priv->plat->fan_mask, priv->plat->fan_reg_offset + + MLXCPLD_HOTPLUG_MASK_OFF); + + /* Keep aggregation initial status as zero and unmask events. */ + outb(priv->plat->top_aggr_mask, priv->plat->top_aggr_offset + + MLXCPLD_HOTPLUG_AGGR_MASK_OFF); + + /* Invoke work handler for initializing hot plug devices setting. */ + mlxcpld_hotplug_work_handler(&priv->dwork.work); + + enable_irq(priv->irq); +} + +static void mlxcpld_hotplug_unset_irq(struct mlxcpld_hotplug_priv_data *priv) +{ + int i; + + disable_irq(priv->irq); + cancel_delayed_work_sync(&priv->dwork); + + /* Mask aggregation event. */ + outb(0, priv->plat->top_aggr_offset + MLXCPLD_HOTPLUG_AGGR_MASK_OFF); + + /* Mask psu presense event. */ + outb(0, priv->plat->psu_reg_offset + MLXCPLD_HOTPLUG_MASK_OFF); + /* Clear psu presense event. */ + outb(0, priv->plat->psu_reg_offset + MLXCPLD_HOTPLUG_EVENT_OFF); + + /* Mask power cable event. */ + outb(0, priv->plat->pwr_reg_offset + MLXCPLD_HOTPLUG_MASK_OFF); + /* Clear power cable event. */ + outb(0, priv->plat->pwr_reg_offset + MLXCPLD_HOTPLUG_EVENT_OFF); + + /* Mask fan presense event. */ + outb(0, priv->plat->fan_reg_offset + MLXCPLD_HOTPLUG_MASK_OFF); + /* Clear fan presense event. */ + outb(0, priv->plat->fan_reg_offset + MLXCPLD_HOTPLUG_EVENT_OFF); + + /* Remove all the attached devices. 
*/ + for (i = 0; i < priv->plat->psu_count; i++) + mlxcpld_hotplug_device_destroy(priv->plat->psu + i); + + for (i = 0; i < priv->plat->pwr_count; i++) + mlxcpld_hotplug_device_destroy(priv->plat->pwr + i); + + for (i = 0; i < priv->plat->fan_count; i++) + mlxcpld_hotplug_device_destroy(priv->plat->fan + i); +} + +static irqreturn_t mlxcpld_hotplug_irq_handler(int irq, void *dev) +{ + struct mlxcpld_hotplug_priv_data *priv = + (struct mlxcpld_hotplug_priv_data *)dev; + + /* Schedule work task for immediate execution.*/ + schedule_delayed_work(&priv->dwork, 0); + + return IRQ_HANDLED; +} + +static int mlxcpld_hotplug_probe(struct platform_device *pdev) +{ + struct mlxcpld_hotplug_platform_data *pdata; + struct mlxcpld_hotplug_priv_data *priv; + int err; + + pdata = dev_get_platdata(&pdev->dev); + if (!pdata) { + dev_err(&pdev->dev, "Failed to get platform data.\n"); + return -EINVAL; + } + + priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + priv->pdev = pdev; + priv->plat = pdata; + + priv->irq = platform_get_irq(pdev, 0); + if (priv->irq < 0) { + dev_err(&pdev->dev, "Failed to get platform irq: %d\n", + priv->irq); + return priv->irq; + } + + err = devm_request_irq(&pdev->dev, priv->irq, + mlxcpld_hotplug_irq_handler, 0, pdev->name, + priv); + if (err) { + dev_err(&pdev->dev, "Failed to request irq: %d\n", err); + return err; + } + disable_irq(priv->irq); + + INIT_DELAYED_WORK(&priv->dwork, mlxcpld_hotplug_work_handler); + spin_lock_init(&priv->lock); + + err = mlxcpld_hotplug_attr_init(priv); + if (err) { + dev_err(&pdev->dev, "Failed to allocate attributes: %d\n", err); + return err; + } + + priv->hwmon = devm_hwmon_device_register_with_groups(&pdev->dev, + "mlxcpld_hotplug", priv, priv->groups); + if (IS_ERR(priv->hwmon)) { + dev_err(&pdev->dev, "Failed to register hwmon device %ld\n", + PTR_ERR(priv->hwmon)); + return PTR_ERR(priv->hwmon); + } + + platform_set_drvdata(pdev, priv); + + /* Perform initial interrupts 
setup. */ + mlxcpld_hotplug_set_irq(priv); + + return 0; +} + +static int mlxcpld_hotplug_remove(struct platform_device *pdev) +{ + struct mlxcpld_hotplug_priv_data *priv = platform_get_drvdata(pdev); + + /* Clean interrupts setup. */ + mlxcpld_hotplug_unset_irq(priv); + + return 0; +} + +static struct platform_driver mlxcpld_hotplug_driver = { + .driver = { + .name = "mlxcpld-hotplug", + }, + .probe = mlxcpld_hotplug_probe, + .remove = mlxcpld_hotplug_remove, +}; + +module_platform_driver(mlxcpld_hotplug_driver); + +MODULE_AUTHOR("Vadim Pasternak "); +MODULE_DESCRIPTION("Mellanox CPLD hotplug platform driver"); +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_ALIAS("platform:mlxcpld-hotplug"); diff --git a/include/linux/platform_data/mlxcpld-hotplug.h b/include/linux/platform_data/mlxcpld-hotplug.h new file mode 100644 index 000000000000..e4cfcffaa6f4 --- /dev/null +++ b/include/linux/platform_data/mlxcpld-hotplug.h @@ -0,0 +1,99 @@ +/* + * include/linux/platform_data/mlxcpld-hotplug.h + * Copyright (c) 2016 Mellanox Technologies. All rights reserved. + * Copyright (c) 2016 Vadim Pasternak + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __LINUX_PLATFORM_DATA_MLXCPLD_HOTPLUG_H +#define __LINUX_PLATFORM_DATA_MLXCPLD_HOTPLUG_H + +/** + * struct mlxcpld_hotplug_device - I2C device data: + * @adapter: I2C device adapter; + * @client: I2C device client; + * @brdinfo: device board information; + * @bus: I2C bus, where device is attached; + * + * Structure represents I2C hotplug device static data (board topology) and + * dynamic data (related kernel objects handles). 
+ */ +struct mlxcpld_hotplug_device { + struct i2c_adapter *adapter; + struct i2c_client *client; + struct i2c_board_info brdinfo; + u16 bus; +}; + +/** + * struct mlxcpld_hotplug_platform_data - device platform data: + * @top_aggr_offset: offset of top aggregation interrupt register; + * @top_aggr_mask: top aggregation interrupt common mask; + * @top_aggr_psu_mask: top aggregation interrupt PSU mask; + * @psu_reg_offset: offset of PSU interrupt register; + * @psu_mask: PSU interrupt mask; + * @psu_count: number of equipped replaceable PSUs; + * @psu: pointer to PSU devices data array; + * @top_aggr_pwr_mask: top aggregation interrupt power mask; + * @pwr_reg_offset: offset of power interrupt register; + * @pwr_mask: power interrupt mask; + * @pwr_count: number of power sources; + * @pwr: pointer to power devices data array; + * @top_aggr_fan_mask: top aggregation interrupt FAN mask; + * @fan_reg_offset: offset of FAN interrupt register; + * @fan_mask: FAN interrupt mask; + * @fan_count: number of equipped replaceable FANs; + * @fan: pointer to FAN devices data array; + * + * Structure represents board platform data, related to system hotplug events, + * like FAN, PSU, power cable insertion and removal. This data provides the + * number of hot-pluggable devices and hardware description for event handling.
+ */ +struct mlxcpld_hotplug_platform_data { + u16 top_aggr_offset; + u8 top_aggr_mask; + u8 top_aggr_psu_mask; + u16 psu_reg_offset; + u8 psu_mask; + u8 psu_count; + struct mlxcpld_hotplug_device *psu; + u8 top_aggr_pwr_mask; + u16 pwr_reg_offset; + u8 pwr_mask; + u8 pwr_count; + struct mlxcpld_hotplug_device *pwr; + u8 top_aggr_fan_mask; + u16 fan_reg_offset; + u8 fan_mask; + u8 fan_count; + struct mlxcpld_hotplug_device *fan; +}; + +#endif /* __LINUX_PLATFORM_DATA_MLXCPLD_HOTPLUG_H */ -- cgit v1.2.3 From b440f1d90ec54fd2586537ea46e958343ad4b151 Mon Sep 17 00:00:00 2001 From: Tomas Novotny Date: Tue, 11 Oct 2016 15:57:40 +0200 Subject: iio: dac: mcp4725: use regulator framework Use a standard framework to get the reference voltage. It is done that way in the iio subsystem and it will simplify extending the driver. Structure mcp4725_platform_data is left undeleted because it is used in the next patch. This change breaks the current users of the driver, but there is no mainline user of struct mcp4725_platform_data.
Signed-off-by: Tomas Novotny Signed-off-by: Jonathan Cameron --- drivers/iio/dac/mcp4725.c | 46 +++++++++++++++++++++++++++++++++-------- include/linux/iio/dac/mcp4725.h | 1 - 2 files changed, 37 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/iio/dac/mcp4725.c b/drivers/iio/dac/mcp4725.c index cca935c06f2b..2b28b1f5b3a2 100644 --- a/drivers/iio/dac/mcp4725.c +++ b/drivers/iio/dac/mcp4725.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -28,10 +29,10 @@ struct mcp4725_data { struct i2c_client *client; - u16 vref_mv; u16 dac_value; bool powerdown; unsigned powerdown_mode; + struct regulator *vdd_reg; }; static int mcp4725_suspend(struct device *dev) @@ -283,13 +284,18 @@ static int mcp4725_read_raw(struct iio_dev *indio_dev, int *val, int *val2, long mask) { struct mcp4725_data *data = iio_priv(indio_dev); + int ret; switch (mask) { case IIO_CHAN_INFO_RAW: *val = data->dac_value; return IIO_VAL_INT; case IIO_CHAN_INFO_SCALE: - *val = data->vref_mv; + ret = regulator_get_voltage(data->vdd_reg); + if (ret < 0) + return ret; + + *val = ret / 1000; *val2 = 12; return IIO_VAL_FRACTIONAL_LOG2; } @@ -328,12 +334,12 @@ static int mcp4725_probe(struct i2c_client *client, { struct mcp4725_data *data; struct iio_dev *indio_dev; - struct mcp4725_platform_data *platform_data = client->dev.platform_data; + struct mcp4725_platform_data *pdata = dev_get_platdata(&client->dev); u8 inbuf[3]; u8 pd; int err; - if (!platform_data || !platform_data->vref_mv) { + if (!pdata) { dev_err(&client->dev, "invalid platform data"); return -EINVAL; } @@ -345,6 +351,14 @@ static int mcp4725_probe(struct i2c_client *client, i2c_set_clientdata(client, indio_dev); data->client = client; + data->vdd_reg = devm_regulator_get(&client->dev, "vdd"); + if (IS_ERR(data->vdd_reg)) + return PTR_ERR(data->vdd_reg); + + err = regulator_enable(data->vdd_reg); + if (err) + return err; + indio_dev->dev.parent = &client->dev; indio_dev->name = id->name; 
indio_dev->info = &mcp4725_info; @@ -352,25 +366,39 @@ static int mcp4725_probe(struct i2c_client *client, indio_dev->num_channels = 1; indio_dev->modes = INDIO_DIRECT_MODE; - data->vref_mv = platform_data->vref_mv; - /* read current DAC value */ err = i2c_master_recv(client, inbuf, 3); if (err < 0) { dev_err(&client->dev, "failed to read DAC value"); - return err; + goto err_disable_vdd_reg; } pd = (inbuf[0] >> 1) & 0x3; data->powerdown = pd > 0 ? true : false; data->powerdown_mode = pd ? pd - 1 : 2; /* largest register to gnd */ data->dac_value = (inbuf[1] << 4) | (inbuf[2] >> 4); - return iio_device_register(indio_dev); + err = iio_device_register(indio_dev); + if (err) + goto err_disable_vdd_reg; + + return 0; + + +err_disable_vdd_reg: + regulator_disable(data->vdd_reg); + + return err; } static int mcp4725_remove(struct i2c_client *client) { - iio_device_unregister(i2c_get_clientdata(client)); + struct iio_dev *indio_dev = i2c_get_clientdata(client); + struct mcp4725_data *data = iio_priv(indio_dev); + + iio_device_unregister(indio_dev); + + regulator_disable(data->vdd_reg); + return 0; } diff --git a/include/linux/iio/dac/mcp4725.h b/include/linux/iio/dac/mcp4725.h index 91530e6611e9..7c062e8d2a48 100644 --- a/include/linux/iio/dac/mcp4725.h +++ b/include/linux/iio/dac/mcp4725.h @@ -10,7 +10,6 @@ #define IIO_DAC_MCP4725_H_ struct mcp4725_platform_data { - u16 vref_mv; }; #endif /* IIO_DAC_MCP4725_H_ */ -- cgit v1.2.3 From 29157c6d601db8cb9f3bea93fc933b73db3bf869 Mon Sep 17 00:00:00 2001 From: Tomas Novotny Date: Tue, 18 Oct 2016 19:43:08 +0200 Subject: iio: dac: mcp4725: support voltage reference selection MCP47x6 chip supports selection of a voltage reference (VDD, VREF buffered or unbuffered). MCP4725 doesn't have this feature thus the eventual setting is ignored and user is warned. The setting is stored only in the volatile memory of the chip. You need to manually store it to the EEPROM of the chip via 'store_eeprom' sysfs entry. 
Signed-off-by: Tomas Novotny Signed-off-by: Jonathan Cameron --- drivers/iio/dac/mcp4725.c | 99 ++++++++++++++++++++++++++++++++++++++--- include/linux/iio/dac/mcp4725.h | 11 +++++ 2 files changed, 103 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/iio/dac/mcp4725.c b/drivers/iio/dac/mcp4725.c index 5b2dfa0a0d2c..1e9d8f387e00 100644 --- a/drivers/iio/dac/mcp4725.c +++ b/drivers/iio/dac/mcp4725.c @@ -27,12 +27,20 @@ #define MCP4725_DRV_NAME "mcp4725" +#define MCP472X_REF_VDD 0x00 +#define MCP472X_REF_VREF_UNBUFFERED 0x02 +#define MCP472X_REF_VREF_BUFFERED 0x03 + struct mcp4725_data { struct i2c_client *client; + int id; + unsigned ref_mode; + bool vref_buffered; u16 dac_value; bool powerdown; unsigned powerdown_mode; struct regulator *vdd_reg; + struct regulator *vref_reg; }; static int mcp4725_suspend(struct device *dev) @@ -87,6 +95,7 @@ static ssize_t mcp4725_store_eeprom(struct device *dev, return 0; inoutbuf[0] = 0x60; /* write EEPROM */ + inoutbuf[0] |= data->ref_mode << 3; inoutbuf[1] = data->dac_value >> 4; inoutbuf[2] = (data->dac_value & 0xf) << 4; @@ -279,6 +288,28 @@ static int mcp4725_set_value(struct iio_dev *indio_dev, int val) return 0; } +static int mcp4726_set_cfg(struct iio_dev *indio_dev) +{ + struct mcp4725_data *data = iio_priv(indio_dev); + u8 outbuf[3]; + int ret; + + outbuf[0] = 0x40; + outbuf[0] |= data->ref_mode << 3; + if (data->powerdown) + outbuf[0] |= data->powerdown << 1; + outbuf[1] = data->dac_value >> 4; + outbuf[2] = (data->dac_value & 0xf) << 4; + + ret = i2c_master_send(data->client, outbuf, 3); + if (ret < 0) + return ret; + else if (ret != 3) + return -EIO; + else + return 0; +} + static int mcp4725_read_raw(struct iio_dev *indio_dev, struct iio_chan_spec const *chan, int *val, int *val2, long mask) @@ -291,7 +322,11 @@ static int mcp4725_read_raw(struct iio_dev *indio_dev, *val = data->dac_value; return IIO_VAL_INT; case IIO_CHAN_INFO_SCALE: - ret = regulator_get_voltage(data->vdd_reg); + if 
(data->ref_mode == MCP472X_REF_VDD) + ret = regulator_get_voltage(data->vdd_reg); + else + ret = regulator_get_voltage(data->vref_reg); + if (ret < 0) return ret; @@ -335,8 +370,9 @@ static int mcp4725_probe(struct i2c_client *client, struct mcp4725_data *data; struct iio_dev *indio_dev; struct mcp4725_platform_data *pdata = dev_get_platdata(&client->dev); - u8 inbuf[3]; + u8 inbuf[4]; u8 pd; + u8 ref; int err; if (!pdata) { @@ -350,6 +386,26 @@ static int mcp4725_probe(struct i2c_client *client, data = iio_priv(indio_dev); i2c_set_clientdata(client, indio_dev); data->client = client; + data->id = id->driver_data; + + if (data->id == MCP4725 && pdata->use_vref) { + dev_err(&client->dev, + "external reference is unavailable on MCP4725"); + return -EINVAL; + } + + if (!pdata->use_vref && pdata->vref_buffered) { + dev_err(&client->dev, + "buffering is unavailable on the internal reference"); + return -EINVAL; + } + + if (!pdata->use_vref) + data->ref_mode = MCP472X_REF_VDD; + else + data->ref_mode = pdata->vref_buffered ? + MCP472X_REF_VREF_BUFFERED : + MCP472X_REF_VREF_UNBUFFERED; data->vdd_reg = devm_regulator_get(&client->dev, "vdd"); if (IS_ERR(data->vdd_reg)) @@ -359,6 +415,18 @@ static int mcp4725_probe(struct i2c_client *client, if (err) return err; + if (pdata->use_vref) { + data->vref_reg = devm_regulator_get(&client->dev, "vref"); + if (IS_ERR(data->vref_reg)) { + err = PTR_ERR(data->vdd_reg); + goto err_disable_vdd_reg; + } + + err = regulator_enable(data->vref_reg); + if (err) + goto err_disable_vdd_reg; + } + indio_dev->dev.parent = &client->dev; indio_dev->name = id->name; indio_dev->info = &mcp4725_info; @@ -366,23 +434,38 @@ static int mcp4725_probe(struct i2c_client *client, indio_dev->num_channels = 1; indio_dev->modes = INDIO_DIRECT_MODE; - /* read current DAC value */ - err = i2c_master_recv(client, inbuf, 3); + /* read current DAC value and settings */ + err = i2c_master_recv(client, inbuf, data->id == MCP4725 ? 
3 : 4); + if (err < 0) { dev_err(&client->dev, "failed to read DAC value"); - goto err_disable_vdd_reg; + goto err_disable_vref_reg; } pd = (inbuf[0] >> 1) & 0x3; data->powerdown = pd > 0 ? true : false; data->powerdown_mode = pd ? pd - 1 : 2; /* largest resistor to gnd */ data->dac_value = (inbuf[1] << 4) | (inbuf[2] >> 4); - + if (data->id == MCP4726) + ref = (inbuf[3] >> 3) & 0x3; + + if (data->id == MCP4726 && ref != data->ref_mode) { + dev_info(&client->dev, + "voltage reference mode differs (conf: %u, eeprom: %u), setting %u", + data->ref_mode, ref, data->ref_mode); + err = mcp4726_set_cfg(indio_dev); + if (err < 0) + goto err_disable_vref_reg; + } + err = iio_device_register(indio_dev); if (err) - goto err_disable_vdd_reg; + goto err_disable_vref_reg; return 0; +err_disable_vref_reg: + if (data->vref_reg) + regulator_disable(data->vref_reg); err_disable_vdd_reg: regulator_disable(data->vdd_reg); @@ -397,6 +480,8 @@ static int mcp4725_remove(struct i2c_client *client) iio_device_unregister(indio_dev); + if (data->vref_reg) + regulator_disable(data->vref_reg); regulator_disable(data->vdd_reg); return 0; diff --git a/include/linux/iio/dac/mcp4725.h b/include/linux/iio/dac/mcp4725.h index 7c062e8d2a48..628b2cf54c50 100644 --- a/include/linux/iio/dac/mcp4725.h +++ b/include/linux/iio/dac/mcp4725.h @@ -9,7 +9,18 @@ #ifndef IIO_DAC_MCP4725_H_ #define IIO_DAC_MCP4725_H_ +/** + * struct mcp4725_platform_data - MCP4725/6 DAC specific data. + * @use_vref: Whether an external reference voltage on Vref pin should be used. + * Additional vref-supply must be specified when used. + * @vref_buffered: Controls buffering of the external reference voltage. + * + * Vref related settings are available only on MCP4726. See + * Documentation/devicetree/bindings/iio/dac/mcp4725.txt for more information.
+ */ struct mcp4725_platform_data { + bool use_vref; + bool vref_buffered; }; #endif /* IIO_DAC_MCP4725_H_ */ -- cgit v1.2.3 From 7b889adbac8db4220e07c93fff0b0b235d08496b Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Sun, 23 Oct 2016 09:30:44 -0700 Subject: doc: add missing docbook parameter for fence-array Fixes 'make htmldocs' warning. Signed-off-by: Stephen Hemminger Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20161023093044.324edfb6@xeon-e3 --- include/linux/fence-array.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/fence-array.h b/include/linux/fence-array.h index a44794e508df..9ea2bde10ac1 100644 --- a/include/linux/fence-array.h +++ b/include/linux/fence-array.h @@ -53,6 +53,7 @@ extern const struct fence_ops fence_array_ops; /** * fence_is_array - check if a fence is from the array subsclass + * @fence: fence to test * * Return true if it is a fence_array and false otherwise. */ -- cgit v1.2.3 From 8c27ceff3604b249a9efafbd1bd8b141b79e619d Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 18 Oct 2016 10:12:27 -0200 Subject: docs: fix locations of several documents that got moved The previous patch renamed several files that are cross-referenced along the Kernel documentation. Adjust the links to point to the right places. 
Signed-off-by: Mauro Carvalho Chehab --- Documentation/00-INDEX | 54 +++++++++++----------- Documentation/ABI/README | 2 +- Documentation/ABI/testing/sysfs-kernel-slab | 2 +- Documentation/DocBook/kernel-hacking.tmpl | 4 +- Documentation/acpi/video_extension.txt | 2 +- Documentation/admin-guide/README.rst | 13 +++--- Documentation/admin-guide/bad-memory.rst | 2 +- Documentation/admin-guide/binfmt-misc.rst | 4 +- Documentation/admin-guide/braille-console.rst | 6 +-- Documentation/admin-guide/bug-hunting.rst | 7 +-- Documentation/admin-guide/devices.rst | 2 +- Documentation/admin-guide/kernel-parameters.rst | 6 +-- Documentation/admin-guide/oops-tracing.rst | 2 +- Documentation/admin-guide/ramoops.rst | 2 +- Documentation/admin-guide/reporting-bugs.rst | 6 +-- Documentation/admin-guide/security-bugs.rst | 2 +- Documentation/admin-guide/unicode.rst | 2 +- Documentation/arm/Booting | 2 +- Documentation/atomic_ops.txt | 2 +- Documentation/blockdev/ramdisk.txt | 2 +- Documentation/cgroup-v1/00-INDEX | 2 +- .../devicetree/bindings/rtc/maxim,ds3231.txt | 2 +- Documentation/devicetree/bindings/rtc/pcf8563.txt | 2 +- .../devicetree/bindings/submitting-patches.txt | 2 +- Documentation/filesystems/locks.txt | 2 +- Documentation/filesystems/nfs/nfsroot.txt | 4 +- Documentation/frv/booting.txt | 2 +- Documentation/hwmon/submitting-patches | 8 ++-- Documentation/isdn/README | 2 +- Documentation/ja_JP/HOWTO | 24 +++++----- Documentation/ja_JP/SubmitChecklist | 8 ++-- Documentation/ja_JP/SubmittingPatches | 18 ++++---- Documentation/ja_JP/stable_api_nonsense.txt | 4 +- Documentation/ja_JP/stable_kernel_rules.txt | 6 +-- Documentation/kernel-per-CPU-kthreads.txt | 2 +- Documentation/ko_KR/HOWTO | 30 ++++++------ Documentation/ko_KR/stable_api_nonsense.txt | 4 +- Documentation/lockup-watchdogs.txt | 4 +- Documentation/m68k/kernel-options.txt | 2 +- Documentation/media/uapi/v4l/diff-v4l.rst | 4 +- Documentation/media/v4l-drivers/bttv.rst | 4 +- Documentation/memory-hotplug.txt | 2 +- 
Documentation/networking/netconsole.txt | 2 +- Documentation/networking/netdev-FAQ.txt | 8 ++-- Documentation/networking/vortex.txt | 2 +- Documentation/power/00-INDEX | 2 +- Documentation/power/pci.txt | 10 ++-- Documentation/power/runtime_pm.txt | 2 +- Documentation/power/swsusp-dmcrypt.txt | 2 +- Documentation/process/4.Coding.rst | 4 +- Documentation/process/5.Posting.rst | 12 ++--- Documentation/process/8.Conclusion.rst | 6 +-- Documentation/process/adding-syscalls.rst | 2 +- Documentation/process/coding-style.rst | 2 +- Documentation/process/howto.rst | 24 +++++----- Documentation/process/management-style.rst | 2 +- Documentation/process/stable-kernel-rules.rst | 4 +- Documentation/process/submit-checklist.rst | 6 +-- Documentation/process/submitting-drivers.rst | 8 ++-- Documentation/process/submitting-patches.rst | 14 +++--- Documentation/rfkill.txt | 2 +- Documentation/scsi/scsi-parameters.txt | 2 +- Documentation/scsi/scsi_mid_low_api.txt | 2 +- Documentation/scsi/sym53c8xx_2.txt | 2 +- Documentation/sound/alsa/alsa-parameters.txt | 2 +- Documentation/sound/oss/oss-parameters.txt | 2 +- Documentation/sysctl/kernel.txt | 4 +- Documentation/virtual/kvm/review-checklist.txt | 4 +- Documentation/vm/numa | 2 +- .../watchdog/convert_drivers_to_kernel_api.txt | 2 +- Documentation/watchdog/watchdog-parameters.txt | 2 +- Documentation/x86/boot.txt | 2 +- Documentation/zh_CN/CodingStyle | 6 +-- Documentation/zh_CN/HOWTO | 30 ++++++------ Documentation/zh_CN/SecurityBugs | 6 +-- Documentation/zh_CN/SubmittingDrivers | 12 ++--- Documentation/zh_CN/SubmittingPatches | 14 +++--- Documentation/zh_CN/arm/Booting | 2 +- Documentation/zh_CN/email-clients.txt | 4 +- Documentation/zh_CN/oops-tracing.txt | 6 +-- Documentation/zh_CN/stable_api_nonsense.txt | 4 +- Documentation/zh_CN/stable_kernel_rules.txt | 6 +-- .../zh_CN/volatile-considered-harmful.txt | 4 +- MAINTAINERS | 10 ++-- arch/x86/Kconfig | 2 +- drivers/acpi/Kconfig | 2 +- drivers/ata/libata-core.c | 2 +- 
drivers/char/pcmcia/cm4000_cs.c | 4 +- drivers/net/can/grcan.c | 2 +- drivers/nvdimm/Kconfig | 2 +- drivers/staging/vme/devices/vme_user.c | 2 +- drivers/video/fbdev/skeletonfb.c | 8 ++-- drivers/virtio/Kconfig | 2 +- fs/Kconfig.binfmt | 4 +- fs/pstore/Kconfig | 2 +- include/linux/device.h | 2 +- include/linux/pm.h | 2 +- include/uapi/linux/major.h | 2 +- init/Kconfig | 2 +- init/main.c | 2 +- lib/Kconfig.debug | 2 +- scripts/checkpatch.pl | 6 +-- tools/testing/selftests/futex/README | 2 +- 103 files changed, 280 insertions(+), 278 deletions(-) (limited to 'include/linux') diff --git a/Documentation/00-INDEX b/Documentation/00-INDEX index d07575a8499e..39caa6544d1f 100644 --- a/Documentation/00-INDEX +++ b/Documentation/00-INDEX @@ -15,11 +15,11 @@ Following translations are available on the WWW: ABI/ - info on kernel <-> userspace ABI and relative interface stability. -BUG-HUNTING +admin-guide/bug-hunting.rst - brute force method of doing binary search of patches to find bug. -Changes +process/changes.rst - list of changes that break older software packages. -CodingStyle +process/coding-style.rst - how the maintainers expect the C code in the kernel to look. DMA-API.txt - DMA API, pci_ API & extensions for non-consistent memory machines. @@ -33,7 +33,7 @@ DocBook/ - directory with DocBook templates etc. for kernel documentation. EDID/ - directory with info on customizing EDID for broken gfx/displays. -HOWTO +process/howto.rst - the process and procedures of how to do Linux kernel development. IPMI.txt - info on Linux Intelligent Platform Management Interface (IPMI) Driver. @@ -48,7 +48,7 @@ Intel-IOMMU.txt Makefile - This file does nothing. Removing it breaks make htmldocs and make distclean. -ManagementStyle +process/management-style.rst - how to (attempt to) manage kernel hackers. RCU/ - directory with info on RCU (read-copy update). @@ -56,13 +56,13 @@ SAK.txt - info on Secure Attention Keys. 
SM501.txt - Silicon Motion SM501 multimedia companion chip -SecurityBugs +admin-guide/security-bugs.rst - procedure for reporting security bugs found in the kernel. -SubmitChecklist +process/submit-checklist.rst - Linux kernel patch submission checklist. -SubmittingDrivers +process/submitting-drivers.rst - procedure to get a new driver source included into the kernel tree. -SubmittingPatches +process/submitting-patches.rst - procedure to get a source patch included into the kernel tree. VGA-softcursor.txt - how to change your VGA cursor from a blinking underscore. @@ -72,7 +72,7 @@ acpi/ - info on ACPI-specific hooks in the kernel. aoe/ - description of AoE (ATA over Ethernet) along with config examples. -applying-patches.txt +process/applying-patches.rst - description of various trees and how to apply their patches. arm/ - directory with info about Linux on the ARM architecture. @@ -86,7 +86,7 @@ auxdisplay/ - misc. LCD driver documentation (cfag12864b, ks0108). backlight/ - directory with info on controlling backlights in flat panel displays -bad_memory.txt +admin-guide/bad-memory.rst - how to use kernel parameters to exclude bad RAM regions. basic_profiling.txt - basic instructions for those who wants to profile Linux kernel. @@ -154,7 +154,7 @@ process/ - how to work with the mainline kernel development process. device-mapper/ - directory with info on Device Mapper. -devices.txt +admin-guide/devices.rst - plain ASCII listing of all the nodes in /dev/ with major minor #'s. devicetree/ - directory with info on device tree files used by OF/PowerPC/ARM @@ -178,7 +178,7 @@ efi-stub.txt - How to use the EFI boot stub to bypass GRUB or elilo on EFI systems. eisa.txt - info on EISA bus support. -email-clients.txt +process/email-clients.rst - info on how to use e-mail to send un-mangled (git) patches. extcon/ - directory with porting guide for Android kernel switch driver. @@ -226,9 +226,9 @@ ia64/ - directory with info about Linux on Intel 64 bit architecture. 
infiniband/ - directory with documents concerning Linux InfiniBand support. -init.txt +admin-guide/init.rst - what to do when the kernel can't find the 1st process to run. -initrd.txt +admin-guide/initrd.rst - how to use the RAM disk as an initial/temporary root filesystem. input/ - info on Linux input device support. @@ -248,7 +248,7 @@ isapnp.txt - info on Linux ISA Plug & Play support. isdn/ - directory with info on the Linux ISDN support, and supported cards. -java.txt +admin-guide/java.rst - info on the in-kernel binary support for Java(tm). ja_JP/ - directory with Japanese translations of various documents @@ -256,11 +256,11 @@ kbuild/ - directory with info about the kernel build process. kdump/ - directory with mini HowTo on getting the crash dump code to work. -kernel-docs.txt +process/kernel-docs.rst - listing of various WWW + books that document kernel internals. kernel-documentation.rst - how to write and format reStructuredText kernel documentation -kernel-parameters.txt +admin-guide/kernel-parameters.rst - summary listing of command line / boot prompt args for the kernel. kernel-per-CPU-kthreads.txt - List of all per-CPU kthreads and how they introduce jitter. @@ -302,7 +302,7 @@ magic-number.txt - list of magic numbers used to mark/protect kernel data structures. mailbox.txt - How to write drivers for the common mailbox framework (IPC). -md.txt +admin-guide/md.rst - info on boot arguments for the multiple devices driver. media-framework.txt - info on media framework, its data structures, functions and usage. @@ -326,7 +326,7 @@ module-signing.txt - Kernel module signing for increased security when loading modules. mtd/ - directory with info about memory technology devices (flash) -mono.txt +admin-guide/mono.rst - how to execute Mono-based .NET binaries with the help of BINFMT_MISC. namespaces/ - directory with various information about namespaces @@ -340,7 +340,7 @@ nommu-mmap.txt - documentation about no-mmu memory mapping support. 
numastat.txt - info on how to read Numa policy hit/miss statistics in sysfs. -oops-tracing.txt +admin-guide/oops-tracing.rst - how to decode those nasty internal kernel error dump messages. padata.txt - An introduction to the "padata" parallel execution API @@ -378,7 +378,7 @@ ptp/ - directory with info on support for IEEE 1588 PTP clocks in Linux. pwm.txt - info on the pulse width modulation driver subsystem -ramoops.txt +admin-guide/ramoops.rst - documentation of the ramoops oops/panic logging module. rapidio/ - directory with info on RapidIO packet-based fabric interconnect @@ -406,7 +406,7 @@ security/ - directory that contains security-related info serial/ - directory with info on the low level serial API. -serial-console.txt +admin-guide/serial-console.rst - how to set up Linux with a serial line console as the default. sgi-ioc4.txt - description of the SGI IOC4 PCI (multi function) device. @@ -420,9 +420,9 @@ sparse.txt - info on how to obtain and use the sparse tool for typechecking. spi/ - overview of Linux kernel Serial Peripheral Interface (SPI) support. -stable_api_nonsense.txt +process/stable-api-nonsense.rst - info on why the kernel does not have a stable in-kernel api or abi. -stable_kernel_rules.txt +process/stable-kernel-rules.rst - rules and procedures for the -stable kernel releases. static-keys.txt - info on how static keys allow debug code in hotpaths via patching @@ -444,7 +444,7 @@ trace/ - directory with info on tracing technologies within linux unaligned-memory-access.txt - info on how to avoid arch breaking unaligned memory access in code. -unicode.txt +admin-guide/unicode.rst - info on the Unicode character/font mapping used in Linux. unshare.txt - description of the Linux unshare system call. @@ -466,7 +466,7 @@ vm/ - directory with info on the Linux vm code. 
vme_api.txt - file relating info on the VME bus API in linux -volatile-considered-harmful.txt +process/volatile-considered-harmful.rst - Why the "volatile" type class should not be used w1/ - directory with documents regarding the 1-wire (w1) subsystem. diff --git a/Documentation/ABI/README b/Documentation/ABI/README index 1fafc4b0753b..3121029dce21 100644 --- a/Documentation/ABI/README +++ b/Documentation/ABI/README @@ -84,4 +84,4 @@ stable: - Kernel-internal symbols. Do not rely on the presence, absence, location, or type of any kernel symbol, either in System.map files or the kernel binary - itself. See Documentation/stable_api_nonsense.txt. + itself. See Documentation/process/stable-api-nonsense.rst. diff --git a/Documentation/ABI/testing/sysfs-kernel-slab b/Documentation/ABI/testing/sysfs-kernel-slab index 91bd6ca5440f..2cc0a72b64be 100644 --- a/Documentation/ABI/testing/sysfs-kernel-slab +++ b/Documentation/ABI/testing/sysfs-kernel-slab @@ -347,7 +347,7 @@ Description: because of fragmentation, SLUB will retry with the minimum order possible depending on its characteristics. When debug_guardpage_minorder=N (N > 0) parameter is specified - (see Documentation/kernel-parameters.txt), the minimum possible + (see Documentation/admin-guide/kernel-parameters.rst), the minimum possible order is used and this sysfs entry can not be used to change the order at run time. diff --git a/Documentation/DocBook/kernel-hacking.tmpl b/Documentation/DocBook/kernel-hacking.tmpl index 2a272275c81b..da5c087462b1 100644 --- a/Documentation/DocBook/kernel-hacking.tmpl +++ b/Documentation/DocBook/kernel-hacking.tmpl @@ -1208,8 +1208,8 @@ static struct block_device_operations opt_fops = { - Finally, don't forget to read Documentation/SubmittingPatches - and possibly Documentation/SubmittingDrivers. + Finally, don't forget to read Documentation/process/submitting-patches.rst + and possibly Documentation/process/submitting-drivers.rst. 
diff --git a/Documentation/acpi/video_extension.txt b/Documentation/acpi/video_extension.txt index 78b32ac02466..79bf6a4921be 100644 --- a/Documentation/acpi/video_extension.txt +++ b/Documentation/acpi/video_extension.txt @@ -101,6 +101,6 @@ received a notification, it will set the backlight level accordingly. This does not affect the sending of event to user space, they are always sent to user space regardless of whether or not the video module controls the backlight level directly. This behaviour can be controlled through the brightness_switch_enabled -module parameter as documented in kernel-parameters.txt. It is recommended to +module parameter as documented in admin-guide/kernel-parameters.rst. It is recommended to disable this behaviour once a GUI environment starts up and wants to have full control of the backlight level. diff --git a/Documentation/admin-guide/README.rst b/Documentation/admin-guide/README.rst index 05aad8543340..1b6dfb2b3adb 100644 --- a/Documentation/admin-guide/README.rst +++ b/Documentation/admin-guide/README.rst @@ -50,7 +50,8 @@ Documentation - There are various README files in the Documentation/ subdirectory: these typically contain kernel-specific installation notes for some drivers for example. See Documentation/00-INDEX for a list of what - is contained in each file. Please read the Changes file, as it + is contained in each file. Please read the + :ref:`Documentation/process/changes.rst ` file, as it contains information about the problems, which may result by upgrading your kernel. @@ -96,7 +97,7 @@ Installing the kernel source and 4.0.2 patches. Similarly, if you are running kernel version 4.0.2 and want to jump to 4.0.3, you must first reverse the 4.0.2 patch (that is, patch -R) **before** applying the 4.0.3 patch. You can read more on this in - :ref:`Documentation/applying-patches.txt `. + :ref:`Documentation/process/applying-patches.rst `. Alternatively, the script patch-kernel can be used to automate this process. 
It determines the current kernel version and applies any @@ -120,7 +121,7 @@ Software requirements Compiling and running the 4.x kernels requires up-to-date versions of various software packages. Consult - :ref:`Documentation/Changes ` for the minimum version numbers + :ref:`Documentation/process/changes.rst ` for the minimum version numbers required and how to get updates for these packages. Beware that using excessively old versions of these packages can cause indirect errors that are very difficult to track down, so don't assume that @@ -254,7 +255,7 @@ Compiling the kernel -------------------- - Make sure you have at least gcc 3.2 available. - For more information, refer to :ref:`Documentation/Changes `. + For more information, refer to :ref:`Documentation/process/changes.rst `. Please note that you can still run a.out user programs with this kernel. @@ -355,7 +356,7 @@ If something goes wrong help debugging the problem. The text above the dump is also important: it tells something about why the kernel dumped code (in the above example, it's due to a bad kernel pointer). More information - on making sense of the dump is in Documentation/oops-tracing.txt + on making sense of the dump is in Documentation/admin-guide/oops-tracing.rst - If you compiled the kernel with CONFIG_KALLSYMS you can send the dump as is, otherwise you will have to use the ``ksymoops`` program to make @@ -393,7 +394,7 @@ If something goes wrong If you for some reason cannot do the above (you have a pre-compiled kernel image or similar), telling me as much about your setup as - possible will help. Please read the :ref:`REPORTING-BUGS ` + possible will help. Please read the :ref:`admin-guide/reporting-bugs.rst ` document for details. - Alternatively, you can use gdb on a running kernel. (read-only; i.e. 
you diff --git a/Documentation/admin-guide/bad-memory.rst b/Documentation/admin-guide/bad-memory.rst index 017fc86430c3..a5c0e25e496f 100644 --- a/Documentation/admin-guide/bad-memory.rst +++ b/Documentation/admin-guide/bad-memory.rst @@ -33,7 +33,7 @@ memmap is already in the kernel and usable as kernel-parameter at boot-time. Its syntax is slightly strange and you may need to calculate the values by yourself! -Syntax to exclude a memory area (see kernel-parameters.txt for details):: +Syntax to exclude a memory area (see admin-guide/kernel-parameters.rst for details):: memmap=$
diff --git a/Documentation/admin-guide/binfmt-misc.rst b/Documentation/admin-guide/binfmt-misc.rst index 9c5ff8f260bf..97b0d7927078 100644 --- a/Documentation/admin-guide/binfmt-misc.rst +++ b/Documentation/admin-guide/binfmt-misc.rst @@ -124,7 +124,7 @@ A few examples (assumed you are in ``/proc/sys/fs/binfmt_misc``): echo ':DOSWin:M::MZ::/usr/local/bin/wine:' > register -For java support see Documentation/java.txt +For java support see Documentation/admin-guide/java.rst You can enable/disable binfmt_misc or one binary type by echoing 0 (to disable) @@ -140,7 +140,7 @@ Hints ----- If you want to pass special arguments to your interpreter, you can -write a wrapper script for it. See Documentation/java.txt for an +write a wrapper script for it. See Documentation/admin-guide/java.rst for an example. Your interpreter should NOT look in the PATH for the filename; the kernel diff --git a/Documentation/admin-guide/braille-console.rst b/Documentation/admin-guide/braille-console.rst index fa3702dc04ab..18e79337dcfd 100644 --- a/Documentation/admin-guide/braille-console.rst +++ b/Documentation/admin-guide/braille-console.rst @@ -3,7 +3,7 @@ Linux Braille Console To get early boot messages on a braille device (before userspace screen readers can start), you first need to compile the support for the usual serial -console (see :ref:`Documentation/serial-console.txt `), and +console (see :ref:`Documentation/admin-guide/serial-console.rst `), and for braille device (in :menuselection:`Device Drivers --> Accessibility support --> Console on braille device`). @@ -13,7 +13,7 @@ format is:: console=brl,serial_options... where ``serial_options...`` are the same as described in -:ref:`Documentation/serial-console.txt `. +:ref:`Documentation/admin-guide/serial-console.rst `. So for instance you can use ``console=brl,ttyS0`` if the braille device is connected to the first serial port, and ``console=brl,ttyS0,115200`` to override the baud rate to 115200, etc. @@ -31,7 +31,7 @@ parameter. 
For simplicity, only one braille console can be enabled, other uses of ``console=brl,...`` will be discarded. Also note that it does not interfere with the console selection mechanism described in -:ref:`Documentation/serial-console.txt `. +:ref:`Documentation/admin-guide/serial-console.rst `. For now, only the VisioBraille device is supported. diff --git a/Documentation/admin-guide/bug-hunting.rst b/Documentation/admin-guide/bug-hunting.rst index a8ef794aadae..d35dd9fd1af0 100644 --- a/Documentation/admin-guide/bug-hunting.rst +++ b/Documentation/admin-guide/bug-hunting.rst @@ -15,7 +15,7 @@ give up. Report as much as you have found to the relevant maintainer. See MAINTAINERS for who that is for the subsystem you have worked on. Before you submit a bug report read -:ref:`Documentation/REPORTING-BUGS `. +:ref:`Documentation/admin-guide/reporting-bugs.rst `. Devices not appearing ===================== @@ -244,5 +244,6 @@ Once you have worked out a fix please submit it upstream. After all open source is about sharing what you do and don't you want to be recognised for your genius? -Please do read :ref:`Documentation/SubmittingPatches ` -though to help your code get accepted. +Please do read +:ref:`Documentation/process/submitting-patches.rst ` though +to help your code get accepted. diff --git a/Documentation/admin-guide/devices.rst b/Documentation/admin-guide/devices.rst index b29555041531..89db341fba7a 100644 --- a/Documentation/admin-guide/devices.rst +++ b/Documentation/admin-guide/devices.rst @@ -10,7 +10,7 @@ The LaTeX version of this document is no longer maintained, nor is the document that used to reside at lanana.org. This version in the mainline Linux kernel is the master document. Updates shall be sent as patches to the kernel maintainers (see the -:ref:`Documentation/SubmittingPatches ` document). +:ref:`Documentation/process/submitting-patches.rst ` document).
Specifically explore the sections titled "CHAR and MISC DRIVERS", and "BLOCK LAYER" in the MAINTAINERS file to find the right maintainers to involve for character and block devices. diff --git a/Documentation/admin-guide/kernel-parameters.rst b/Documentation/admin-guide/kernel-parameters.rst index b0804273b6e3..d2f2725f032e 100644 --- a/Documentation/admin-guide/kernel-parameters.rst +++ b/Documentation/admin-guide/kernel-parameters.rst @@ -815,7 +815,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted:: bits, and "f" is flow control ("r" for RTS or omit it). Default is "9600n8". - See Documentation/serial-console.txt for more + See Documentation/admin-guide/serial-console.rst for more information. See Documentation/networking/netconsole.txt for an alternative. @@ -2239,7 +2239,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted:: mce=option [X86-64] See Documentation/x86/x86_64/boot-options.txt md= [HW] RAID subsystems devices and level - See Documentation/md.txt. + See Documentation/admin-guide/md.rst. mdacon= [MDA] Format: , @@ -3322,7 +3322,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted:: r128= [HW,DRM] raid= [HW,RAID] - See Documentation/md.txt. + See Documentation/admin-guide/md.rst. ramdisk_size= [RAM] Sizes of RAM disks in kilobytes See Documentation/blockdev/ramdisk.txt. diff --git a/Documentation/admin-guide/oops-tracing.rst b/Documentation/admin-guide/oops-tracing.rst index 3e25ea7349ee..13be8d7bcfe7 100644 --- a/Documentation/admin-guide/oops-tracing.rst +++ b/Documentation/admin-guide/oops-tracing.rst @@ -44,7 +44,7 @@ the disk is not available then you have three options : so won't help for 'early' oopses) (2) Boot with a serial console (see - :ref:`Documentation/serial-console.txt `), + :ref:`Documentation/admin-guide/serial-console.rst `), run a null modem to a second machine and capture the output there using your favourite communication program. Minicom works well. 
diff --git a/Documentation/admin-guide/ramoops.rst b/Documentation/admin-guide/ramoops.rst index 7eaf1e71c083..fe95c027e37c 100644 --- a/Documentation/admin-guide/ramoops.rst +++ b/Documentation/admin-guide/ramoops.rst @@ -61,7 +61,7 @@ Setting the ramoops parameters can be done in several different manners: mem=128M ramoops.mem_address=0x8000000 ramoops.ecc=1 B. Use Device Tree bindings, as described in - ``Documentation/device-tree/bindings/reserved-memory/ramoops.txt``. + ``Documentation/device-tree/bindings/reserved-memory/ramoops.txt``. For example:: reserved-memory { diff --git a/Documentation/admin-guide/reporting-bugs.rst b/Documentation/admin-guide/reporting-bugs.rst index 05c53ac7fa76..0c0f2698ec5a 100644 --- a/Documentation/admin-guide/reporting-bugs.rst +++ b/Documentation/admin-guide/reporting-bugs.rst @@ -61,7 +61,7 @@ files to the get_maintainer.pl script:: If it is a security bug, please copy the Security Contact listed in the MAINTAINERS file. They can help coordinate bugfix and disclosure. See -:ref:`Documentation/SecurityBugs ` for more information. +:ref:`Documentation/admin-guide/security-bugs.rst ` for more information. If you can't figure out which subsystem caused the issue, you should file a bug in kernel.org bugzilla and send email to @@ -94,7 +94,7 @@ step-by-step instructions for how a user can trigger the bug. If the failure includes an "OOPS:", take a picture of the screen, capture a netconsole trace, or type the message from your screen into the bug -report. Please read "Documentation/oops-tracing.txt" before posting your +report. Please read "Documentation/admin-guide/oops-tracing.rst" before posting your bug report. This explains what you should do with the "Oops" information to make it useful to the recipient. @@ -120,7 +120,7 @@ summary from [1.]>" for easy identification by the developers:: [4.2.] Kernel .config file: [5.] Most recent kernel version which did not have the bug: [6.] Output of Oops..
message (if applicable) with symbolic information - resolved (see Documentation/oops-tracing.txt) + resolved (see Documentation/admin-guide/oops-tracing.rst) [7.] A small shell script or example program which triggers the problem (if possible) [8.] Environment diff --git a/Documentation/admin-guide/security-bugs.rst b/Documentation/admin-guide/security-bugs.rst index df795e22d08b..4f7414cad586 100644 --- a/Documentation/admin-guide/security-bugs.rst +++ b/Documentation/admin-guide/security-bugs.rst @@ -19,7 +19,7 @@ area maintainers to understand and fix the security vulnerability. As it is with any bug, the more information provided the easier it will be to diagnose and fix. Please review the procedure outlined in -REPORTING-BUGS if you are unclear about what information is helpful. +admin-guide/reporting-bugs.rst if you are unclear about what information is helpful. Any exploit code is very helpful and will not be released without consent from the reporter unless it has already been made public. diff --git a/Documentation/admin-guide/unicode.rst b/Documentation/admin-guide/unicode.rst index 012e8e895842..4e5c3df9d55f 100644 --- a/Documentation/admin-guide/unicode.rst +++ b/Documentation/admin-guide/unicode.rst @@ -7,7 +7,7 @@ This file is maintained by H. Peter Anvin as part of the Linux Assigned Names And Numbers Authority (LANANA) project. The current version can be found at: - http://www.lanana.org/docs/unicode/unicode.txt + http://www.lanana.org/docs/unicode/unicode.txt Introdution ----------- diff --git a/Documentation/arm/Booting b/Documentation/arm/Booting index 83c1df2fc758..259f00af3ab3 100644 --- a/Documentation/arm/Booting +++ b/Documentation/arm/Booting @@ -51,7 +51,7 @@ As an alternative, the boot loader can pass the relevant 'console=' option to the kernel via the tagged lists specifying the port, and serial format options as described in - Documentation/kernel-parameters.txt. + Documentation/admin-guide/kernel-parameters.rst. 3.
Detect the machine type diff --git a/Documentation/atomic_ops.txt b/Documentation/atomic_ops.txt index c9d1cacb4395..7281bf939779 100644 --- a/Documentation/atomic_ops.txt +++ b/Documentation/atomic_ops.txt @@ -16,7 +16,7 @@ will fail. Something like the following should suffice: typedef struct { long counter; } atomic_long_t; Historically, counter has been declared volatile. This is now discouraged. -See Documentation/volatile-considered-harmful.txt for the complete rationale. +See Documentation/process/volatile-considered-harmful.rst for the complete rationale. local_t is very similar to atomic_t. If the counter is per CPU and only updated by one CPU, local_t is probably more appropriate. Please see diff --git a/Documentation/blockdev/ramdisk.txt b/Documentation/blockdev/ramdisk.txt index fe2ef978d85a..501e12e0323e 100644 --- a/Documentation/blockdev/ramdisk.txt +++ b/Documentation/blockdev/ramdisk.txt @@ -14,7 +14,7 @@ Contents: The RAM disk driver is a way to use main system memory as a block device. It is required for initrd, an initial filesystem used if you need to load modules -in order to access the root filesystem (see Documentation/initrd.txt). It can +in order to access the root filesystem (see Documentation/admin-guide/initrd.rst). It can also be used for a temporary filesystem for crypto work, since the contents are erased on reboot. diff --git a/Documentation/cgroup-v1/00-INDEX b/Documentation/cgroup-v1/00-INDEX index 106885ad670d..13e0c85e7b35 100644 --- a/Documentation/cgroup-v1/00-INDEX +++ b/Documentation/cgroup-v1/00-INDEX @@ -8,7 +8,7 @@ cpuacct.txt - CPU Accounting Controller; account CPU usage for groups of tasks. cpusets.txt - documents the cpusets feature; assign CPUs and Mem to a set of tasks. -devices.txt +devices.txt - Device Whitelist Controller; description, interface and security. freezer-subsystem.txt - checkpointing; rationale to not use signals, interface.
diff --git a/Documentation/devicetree/bindings/rtc/maxim,ds3231.txt b/Documentation/devicetree/bindings/rtc/maxim,ds3231.txt index ddef330d2709..1ad4c1c2b3b3 100644 --- a/Documentation/devicetree/bindings/rtc/maxim,ds3231.txt +++ b/Documentation/devicetree/bindings/rtc/maxim,ds3231.txt @@ -1,7 +1,7 @@ * Maxim DS3231 Real Time Clock Required properties: -see: Documentation/devicetree/bindings/i2c/trivial-devices.txt +see: Documentation/devicetree/bindings/i2c/trivial-devices.txt Optional property: - #clock-cells: Should be 1. diff --git a/Documentation/devicetree/bindings/rtc/pcf8563.txt b/Documentation/devicetree/bindings/rtc/pcf8563.txt index 72f6d2c9665e..086c998c5561 100644 --- a/Documentation/devicetree/bindings/rtc/pcf8563.txt +++ b/Documentation/devicetree/bindings/rtc/pcf8563.txt @@ -3,7 +3,7 @@ Philips PCF8563/Epson RTC8564 Real Time Clock Required properties: -see: Documentation/devicetree/bindings/i2c/trivial-devices.txt +see: Documentation/devicetree/bindings/i2c/trivial-devices.txt Optional property: - #clock-cells: Should be 0. diff --git a/Documentation/devicetree/bindings/submitting-patches.txt b/Documentation/devicetree/bindings/submitting-patches.txt index 7d44eae7ab0b..274058c583dd 100644 --- a/Documentation/devicetree/bindings/submitting-patches.txt +++ b/Documentation/devicetree/bindings/submitting-patches.txt @@ -3,7 +3,7 @@ I. For patch submitters - 0) Normal patch submission rules from Documentation/SubmittingPatches + 0) Normal patch submission rules from Documentation/process/submitting-patches.rst applies. 1) The Documentation/ portion of the patch should be a separate patch. diff --git a/Documentation/filesystems/locks.txt b/Documentation/filesystems/locks.txt index 2cf81082581d..5368690f412e 100644 --- a/Documentation/filesystems/locks.txt +++ b/Documentation/filesystems/locks.txt @@ -19,7 +19,7 @@ forever.
This should not cause problems for anybody, since everybody using a 2.1.x kernel should have updated their C library to a suitable version -anyway (see the file "Documentation/Changes".) +anyway (see the file "Documentation/process/changes.rst".) 1.2 Allow Mixed Locks Again --------------------------- diff --git a/Documentation/filesystems/nfs/nfsroot.txt b/Documentation/filesystems/nfs/nfsroot.txt index 0b2883b17d4c..5efae00f6c7f 100644 --- a/Documentation/filesystems/nfs/nfsroot.txt +++ b/Documentation/filesystems/nfs/nfsroot.txt @@ -11,7 +11,7 @@ Updated 2006 by Horms In order to use a diskless system, such as an X-terminal or printer server for example, it is necessary for the root filesystem to be present on a non-disk device. This may be an initramfs (see Documentation/filesystems/ -ramfs-rootfs-initramfs.txt), a ramdisk (see Documentation/initrd.txt) or a +ramfs-rootfs-initramfs.txt), a ramdisk (see Documentation/admin-guide/initrd.rst) or a filesystem mounted via NFS. The following text describes on how to use NFS for the root filesystem. For the rest of this text 'client' means the diskless system, and 'server' means the NFS server. @@ -284,7 +284,7 @@ They depend on various facilities being available: "kernel ". The nfsroot parameters are passed to the kernel by adding them to the "append" line. It is common to use serial console in conjunction with pxeliunx, - see Documentation/serial-console.txt for more information. + see Documentation/admin-guide/serial-console.rst for more information. For more information on isolinux, including how to create bootdisks for prebuilt kernels, see http://syslinux.zytor.com/ diff --git a/Documentation/frv/booting.txt b/Documentation/frv/booting.txt index 9bdf4b46e741..cd9dc1dfb144 100644 --- a/Documentation/frv/booting.txt +++ b/Documentation/frv/booting.txt @@ -119,7 +119,7 @@ separated by spaces: 253:0 Device with major 253 and minor 0 Authoritative information can be found in - "Documentation/kernel-parameters.txt". 
+ "Documentation/admin-guide/kernel-parameters.rst". (*) rw diff --git a/Documentation/hwmon/submitting-patches b/Documentation/hwmon/submitting-patches index 57f60307accc..f88221b46153 100644 --- a/Documentation/hwmon/submitting-patches +++ b/Documentation/hwmon/submitting-patches @@ -10,10 +10,10 @@ increase the chances of your change being accepted. ---------- * It should be unnecessary to mention, but please read and follow - Documentation/SubmitChecklist - Documentation/SubmittingDrivers - Documentation/SubmittingPatches - Documentation/CodingStyle + Documentation/process/submit-checklist.rst + Documentation/process/submitting-drivers.rst + Documentation/process/submitting-patches.rst + Documentation/process/coding-style.rst * Please run your patch through 'checkpatch --strict'. There should be no errors, no warnings, and few if any check messages. If there are any diff --git a/Documentation/isdn/README b/Documentation/isdn/README index cfb1884342ee..32d4e80c2c03 100644 --- a/Documentation/isdn/README +++ b/Documentation/isdn/README @@ -332,7 +332,7 @@ README for the ISDN-subsystem 4. Device-inodes The major and minor numbers and their names are described in - Documentation/devices.txt. The major numbers are: + Documentation/admin-guide/devices.rst. The major numbers are: 43 for the ISDN-tty's. 44 for the ISDN-callout-tty's. 
diff --git a/Documentation/ja_JP/HOWTO b/Documentation/ja_JP/HOWTO index 581c14bdd7be..b03fc8047f03 100644 --- a/Documentation/ja_JP/HOWTO +++ b/Documentation/ja_JP/HOWTO @@ -127,15 +127,15 @@ linux-api@ver.kernel.org に送ることを勧めます。 小限のレベルで必要な数々のソフトウェアパッケージの一覧を示してい ます。 - Documentation/CodingStyle + Documentation/process/coding-style.rst これは Linux カーネルのコーディングスタイルと背景にある理由を記述 しています。全ての新しいコードはこのドキュメントにあるガイドライン に従っていることを期待されています。大部分のメンテナはこれらのルー ルに従っているものだけを受け付け、多くの人は正しいスタイルのコード だけをレビューします。 - Documentation/SubmittingPatches - Documentation/SubmittingDrivers + Documentation/process/submitting-patches.rst + Documentation/process/submitting-drivers.rst これらのファイルには、どうやってうまくパッチを作って投稿するかに ついて非常に詳しく書かれており、以下を含みます(これだけに限らない けれども) @@ -153,7 +153,7 @@ linux-api@ver.kernel.org に送ることを勧めます。 "Linux kernel patch submission format" http://linux.yyz.us/patch-format.html - Documentation/stable_api_nonsense.txt + Documentation/process/stable-api-nonsense.rst このファイルはカーネルの中に不変のAPIを持たないことにした意識的な 決断の背景にある理由について書かれています。以下のようなことを含 んでいます- @@ -164,29 +164,29 @@ linux-api@ver.kernel.org に送ることを勧めます。 このドキュメントは Linux 開発の思想を理解するのに非常に重要です。 そして、他のOSでの開発者が Linux に移る時にとても重要です。 - Documentation/SecurityBugs + Documentation/admin-guide/security-bugs.rst もし Linux カーネルでセキュリティ問題を発見したように思ったら、こ のドキュメントのステップに従ってカーネル開発者に連絡し、問題解決を 支援してください。 - Documentation/ManagementStyle + Documentation/process/management-style.rst このドキュメントは Linux カーネルのメンテナ達がどう行動するか、 彼らの手法の背景にある共有されている精神について記述しています。こ れはカーネル開発の初心者なら(もしくは、単に興味があるだけの人でも) 重要です。なぜならこのドキュメントは、カーネルメンテナ達の独特な 行動についての多くの誤解や混乱を解消するからです。 - Documentation/stable_kernel_rules.txt + Documentation/process/stable-kernel-rules.rst このファイルはどのように stable カーネルのリリースが行われるかのルー ルが記述されています。そしてこれらのリリースの中のどこかで変更を取 り入れてもらいたい場合に何をすれば良いかが示されています。 - Documentation/kernel-docs.txt + Documentation/process/kernel-docs.rst   カーネル開発に付随する外部ドキュメントのリストです。もしあなたが 探しているものがカーネル内のドキュメントでみつからなかった場合、 このリストをあたってみてください。 - Documentation/applying-patches.txt + Documentation/process/applying-patches.rst 
パッチとはなにか、パッチをどうやって様々なカーネルの開発ブランチに 適用するのかについて正確に記述した良い入門書です。 @@ -314,7 +314,7 @@ Andrew Morton が Linux-kernel メーリングリストにカーネルリリー た問題がなければもう少し長くなることもあります。セキュリティ関連の問題 の場合はこれに対してだいたいの場合、すぐにリリースがされます。 -カーネルツリーに入っている、Documentation/stable_kernel_rules.txt ファ +カーネルツリーに入っている、Documentation/process/stable-kernel-rules.rst ファ イルにはどのような種類の変更が -stable ツリーに受け入れ可能か、またリ リースプロセスがどう動くかが記述されています。 @@ -372,7 +372,7 @@ bugzilla.kernel.org は Linux カーネル開発者がカーネルのバグを 場所です。ユーザは見つけたバグの全てをこのツールで報告すべきです。 どう kernel bugzilla を使うかの詳細は、以下を参照してください- http://bugzilla.kernel.org/page.cgi?id=faq.html -メインカーネルソースディレクトリにあるファイル REPORTING-BUGS はカーネ +メインカーネルソースディレクトリにあるファイル admin-guide/reporting-bugs.rst はカーネ ルバグらしいものについてどうレポートするかの良いテンプレートであり、問 題の追跡を助けるためにカーネル開発者にとってどんな情報が必要なのかの詳 細が書かれています。 @@ -438,7 +438,7 @@ MAINTAINERS ファイルにリストがありますので参照してくださ メールの先頭でなく、各引用行の間にあなたの言いたいことを追加するべきで す。 -もしパッチをメールに付ける場合は、Documentation/SubmittingPatches に提 +もしパッチをメールに付ける場合は、Documentation/process/submitting-patches.rst に提 示されているように、それは プレーンな可読テキストにすることを忘れない ようにしましょう。カーネル開発者は 添付や圧縮したパッチを扱いたがりま せん- diff --git a/Documentation/ja_JP/SubmitChecklist b/Documentation/ja_JP/SubmitChecklist index cb5507b1ac81..60c7c35ac517 100644 --- a/Documentation/ja_JP/SubmitChecklist +++ b/Documentation/ja_JP/SubmitChecklist @@ -1,5 +1,5 @@ NOTE: -This is a version of Documentation/SubmitChecklist into Japanese. +This is a version of Documentation/process/submit-checklist.rst into Japanese. This document is maintained by Takenori Nagano and the JF Project team . If you find any difference between this document and the original file @@ -14,7 +14,7 @@ to update the original English file first. 
Last Updated: 2008/07/14 ================================== これは、 -linux-2.6.26/Documentation/SubmitChecklist の和訳です。 +linux-2.6.26/Documentation/process/submit-checklist.rst の和訳です。 翻訳団体: JF プロジェクト < http://www.linux.or.jp/JF/ > 翻訳日: 2008/07/14 @@ -27,7 +27,7 @@ Linux カーネルパッチ投稿者向けチェックリスト ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 本書では、パッチをより素早く取り込んでもらいたい開発者が実践すべき基本的な事柄 -をいくつか紹介します。ここにある全ての事柄は、Documentation/SubmittingPatches +をいくつか紹介します。ここにある全ての事柄は、Documentation/process/submitting-patches.rst などのLinuxカーネルパッチ投稿に際しての心得を補足するものです。 1: 妥当なCONFIGオプションや変更されたCONFIGオプション、つまり =y, =m, =n @@ -84,7 +84,7 @@ Linux カーネルパッチ投稿者向けチェックリスト 必ずドキュメントを追加してください。 17: 新しいブートパラメータを追加した場合には、 - 必ずDocumentation/kernel-parameters.txt に説明を追加してください。 + 必ずDocumentation/admin-guide/kernel-parameters.rst に説明を追加してください。 18: 新しくmoduleにパラメータを追加した場合には、MODULE_PARM_DESC()を 利用して必ずその説明を記述してください。 diff --git a/Documentation/ja_JP/SubmittingPatches b/Documentation/ja_JP/SubmittingPatches index 5d6ae639bfa0..02139656463e 100644 --- a/Documentation/ja_JP/SubmittingPatches +++ b/Documentation/ja_JP/SubmittingPatches @@ -1,5 +1,5 @@ NOTE: -This is a version of Documentation/SubmittingPatches into Japanese. +This is a version of Documentation/process/submitting-patches.rst into Japanese. This document is maintained by Keiichi KII and the JF Project team . 
If you find any difference between this document and the original file @@ -15,7 +15,7 @@ Last Updated: 2011/06/09 ================================== これは、 -linux-2.6.39/Documentation/SubmittingPatches の和訳 +linux-2.6.39/Documentation/process/submitting-patches.rst の和訳 です。 翻訳団体: JF プロジェクト < http://www.linux.or.jp/JF/ > 翻訳日: 2011/06/09 @@ -34,9 +34,9 @@ Linux カーネルに変更を加えたいと思っている個人又は会社 おじけづかせることもあります。この文章はあなたの変更を大いに受け入れ てもらえやすくする提案を集めたものです。 -コードを投稿する前に、Documentation/SubmitChecklist の項目リストに目 +コードを投稿する前に、Documentation/process/submit-checklist.rst の項目リストに目 を通してチェックしてください。もしあなたがドライバーを投稿しようとし -ているなら、Documentation/SubmittingDrivers にも目を通してください。 +ているなら、Documentation/process/submitting-drivers.rst にも目を通してください。 -------------------------------------------- セクション1 パッチの作り方と送り方 @@ -148,7 +148,7 @@ http://savannah.nongnu.org/projects/quilt 4) パッチのスタイルチェック あなたのパッチが基本的な( Linux カーネルの)コーディングスタイルに違反し -ていないかをチェックして下さい。その詳細を Documentation/CodingStyle で +ていないかをチェックして下さい。その詳細を Documentation/process/coding-style.rst で 見つけることができます。コーディングスタイルの違反はレビューする人の 時間を無駄にするだけなので、恐らくあなたのパッチは読まれることすらなく 拒否されるでしょう。 @@ -246,7 +246,7 @@ MIME 形式の添付ファイルは Linus に手間を取らせることにな あれば、誰かが MIME 形式のパッチを再送するよう求めるかもしれません。 余計な変更を加えずにあなたのパッチを送信するための電子メールクライアントの設定 -のヒントについては Documentation/email-clients.txt を参照してください。 +のヒントについては Documentation/process/email-clients.rst を参照してください。 8) 電子メールのサイズ @@ -609,7 +609,7 @@ diffstat の結果を生成するために「 git diff -M --stat --summary 」 し例外を適用するには、本当に妥当な理由が不可欠です。あなたは恐らくこの セクションを Linus のコンピュータ・サイエンス101と呼ぶでしょう。 -1) Documentation/CodingStyleを参照 +1) Documentation/process/coding-style.rstを参照 言うまでもなく、あなたのコードがこのコーディングスタイルからあまりに も逸脱していると、レビューやコメントなしに受け取ってもらえないかもし @@ -704,8 +704,8 @@ Greg Kroah-Hartman, "How to piss off a kernel subsystem maintainer". NO!!!! No more huge patch bombs to linux-kernel@vger.kernel.org people! 
-Kernel Documentation/CodingStyle: - +Kernel Documentation/process/coding-style.rst: + Linus Torvalds's mail on the canonical patch format: diff --git a/Documentation/ja_JP/stable_api_nonsense.txt b/Documentation/ja_JP/stable_api_nonsense.txt index 7653b5cbfed2..a3b40a4bdcfd 100644 --- a/Documentation/ja_JP/stable_api_nonsense.txt +++ b/Documentation/ja_JP/stable_api_nonsense.txt @@ -1,5 +1,5 @@ NOTE: -This is a version of Documentation/stable_api_nonsense.txt into Japanese. +This is a version of Documentation/process/stable-api-nonsense.rst into Japanese. This document is maintained by IKEDA, Munehiro and the JF Project team . If you find any difference between this document and the original file @@ -14,7 +14,7 @@ to update the original English file first. Last Updated: 2007/07/18 ================================== これは、 -linux-2.6.22-rc4/Documentation/stable_api_nonsense.txt の和訳 +linux-2.6.22-rc4/Documentation/process/stable-api-nonsense.rst の和訳 です。 翻訳団体: JF プロジェクト < http://www.linux.or.jp/JF/ > 翻訳日 : 2007/06/11 diff --git a/Documentation/ja_JP/stable_kernel_rules.txt b/Documentation/ja_JP/stable_kernel_rules.txt index 9dbda9b5d21e..f9249aecba64 100644 --- a/Documentation/ja_JP/stable_kernel_rules.txt +++ b/Documentation/ja_JP/stable_kernel_rules.txt @@ -1,5 +1,5 @@ NOTE: -This is Japanese translated version of "Documentation/stable_kernel_rules.txt". +This is Japanese translated version of "Documentation/process/stable-kernel-rules.rst". This one is maintained by Tsugikazu Shibata and JF Project team . If you find difference with original file or problem in translation, @@ -12,7 +12,7 @@ file at first. 
================================== これは、 -linux-2.6.29/Documentation/stable_kernel_rules.txt +linux-2.6.29/Documentation/process/stable-kernel-rules.rst の和訳です。 翻訳団体: JF プロジェクト < http://www.linux.or.jp/JF/ > @@ -43,7 +43,7 @@ linux-2.6.29/Documentation/stable_kernel_rules.txt "理論的には競合状態になる"ようなものは不可。 - いかなる些細な修正も含めることはできない。(スペルの修正、空白のクリー ンアップなど) - - Documentation/SubmittingPatches の規則に従ったものでなければならない。 + - Documentation/process/submitting-patches.rst の規則に従ったものでなければならない。 - パッチ自体か同等の修正が Linus のツリーに既に存在しなければならない。   Linus のツリーでのコミットID を -stable へのパッチ投稿の際に引用す ること。 diff --git a/Documentation/kernel-per-CPU-kthreads.txt b/Documentation/kernel-per-CPU-kthreads.txt index bbc3a8b8cff4..df31e30b6a02 100644 --- a/Documentation/kernel-per-CPU-kthreads.txt +++ b/Documentation/kernel-per-CPU-kthreads.txt @@ -264,7 +264,7 @@ To reduce its OS jitter, do at least one of the following: kthreads from being created in the first place. 2. Boot with "nosoftlockup=0", which will also prevent these kthreads from being created. Other related watchdog and softlockup boot - parameters may be found in Documentation/kernel-parameters.txt + parameters may be found in Documentation/admin-guide/kernel-parameters.rst and Documentation/watchdog/watchdog-parameters.txt. 3. Echo a zero to /proc/sys/kernel/watchdog to disable the watchdog timer. diff --git a/Documentation/ko_KR/HOWTO b/Documentation/ko_KR/HOWTO index 9a3e65924d54..025252731af5 100644 --- a/Documentation/ko_KR/HOWTO +++ b/Documentation/ko_KR/HOWTO @@ -1,5 +1,5 @@ NOTE: -This is a version of Documentation/HOWTO translated into korean +This is a version of Documentation/process/howto.rst translated into korean This document is maintained by Minchan Kim If you find any difference between this document and the original file or a problem with the translation, please contact the maintainer of this file. @@ -11,7 +11,7 @@ try to update the original English file first. 
================================== 이 문서는 -Documentation/HOWTO +Documentation/process/howto.rst 의 한글 번역입니다. 역자: 김민찬 @@ -98,18 +98,18 @@ mtk.manpages@gmail.com의 메인테이너에게 보낼 것을 권장한다. 빌드하기 위해 필요한 것을 설명한다. 커널에 입문하는 사람들은 여기서 시작해야 한다. - Documentation/Changes + Documentation/process/changes.rst 이 파일은 커널을 성공적으로 빌드하고 실행시키기 위해 필요한 다양한 소프트웨어 패키지들의 최소 버젼을 나열한다. - Documentation/CodingStyle + Documentation/process/coding-style.rst 이 문서는 리눅스 커널 코딩 스타일과 그렇게 한 몇몇 이유를 설명한다. 모든 새로운 코드는 이 문서에 가이드라인들을 따라야 한다. 대부분의 메인테이너들은 이 규칙을 따르는 패치들만을 받아들일 것이고 많은 사람들이 그 패치가 올바른 스타일일 경우만 코드를 검토할 것이다. - Documentation/SubmittingPatches - Documentation/SubmittingDrivers + Documentation/process/submitting-patches.rst + Documentation/process/submitting-drivers.rst 이 파일들은 성공적으로 패치를 만들고 보내는 법을 다음의 내용들로 굉장히 상세히 설명하고 있다(그러나 다음으로 한정되진 않는다). - Email 내용들 @@ -126,7 +126,7 @@ mtk.manpages@gmail.com의 메인테이너에게 보낼 것을 권장한다. "Linux kernel patch submission format" http://linux.yyz.us/patch-format.html - Documentation/stable_api_nonsense.txt + Documentation/process/stable-api-nonsense.rst 이 문서는 의도적으로 커널이 불변하는 API를 갖지 않도록 결정한 이유를 설명하며 다음과 같은 것들을 포함한다. - 서브시스템 shim-layer(호환성을 위해?) @@ -136,12 +136,12 @@ mtk.manpages@gmail.com의 메인테이너에게 보낼 것을 권장한다. 리눅스로 전향하는 사람들에게는 매우 중요하다. - Documentation/SecurityBugs + Documentation/admin-guide/security-bugs.rst 여러분들이 리눅스 커널의 보안 문제를 발견했다고 생각한다면 이 문서에 나온 단계에 따라서 커널 개발자들에게 알리고 그 문제를 해결할 수 있도록 도와 달라. - Documentation/ManagementStyle + Documentation/process/management-style.rst 이 문서는 리눅스 커널 메인테이너들이 그들의 방법론에 녹아 있는 정신을 어떻게 공유하고 운영하는지를 설명한다. 이것은 커널 개발에 입문하는 모든 사람들(또는 커널 개발에 작은 호기심이라도 있는 사람들)이 @@ -149,17 +149,17 @@ mtk.manpages@gmail.com의 메인테이너에게 보낼 것을 권장한다. 독특한 행동에 관하여 흔히 있는 오해들과 혼란들을 해소하고 있기 때문이다. - Documentation/stable_kernel_rules.txt + Documentation/process/stable-kernel-rules.rst 이 문서는 안정적인 커널 배포가 이루어지는 규칙을 설명하고 있으며 여러분들이 이러한 배포들 중 하나에 변경을 하길 원한다면 무엇을 해야 하는지를 설명한다. - Documentation/kernel-docs.txt + Documentation/process/kernel-docs.rst 커널 개발에 관계된 외부 문서의 리스트이다. 
커널 내의 포함된 문서들 중에 여러분이 찾고 싶은 문서를 발견하지 못할 경우 이 리스트를 살펴보라. - Documentation/applying-patches.txt + Documentation/process/applying-patches.rst 패치가 무엇이며 그것을 커널의 다른 개발 브랜치들에 어떻게 적용하는지에 관하여 자세히 설명하고 있는 좋은 입문서이다. @@ -276,7 +276,7 @@ Andrew Morton의 글이 있다. 4.x.y는 "stable" 팀에 의해 관리되며 거의 매번 격주로 배포된다. -커널 트리 문서들 내에 Documentation/stable_kernel_rules.txt 파일은 어떤 +커널 트리 문서들 내에 Documentation/process/stable-kernel-rules.rst 파일은 어떤 종류의 변경들이 -stable 트리로 들어왔는지와 배포 프로세스가 어떻게 진행되는지를 설명한다. @@ -328,7 +328,7 @@ bugzilla.kernel.org는 리눅스 커널 개발자들이 커널의 버그를 추 kernel bugzilla를 사용하는 자세한 방법은 다음을 참조하라. http://test.kernel.org/bugzilla/faq.html -메인 커널 소스 디렉토리에 있는 REPORTING-BUGS 파일은 커널 버그라고 생각되는 +메인 커널 소스 디렉토리에 있는 admin-guide/reporting-bugs.rst 파일은 커널 버그라고 생각되는 것을 보고하는 방법에 관한 좋은 템플릿이며 문제를 추적하기 위해서 커널 개발자들이 필요로 하는 정보가 무엇들인지를 상세히 설명하고 있다. @@ -391,7 +391,7 @@ bugme-janitor 메일링 리스트(bugzilla에 모든 변화들이 여기서 메 "John 커널해커는 작성했다...."를 유지하며 여러분들의 의견을 그 메일의 윗부분에 작성하지 말고 각 인용한 단락들 사이에 넣어라. -여러분들이 패치들을 메일에 넣는다면 그것들은 Documentation/SubmittingPatches에 +여러분들이 패치들을 메일에 넣는다면 그것들은 Documentation/process/submitting-patches.rst에 나와있는데로 명백히(plain) 읽을 수 있는 텍스트여야 한다. 커널 개발자들은 첨부파일이나 압축된 패치들을 원하지 않는다. 그들은 여러분들의 패치의 각 라인 단위로 코멘트를 하길 원하며 압축하거나 첨부하지 않고 보내는 것이 diff --git a/Documentation/ko_KR/stable_api_nonsense.txt b/Documentation/ko_KR/stable_api_nonsense.txt index 3ba10b11d556..4d93af1efd61 100644 --- a/Documentation/ko_KR/stable_api_nonsense.txt +++ b/Documentation/ko_KR/stable_api_nonsense.txt @@ -1,5 +1,5 @@ NOTE: -This is a version of Documentation/stable_api_nonsense.txt translated +This is a version of Documentation/process/stable-api-nonsense.rst translated into korean This document is maintained by Minchan Kim If you find any difference between this document and the original file or @@ -12,7 +12,7 @@ try to update the original English file first. ================================== 이 문서는 -Documentation/stable_api_nonsense.txt +Documentation/process/stable-api-nonsense.rst 의 한글 번역입니다. 
역자: 김민찬 diff --git a/Documentation/lockup-watchdogs.txt b/Documentation/lockup-watchdogs.txt index 4a6e33e1af61..c8b8378513d6 100644 --- a/Documentation/lockup-watchdogs.txt +++ b/Documentation/lockup-watchdogs.txt @@ -11,7 +11,7 @@ details), without giving other tasks a chance to run. The current stack trace is displayed upon detection and, by default, the system will stay locked up. Alternatively, the kernel can be configured to panic; a sysctl, "kernel.softlockup_panic", a kernel parameter, -"softlockup_panic" (see "Documentation/kernel-parameters.txt" for +"softlockup_panic" (see "Documentation/admin-guide/kernel-parameters.rst" for details), and a compile option, "BOOTPARAM_SOFTLOCKUP_PANIC", are provided for this. @@ -23,7 +23,7 @@ upon detection and the system will stay locked up unless the default behavior is changed, which can be done through a sysctl, 'hardlockup_panic', a compile time knob, "BOOTPARAM_HARDLOCKUP_PANIC", and a kernel parameter, "nmi_watchdog" -(see "Documentation/kernel-parameters.txt" for details). +(see "Documentation/admin-guide/kernel-parameters.rst" for details). The panic option can be used in combination with panic_timeout (this timeout is set through the confusingly named "kernel.panic" sysctl), diff --git a/Documentation/m68k/kernel-options.txt b/Documentation/m68k/kernel-options.txt index eaf32a1fd0b1..79d21246c75a 100644 --- a/Documentation/m68k/kernel-options.txt +++ b/Documentation/m68k/kernel-options.txt @@ -139,7 +139,7 @@ follows: PARTUUID=00112233-4455-6677-8899-AABBCCDDEEFF/PARTNROFF=-2 Authoritative information can be found in -"Documentation/kernel-parameters.txt". +"Documentation/admin-guide/kernel-parameters.rst". 2.2) ro, rw diff --git a/Documentation/media/uapi/v4l/diff-v4l.rst b/Documentation/media/uapi/v4l/diff-v4l.rst index 76b2ecab8657..8209eeb63dd2 100644 --- a/Documentation/media/uapi/v4l/diff-v4l.rst +++ b/Documentation/media/uapi/v4l/diff-v4l.rst @@ -648,12 +648,12 @@ microcode programming. 
A new interface for MPEG compression and playback devices is documented in :ref:`extended-controls`. .. [#f1] - According to Documentation/devices.txt these should be symbolic links + According to Documentation/admin-guide/devices.rst these should be symbolic links to ``/dev/video0``. Note the original bttv interface is not compatible with V4L or V4L2. .. [#f2] - According to ``Documentation/devices.txt`` a symbolic link to + According to ``Documentation/admin-guide/devices.rst`` a symbolic link to ``/dev/radio0``. .. [#f3] diff --git a/Documentation/media/v4l-drivers/bttv.rst b/Documentation/media/v4l-drivers/bttv.rst index 7abc1c9a261b..bc63b12efafd 100644 --- a/Documentation/media/v4l-drivers/bttv.rst +++ b/Documentation/media/v4l-drivers/bttv.rst @@ -304,10 +304,10 @@ bug. It is very helpful if you can tell where exactly it broke With a hard freeze you probably doesn't find anything in the logfiles. The only way to capture any kernel messages is to hook up a serial console and let some terminal application log the messages. /me uses -screen. See Documentation/serial-console.txt for details on setting +screen. See Documentation/admin-guide/serial-console.rst for details on setting up a serial console. -Read Documentation/oops-tracing.txt to learn how to get any useful +Read Documentation/admin-guide/oops-tracing.rst to learn how to get any useful information out of a register+stack dump printed by the kernel on protection faults (so-called "kernel oops"). diff --git a/Documentation/memory-hotplug.txt b/Documentation/memory-hotplug.txt index 0d7cb955aa01..5de846d3ecc0 100644 --- a/Documentation/memory-hotplug.txt +++ b/Documentation/memory-hotplug.txt @@ -324,7 +324,7 @@ guarantee that the memory block contains only migratable pages. Now, a boot option for making a memory block which consists of migratable pages is supported. By specifying "kernelcore=" or "movablecore=" boot option, you can create ZONE_MOVABLE...a zone which is just used for movable pages. 
-(See also Documentation/kernel-parameters.txt) +(See also Documentation/admin-guide/kernel-parameters.rst) Assume the system has "TOTAL" amount of memory at boot time, this boot option creates ZONE_MOVABLE as following. diff --git a/Documentation/networking/netconsole.txt b/Documentation/networking/netconsole.txt index 30409a36e95d..296ea00fd3eb 100644 --- a/Documentation/networking/netconsole.txt +++ b/Documentation/networking/netconsole.txt @@ -200,7 +200,7 @@ priority messages to the console. You can change this at runtime using: or by specifying "debug" on the kernel command line at boot, to send all kernel messages to the console. A specific value for this parameter can also be set using the "loglevel" kernel boot option. See the -dmesg(8) man page and Documentation/kernel-parameters.txt for details. +dmesg(8) man page and Documentation/admin-guide/kernel-parameters.rst for details. Netconsole was designed to be as instantaneous as possible, to enable the logging of even the most critical kernel bugs. It works diff --git a/Documentation/networking/netdev-FAQ.txt b/Documentation/networking/netdev-FAQ.txt index 0fe1c6e0dbcd..cdebc5c8705f 100644 --- a/Documentation/networking/netdev-FAQ.txt +++ b/Documentation/networking/netdev-FAQ.txt @@ -136,14 +136,14 @@ A: Normally Greg Kroah-Hartman collects stable commits himself, but Q: I see a network patch and I think it should be backported to stable. Should I request it via "stable@vger.kernel.org" like the references in - the kernel's Documentation/stable_kernel_rules.txt file say? + the kernel's Documentation/process/stable-kernel-rules.rst file say? A: No, not for networking. Check the stable queues as per above 1st to see if it is already queued. If not, then send a mail to netdev, listing the upstream commit ID and why you think it should be a stable candidate. Before you jump to go do the above, do note that the normal stable rules - in Documentation/stable_kernel_rules.txt still apply. 
So you need to + in Documentation/process/stable-kernel-rules.rst still apply. So you need to explicitly indicate why it is a critical fix and exactly what users are impacted. In addition, you need to convince yourself that you _really_ think it has been overlooked, vs. having been considered and rejected. @@ -165,7 +165,7 @@ A: No. See above answer. In short, if you think it really belongs in If you think there is some valid information relating to it being in stable that does _not_ belong in the commit log, then use the three - dash marker line as described in Documentation/SubmittingPatches to + dash marker line as described in Documentation/process/submitting-patches.rst to temporarily embed that information into the patch that you send. Q: Someone said that the comment style and coding convention is different @@ -220,5 +220,5 @@ A: Attention to detail. Re-read your own work as if you were the If it is your first patch, mail it to yourself so you can test apply it to an unpatched tree to confirm infrastructure didn't mangle it. - Finally, go back and read Documentation/SubmittingPatches to be + Finally, go back and read Documentation/process/submitting-patches.rst to be sure you are not repeating some common mistake documented there. diff --git a/Documentation/networking/vortex.txt b/Documentation/networking/vortex.txt index 97282da82b75..ad3dead052a4 100644 --- a/Documentation/networking/vortex.txt +++ b/Documentation/networking/vortex.txt @@ -364,7 +364,7 @@ steps you should take: - The contents of your report will vary a lot depending upon the problem. If it's a kernel crash then you should refer to the - REPORTING-BUGS file. + admin-guide/reporting-bugs.rst file. 
But for most problems it is useful to provide the following: diff --git a/Documentation/power/00-INDEX b/Documentation/power/00-INDEX index ad04cc8097ed..7cb6085839f3 100644 --- a/Documentation/power/00-INDEX +++ b/Documentation/power/00-INDEX @@ -6,7 +6,7 @@ basic-pm-debugging.txt - Debugging suspend and resume charger-manager.txt - Battery charger management. -devices.txt +devices.txt - How drivers interact with system-wide power management drivers-testing.txt - Testing suspend and resume support in device drivers diff --git a/Documentation/power/pci.txt b/Documentation/power/pci.txt index 44558882aa60..85c746cbab2c 100644 --- a/Documentation/power/pci.txt +++ b/Documentation/power/pci.txt @@ -8,7 +8,7 @@ management. Based on previous work by Patrick Mochel This document only covers the aspects of power management specific to PCI devices. For general description of the kernel's interfaces related to device -power management refer to Documentation/power/devices.txt and +power management refer to Documentation/power/devices.txt and Documentation/power/runtime_pm.txt. --------------------------------------------------------------------------- @@ -417,7 +417,7 @@ pm->runtime_idle() callback. 2.4. System-Wide Power Transitions ---------------------------------- There are a few different types of system-wide power transitions, described in -Documentation/power/devices.txt. Each of them requires devices to be handled +Documentation/power/devices.txt. Each of them requires devices to be handled in a specific way and the PM core executes subsystem-level power management callbacks for this purpose. They are executed in phases such that each phase involves executing the same subsystem-level callback for every device belonging @@ -623,7 +623,7 @@ System restore requires a hibernation image to be loaded into memory and the pre-hibernation memory contents to be restored before the pre-hibernation system activity can be resumed.
-As described in Documentation/power/devices.txt, the hibernation image is loaded +As described in Documentation/power/devices.txt, the hibernation image is loaded into memory by a fresh instance of the kernel, called the boot kernel, which in turn is loaded and run by a boot loader in the usual way. After the boot kernel has loaded the image, it needs to replace its own code and data with the code @@ -677,7 +677,7 @@ controlling the runtime power management of their devices. At the time of this writing there are two ways to define power management callbacks for a PCI device driver, the recommended one, based on using a -dev_pm_ops structure described in Documentation/power/devices.txt, and the +dev_pm_ops structure described in Documentation/power/devices.txt, and the "legacy" one, in which the .suspend(), .suspend_late(), .resume_early(), and .resume() callbacks from struct pci_driver are used. The legacy approach, however, doesn't allow one to define runtime power management callbacks and is @@ -1046,5 +1046,5 @@ PCI Local Bus Specification, Rev. 3.0 PCI Bus Power Management Interface Specification, Rev. 1.2 Advanced Configuration and Power Interface (ACPI) Specification, Rev. 3.0b PCI Express Base Specification, Rev. 2.0 -Documentation/power/devices.txt +Documentation/power/devices.txt Documentation/power/runtime_pm.txt diff --git a/Documentation/power/runtime_pm.txt b/Documentation/power/runtime_pm.txt index 1fd1fbe9ce95..4870980e967e 100644 --- a/Documentation/power/runtime_pm.txt +++ b/Documentation/power/runtime_pm.txt @@ -674,7 +674,7 @@ left in runtime suspend. If that happens, the PM core will not execute any system suspend and resume callbacks for all of those devices, except for the complete callback, which is then entirely responsible for handling the device as appropriate.
This only applies to system suspend transitions that are not -related to hibernation (see Documentation/power/devices.txt for more +related to hibernation (see Documentation/power/devices.txt for more information). The PM core does its best to reduce the probability of race conditions between diff --git a/Documentation/power/swsusp-dmcrypt.txt b/Documentation/power/swsusp-dmcrypt.txt index 59931b46ff7e..b802fbfd95ef 100644 --- a/Documentation/power/swsusp-dmcrypt.txt +++ b/Documentation/power/swsusp-dmcrypt.txt @@ -8,7 +8,7 @@ Some prerequisites: You know how dm-crypt works. If not, visit the following web page: http://www.saout.de/misc/dm-crypt/ You have read Documentation/power/swsusp.txt and understand it. -You did read Documentation/initrd.txt and know how an initrd works. +You did read Documentation/admin-guide/initrd.rst and know how an initrd works. You know how to create or how to modify an initrd. Now your system is properly set up, your disk is encrypted except for diff --git a/Documentation/process/4.Coding.rst b/Documentation/process/4.Coding.rst index 9d5cef996f7f..983d628c1112 100644 --- a/Documentation/process/4.Coding.rst +++ b/Documentation/process/4.Coding.rst @@ -22,7 +22,7 @@ Coding style ************ The kernel has long had a standard coding style, described in -Documentation/CodingStyle. For much of that time, the policies described +Documentation/process/coding-style.rst. For much of that time, the policies described in that file were taken as being, at most, advisory. As a result, there is a substantial amount of code in the kernel which does not meet the coding style guidelines. The presence of that code leads to two independent @@ -343,7 +343,7 @@ user-space developers to know what they are working with. See Documentation/ABI/README for a description of how this documentation should be formatted and what information needs to be provided.
-The file Documentation/kernel-parameters.txt describes all of the kernel's +The file Documentation/admin-guide/kernel-parameters.rst describes all of the kernel's boot-time parameters. Any patch which adds new parameters should add the appropriate entries to this file. diff --git a/Documentation/process/5.Posting.rst b/Documentation/process/5.Posting.rst index b511ddf7e82a..1b7728b19ea7 100644 --- a/Documentation/process/5.Posting.rst +++ b/Documentation/process/5.Posting.rst @@ -9,8 +9,8 @@ kernel. Unsurprisingly, the kernel development community has evolved a set of conventions and procedures which are used in the posting of patches; following them will make life much easier for everybody involved. This document will attempt to cover these expectations in reasonable detail; -more information can also be found in the files SubmittingPatches, -SubmittingDrivers, and SubmitChecklist in the kernel documentation +more information can also be found in the files process/submitting-patches.rst, +process/submitting-drivers.rst, and process/submit-checklist.rst in the kernel documentation directory. @@ -198,7 +198,7 @@ pass it to diff with the "-X" option. The tags mentioned above are used to describe how various developers have been associated with the development of this patch. They are described in -detail in the SubmittingPatches document; what follows here is a brief +detail in the process/submitting-patches.rst document; what follows here is a brief summary. Each of these lines has the format: :: @@ -210,7 +210,7 @@ The tags in common use are: - Signed-off-by: this is a developer's certification that he or she has the right to submit the patch for inclusion into the kernel. It is an agreement to the Developer's Certificate of Origin, the full text of - which can be found in Documentation/SubmittingPatches. Code without a + which can be found in Documentation/process/submitting-patches.rst. Code without a proper signoff cannot be merged into the mainline. 
- Acked-by: indicates an agreement by another developer (often a @@ -221,7 +221,7 @@ The tags in common use are: it to work. - Reviewed-by: the named developer has reviewed the patch for correctness; - see the reviewer's statement in Documentation/SubmittingPatches for more + see the reviewer's statement in Documentation/process/submitting-patches.rst for more detail. - Reported-by: names a user who reported a problem which is fixed by this @@ -248,7 +248,7 @@ take care of: be examined in any detail. If there is any doubt at all, mail the patch to yourself and convince yourself that it shows up intact. - Documentation/email-clients.txt has some helpful hints on making + Documentation/process/email-clients.rst has some helpful hints on making specific mail clients work for sending patches. - Are you sure your patch is free of silly mistakes? You should always diff --git a/Documentation/process/8.Conclusion.rst b/Documentation/process/8.Conclusion.rst index 23ec7cbc2d2b..1c7f54cd0261 100644 --- a/Documentation/process/8.Conclusion.rst +++ b/Documentation/process/8.Conclusion.rst @@ -5,9 +5,9 @@ For more information There are numerous sources of information on Linux kernel development and related topics. First among those will always be the Documentation -directory found in the kernel source distribution. The top-level HOWTO -file is an important starting point; SubmittingPatches and -SubmittingDrivers are also something which all kernel developers should +directory found in the kernel source distribution. The top-level process/howto.rst +file is an important starting point; process/submitting-patches.rst and +process/submitting-drivers.rst are also something which all kernel developers should read. 
Many internal kernel APIs are documented using the kerneldoc mechanism; "make htmldocs" or "make pdfdocs" can be used to generate those documents in HTML or PDF format (though the version of TeX shipped by some diff --git a/Documentation/process/adding-syscalls.rst b/Documentation/process/adding-syscalls.rst index f5b5b1aa51b3..8cc25a06f353 100644 --- a/Documentation/process/adding-syscalls.rst +++ b/Documentation/process/adding-syscalls.rst @@ -3,7 +3,7 @@ Adding a New System Call This document describes what's involved in adding a new system call to the Linux kernel, over and above the normal submission advice in -:ref:`Documentation/SubmittingPatches `. +:ref:`Documentation/process/submitting-patches.rst `. System Call Alternatives diff --git a/Documentation/process/coding-style.rst b/Documentation/process/coding-style.rst index 9c61c039ccd9..968808bec407 100644 --- a/Documentation/process/coding-style.rst +++ b/Documentation/process/coding-style.rst @@ -1058,5 +1058,5 @@ gcc internals and indent, all available from http://www.gnu.org/manual/ WG14 is the international standardization working group for the programming language C, URL: http://www.open-std.org/JTC1/SC22/WG14/ -Kernel CodingStyle, by greg@kroah.com at OLS 2002: +Kernel CodingStyle, by greg@kroah.com at OLS 2002: http://www.kroah.com/linux/talks/ols_2002_kernel_codingstyle_talk/html/ diff --git a/Documentation/process/howto.rst b/Documentation/process/howto.rst index 5f042349f987..3f66a1980726 100644 --- a/Documentation/process/howto.rst +++ b/Documentation/process/howto.rst @@ -90,19 +90,19 @@ required reading: what is necessary to do to configure and build the kernel. People who are new to the kernel should start here. - :ref:`Documentation/Changes ` + :ref:`Documentation/process/changes.rst ` This file gives a list of the minimum levels of various software packages that are necessary to build and run the kernel successfully.
- :ref:`Documentation/CodingStyle ` + :ref:`Documentation/process/coding-style.rst ` This describes the Linux kernel coding style, and some of the rationale behind it. All new code is expected to follow the guidelines in this document. Most maintainers will only accept patches if these rules are followed, and many people will only review code if it is in the proper style. - :ref:`Documentation/SubmittingPatches ` and :ref:`Documentation/SubmittingDrivers ` + :ref:`Documentation/process/submitting-patches.rst ` and :ref:`Documentation/process/submitting-drivers.rst ` These files describe in explicit detail how to successfully create and send a patch, including (but not limited to): @@ -122,7 +122,7 @@ required reading: "Linux kernel patch submission format" http://linux.yyz.us/patch-format.html - :ref:`Documentation/stable_api_nonsense.txt ` + :ref:`Documentation/process/stable-api-nonsense.rst ` This file describes the rationale behind the conscious decision to not have a stable API within the kernel, including things like: @@ -135,29 +135,29 @@ required reading: philosophy and is very important for people moving to Linux from development on other Operating Systems. - :ref:`Documentation/SecurityBugs ` + :ref:`Documentation/admin-guide/security-bugs.rst ` If you feel you have found a security problem in the Linux kernel, please follow the steps in this document to help notify the kernel developers, and help solve the issue. - :ref:`Documentation/ManagementStyle ` + :ref:`Documentation/process/management-style.rst ` This document describes how Linux kernel maintainers operate and the shared ethos behind their methodologies. This is important reading for anyone new to kernel development (or anyone simply curious about it), as it resolves a lot of common misconceptions and confusion about the unique behavior of kernel maintainers. 
- :ref:`Documentation/stable_kernel_rules.txt ` + :ref:`Documentation/process/stable-kernel-rules.rst ` This file describes the rules on how the stable kernel releases happen, and what to do if you want to get a change into one of these releases. - :ref:`Documentation/kernel-docs.txt ` + :ref:`Documentation/process/kernel-docs.rst ` A list of external documentation that pertains to kernel development. Please consult this list if you do not find what you are looking for within the in-kernel documentation. - :ref:`Documentation/applying-patches.txt ` + :ref:`Documentation/process/applying-patches.rst ` A good introduction describing exactly what a patch is and how to apply it to the different development branches of the kernel. @@ -307,7 +307,7 @@ two weeks, but it can be longer if there are no pressing problems. A security-related problem, instead, can cause a release to happen almost instantly. -The file Documentation/stable_kernel_rules.txt in the kernel tree +The file Documentation/process/stable-kernel-rules.rst in the kernel tree documents what kinds of changes are acceptable for the -stable tree, and how the release process works. @@ -366,7 +366,7 @@ tool. For details on how to use the kernel bugzilla, please see: https://bugzilla.kernel.org/page.cgi?id=faq.html -The file REPORTING-BUGS in the main kernel source directory has a good +The file admin-guide/reporting-bugs.rst in the main kernel source directory has a good template for how to report a possible kernel bug, and details what kind of information is needed by the kernel developers to help track down the problem. @@ -440,7 +440,7 @@ add your statements between the individual quoted sections instead of writing at the top of the mail. If you add patches to your mail, make sure they are plain readable text -as stated in Documentation/SubmittingPatches. +as stated in Documentation/process/submitting-patches.rst. 
Kernel developers don't want to deal with attachments or compressed patches; they may want to comment on individual lines of your patch, which works only that way. Make sure you diff --git a/Documentation/process/management-style.rst b/Documentation/process/management-style.rst index dea2e66c9a10..45595fd8a66b 100644 --- a/Documentation/process/management-style.rst +++ b/Documentation/process/management-style.rst @@ -5,7 +5,7 @@ Linux kernel management style This is a short document describing the preferred (or made up, depending on who you ask) management style for the linux kernel. It's meant to -mirror the CodingStyle document to some degree, and mainly written to +mirror the process/coding-style.rst document to some degree, and mainly written to avoid answering [#f1]_ the same (or similar) questions over and over again. Management style is very personal and much harder to quantify than diff --git a/Documentation/process/stable-kernel-rules.rst b/Documentation/process/stable-kernel-rules.rst index 4d82e31b7958..11ec2d93a5e0 100644 --- a/Documentation/process/stable-kernel-rules.rst +++ b/Documentation/process/stable-kernel-rules.rst @@ -27,7 +27,7 @@ Rules on what kind of patches are accepted, and which ones are not, into the - It cannot contain any "trivial" fixes in it (spelling changes, whitespace cleanups, etc). - It must follow the - :ref:`Documentation/SubmittingPatches ` + :ref:`Documentation/process/submitting-patches.rst ` rules. - It or an equivalent fix must already exist in Linus' tree (upstream). @@ -40,7 +40,7 @@ Procedure for submitting patches to the -stable tree Documentation/networking/netdev-FAQ.txt - Security patches should not be handled (solely) by the -stable review process but should follow the procedures in - :ref:`Documentation/SecurityBugs `. + :ref:`Documentation/admin-guide/security-bugs.rst `. 
For all other submissions, choose one of the following procedures ----------------------------------------------------------------- diff --git a/Documentation/process/submit-checklist.rst b/Documentation/process/submit-checklist.rst index 894289b22b15..a0d9d34bfb6d 100644 --- a/Documentation/process/submit-checklist.rst +++ b/Documentation/process/submit-checklist.rst @@ -7,7 +7,7 @@ Here are some basic things that developers should do if they want to see their kernel patch submissions accepted more quickly. These are all above and beyond the documentation that is provided in -:ref:`Documentation/SubmittingPatches ` +:ref:`Documentation/process/submitting-patches.rst ` and elsewhere regarding submitting Linux kernel patches. @@ -31,7 +31,7 @@ and elsewhere regarding submitting Linux kernel patches. tends to use ``unsigned long`` for 64-bit quantities. 5) Check your patch for general style as detailed in - :ref:`Documentation/CodingStyle `. + :ref:`Documentation/process/coding-style.rst `. Check for trivial violations with the patch style checker prior to submission (``scripts/checkpatch.pl``). You should be able to justify all violations that remain in @@ -78,7 +78,7 @@ and elsewhere regarding submitting Linux kernel patches. 16) All new ``/proc`` entries are documented under ``Documentation/`` 17) All new kernel boot parameters are documented in - ``Documentation/kernel-parameters.txt``. + ``Documentation/admin-guide/kernel-parameters.rst``. 18) All new module parameters are documented with ``MODULE_PARM_DESC()`` diff --git a/Documentation/process/submitting-drivers.rst b/Documentation/process/submitting-drivers.rst index 252b77a23fad..0939d018c289 100644 --- a/Documentation/process/submitting-drivers.rst +++ b/Documentation/process/submitting-drivers.rst @@ -8,7 +8,7 @@ various kernel trees. Note that if you are interested in video card drivers you should probably talk to XFree86 (http://www.xfree86.org/) and/or X.Org (http://x.org/) instead. 
-Also read the Documentation/SubmittingPatches document. +Also read the Documentation/process/submitting-patches.rst document. Allocating Device Numbers @@ -19,7 +19,7 @@ by the Linux assigned name and number authority (currently this is Torben Mathiasen). The site is http://www.lanana.org/. This also deals with allocating numbers for devices that are not going to be submitted to the mainstream kernel. -See Documentation/devices.txt for more information on this. +See Documentation/admin-guide/devices.rst for more information on this. If you don't use assigned numbers then when your device is submitted it will be given an assigned number even if that is different from values you may @@ -73,7 +73,7 @@ Interfaces: Code: Please use the Linux style of code formatting as documented - in :ref:`Documentation/CodingStyle `. + in :ref:`Documentation/process/coding-style.rst `. If you have sections of code that need to be in other formats, for example because they are shared with a windows driver kit and you want to @@ -109,7 +109,7 @@ PM support: anything. For the driver testing instructions see Documentation/power/drivers-testing.txt and for a relatively complete overview of the power management issues related to - drivers see Documentation/power/devices.txt . + drivers see Documentation/power/devices.txt . Control: In general if there is active maintenance of a driver by diff --git a/Documentation/process/submitting-patches.rst b/Documentation/process/submitting-patches.rst index 4cc20b2c6df3..b4cf8f375184 100644 --- a/Documentation/process/submitting-patches.rst +++ b/Documentation/process/submitting-patches.rst @@ -11,10 +11,10 @@ can greatly increase the chances of your change being accepted. This document contains a large number of suggestions in a relatively terse format. For detailed information on how the kernel development process works, see :ref:`Documentation/process `.
-Also, read :ref:`Documentation/SubmitChecklist ` +Also, read :ref:`Documentation/process/submit-checklist.rst ` for a list of items to check before submitting code. If you are submitting a driver, also read -:ref:`Documentation/SubmittingDrivers `; +:ref:`Documentation/process/submitting-drivers.rst `; for device tree binding patches, read Documentation/devicetree/bindings/submitting-patches.txt. @@ -238,7 +238,7 @@ then only post say 15 or so at a time and wait for review and integration. Check your patch for basic style violations, details of which can be found in -:ref:`Documentation/CodingStyle `. +:ref:`Documentation/process/coding-style.rst `. Failure to do so simply wastes the reviewers time and will get your patch rejected, probably without even being read. @@ -305,7 +305,7 @@ toward the stable maintainers by putting a line like this:: into the sign-off area of your patch (note, NOT an email recipient). You should also read -:ref:`Documentation/stable_kernel_rules.txt ` +:ref:`Documentation/process/stable-kernel-rules.rst ` in addition to this file. Note, however, that some subsystem maintainers want to come to their own @@ -363,7 +363,7 @@ decreasing the likelihood of your MIME-attached change being accepted. Exception: If your mailer is mangling patches then someone may ask you to re-send them using MIME. -See :ref:`Documentation/email-clients.txt ` +See :ref:`Documentation/process/email-clients.rst ` for hints about configuring your e-mail client so that it sends your patches untouched. @@ -828,8 +828,8 @@ Greg Kroah-Hartman, "How to piss off a kernel subsystem maintainer". NO!!!! No more huge patch bombs to linux-kernel@vger.kernel.org people! 
-Kernel Documentation/CodingStyle: - :ref:`Documentation/CodingStyle ` +Kernel Documentation/process/coding-style.rst: + :ref:`Documentation/process/coding-style.rst ` Linus Torvalds's mail on the canonical patch format: diff --git a/Documentation/rfkill.txt b/Documentation/rfkill.txt index 1f0c27049340..8c174063b3f0 100644 --- a/Documentation/rfkill.txt +++ b/Documentation/rfkill.txt @@ -26,7 +26,7 @@ whether they can be changed or not: the system software. The rfkill subsystem has two parameters, rfkill.default_state and -rfkill.master_switch_mode, which are documented in kernel-parameters.txt. +rfkill.master_switch_mode, which are documented in admin-guide/kernel-parameters.rst. 2. Implementation details diff --git a/Documentation/scsi/scsi-parameters.txt b/Documentation/scsi/scsi-parameters.txt index 8e66dafa41e1..8477655c0e46 100644 --- a/Documentation/scsi/scsi-parameters.txt +++ b/Documentation/scsi/scsi-parameters.txt @@ -1,7 +1,7 @@ SCSI Kernel Parameters ~~~~~~~~~~~~~~~~~~~~~~ -See Documentation/kernel-parameters.txt for general information on +See Documentation/admin-guide/kernel-parameters.rst for general information on specifying module parameters. This document may not be entirely up to date and comprehensive. The command diff --git a/Documentation/scsi/scsi_mid_low_api.txt b/Documentation/scsi/scsi_mid_low_api.txt index 255075157511..6338400eed73 100644 --- a/Documentation/scsi/scsi_mid_low_api.txt +++ b/Documentation/scsi/scsi_mid_low_api.txt @@ -336,7 +336,7 @@ in parallel by these functions. Conventions =========== First, Linus Torvalds's thoughts on C coding style can be found in the -Documentation/CodingStyle file. +Documentation/process/coding-style.rst file. Next, there is a movement to "outlaw" typedefs introducing synonyms for struct tags. 
Both can be still found in the SCSI subsystem, but diff --git a/Documentation/scsi/sym53c8xx_2.txt b/Documentation/scsi/sym53c8xx_2.txt index 6af8f7a7770f..d28186553fb0 100644 --- a/Documentation/scsi/sym53c8xx_2.txt +++ b/Documentation/scsi/sym53c8xx_2.txt @@ -427,7 +427,7 @@ Synchronous transfers frequency (default answer: 80) 10.1 Syntax Setup commands can be passed to the driver either at boot time or as -parameters to modprobe, as described in Documentation/kernel-parameters.txt +parameters to modprobe, as described in Documentation/admin-guide/kernel-parameters.rst Example of boot setup command under lilo prompt: diff --git a/Documentation/sound/alsa/alsa-parameters.txt b/Documentation/sound/alsa/alsa-parameters.txt index 0fa40679b080..72eced86f035 100644 --- a/Documentation/sound/alsa/alsa-parameters.txt +++ b/Documentation/sound/alsa/alsa-parameters.txt @@ -1,7 +1,7 @@ ALSA Kernel Parameters ~~~~~~~~~~~~~~~~~~~~~~ -See Documentation/kernel-parameters.txt for general information on +See Documentation/admin-guide/kernel-parameters.rst for general information on specifying module parameters. This document may not be entirely up to date and comprehensive. The command diff --git a/Documentation/sound/oss/oss-parameters.txt b/Documentation/sound/oss/oss-parameters.txt index 3ab391e7c295..cc675f25eee4 100644 --- a/Documentation/sound/oss/oss-parameters.txt +++ b/Documentation/sound/oss/oss-parameters.txt @@ -1,7 +1,7 @@ OSS Kernel Parameters ~~~~~~~~~~~~~~~~~~~~~ -See Documentation/kernel-parameters.txt for general information on +See Documentation/admin-guide/kernel-parameters.rst for general information on specifying module parameters. This document may not be entirely up to date and comprehensive. 
The command diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt index ffab8b5caa60..6bb78f872929 100644 --- a/Documentation/sysctl/kernel.txt +++ b/Documentation/sysctl/kernel.txt @@ -71,7 +71,7 @@ show up in /proc/sys/kernel: - printk_ratelimit_burst - pty ==> Documentation/filesystems/devpts.txt - randomize_va_space -- real-root-dev ==> Documentation/initrd.txt +- real-root-dev ==> Documentation/admin-guide/initrd.rst - reboot-cmd [ SPARC only ] - rtsig-max - rtsig-nr @@ -453,7 +453,7 @@ in a KVM virtual machine. This default can be overridden by adding nmi_watchdog=1 -to the guest kernel command line (see Documentation/kernel-parameters.txt). +to the guest kernel command line (see Documentation/admin-guide/kernel-parameters.rst). ============================================================== diff --git a/Documentation/virtual/kvm/review-checklist.txt b/Documentation/virtual/kvm/review-checklist.txt index a850986ed684..a83b27635fdd 100644 --- a/Documentation/virtual/kvm/review-checklist.txt +++ b/Documentation/virtual/kvm/review-checklist.txt @@ -1,8 +1,8 @@ Review checklist for kvm patches ================================ -1. The patch must follow Documentation/CodingStyle and - Documentation/SubmittingPatches. +1. The patch must follow Documentation/process/coding-style.rst and + Documentation/process/submitting-patches.rst. 2. Patches should be against kvm.git master branch. diff --git a/Documentation/vm/numa b/Documentation/vm/numa index e0b58c0e6b49..a08f71647714 100644 --- a/Documentation/vm/numa +++ b/Documentation/vm/numa @@ -82,7 +82,7 @@ such as DMA or DMA32, represent relatively scarce resources. Linux chooses a default zonelist order based on the sizes of the various zone types relative to the total memory of the node and the total memory of the system. The default zonelist order may be overridden using the numa_zonelist_order kernel -boot parameter or sysctl. 
[see Documentation/kernel-parameters.txt and +boot parameter or sysctl. [see Documentation/admin-guide/kernel-parameters.rst and Documentation/sysctl/vm.txt] By default, Linux will attempt to satisfy memory allocation requests from the diff --git a/Documentation/watchdog/convert_drivers_to_kernel_api.txt b/Documentation/watchdog/convert_drivers_to_kernel_api.txt index 271b8850dde7..9fffb2958d13 100644 --- a/Documentation/watchdog/convert_drivers_to_kernel_api.txt +++ b/Documentation/watchdog/convert_drivers_to_kernel_api.txt @@ -213,6 +213,6 @@ The entry for the driver now needs to select WATCHDOG_CORE: Create a patch and send it to upstream -------------------------------------- -Make sure you understood Documentation/SubmittingPatches and send your patch to +Make sure you understood Documentation/process/submitting-patches.rst and send your patch to linux-watchdog@vger.kernel.org. We are looking forward to it :) diff --git a/Documentation/watchdog/watchdog-parameters.txt b/Documentation/watchdog/watchdog-parameters.txt index a8d364227a77..e21850e270a0 100644 --- a/Documentation/watchdog/watchdog-parameters.txt +++ b/Documentation/watchdog/watchdog-parameters.txt @@ -4,7 +4,7 @@ be listed here unless the driver has its own driver-specific information file. -See Documentation/kernel-parameters.txt for information on +See Documentation/admin-guide/kernel-parameters.rst for information on providing kernel parameters for builtin drivers versus loadable modules. diff --git a/Documentation/x86/boot.txt b/Documentation/x86/boot.txt index 9da6f3512249..5e9b826b5f62 100644 --- a/Documentation/x86/boot.txt +++ b/Documentation/x86/boot.txt @@ -921,7 +921,7 @@ They should normally not be deleted from the kernel command line even though not all of them are actually meaningful to the kernel. 
Boot loader authors who need additional command line options for the boot loader itself should get them registered in -Documentation/kernel-parameters.txt to make sure they will not +Documentation/admin-guide/kernel-parameters.rst to make sure they will not conflict with actual kernel options now or in the future. vga= diff --git a/Documentation/zh_CN/CodingStyle b/Documentation/zh_CN/CodingStyle index 12717791baac..b02738042799 100644 --- a/Documentation/zh_CN/CodingStyle +++ b/Documentation/zh_CN/CodingStyle @@ -1,4 +1,4 @@ -Chinese translated version of Documentation/CodingStyle +Chinese translated version of Documentation/process/coding-style.rst If you have any comment or update to the content, please post to LKML directly. However, if you have problem communicating in English you can also ask the @@ -7,7 +7,7 @@ translation is outdated or there is problem with translation. Chinese maintainer: Zhang Le --------------------------------------------------------------------- -Documentation/CodingStyle的中文翻译 +Documentation/process/coding-style.rst的中文翻译 如果想评论或更新本文的内容,请直接发信到LKML。如果你使用英文交流有困难的话,也可 以向中文版维护者求助。如果本翻译更新不及时或者翻译存在问题,请联系中文版维护者。 @@ -809,5 +809,5 @@ GNU 手册 - 遵循 K&R 标准和此文本 - cpp, gcc, gcc internals and indent, WG14是C语言的国际标准化工作组,URL: http://www.open-std.org/JTC1/SC22/WG14/ -Kernel CodingStyle,作者 greg@kroah.com 发表于OLS 2002: +Kernel process/coding-style.rst,作者 greg@kroah.com 发表于OLS 2002: http://www.kroah.com/linux/talks/ols_2002_kernel_codingstyle_talk/html/ diff --git a/Documentation/zh_CN/HOWTO b/Documentation/zh_CN/HOWTO index f0613b92e0be..11be075ba5fa 100644 --- a/Documentation/zh_CN/HOWTO +++ b/Documentation/zh_CN/HOWTO @@ -1,4 +1,4 @@ -Chinese translated version of Documentation/HOWTO +Chinese translated version of Documentation/process/howto.rst If you have any comment or update to the content, please contact the original document maintainer directly. However, if you have a problem @@ -9,7 +9,7 @@ or if there is a problem with the translation. 
Maintainer: Greg Kroah-Hartman Chinese maintainer: Li Yang --------------------------------------------------------------------- -Documentation/HOWTO 的中文翻译 +Documentation/process/howto.rst 的中文翻译 如果想评论或更新本文的内容,请直接联系原文档的维护者。如果你使用英文 交流有困难的话,也可以向中文版维护者求助。如果本翻译更新不及时或者翻 @@ -93,16 +93,16 @@ Linux内核代码中包含有大量的文档。这些文档对于学习如何与 文件简要介绍了Linux内核的背景,并且描述了如何配置和编译内核。内核的 新用户应该从这里开始。 - Documentation/Changes + Documentation/process/changes.rst 文件给出了用来编译和使用内核所需要的最小软件包列表。 - Documentation/CodingStyle + Documentation/process/coding-style.rst 描述Linux内核的代码风格和理由。所有新代码需要遵守这篇文档中定义的规 范。大多数维护者只会接收符合规定的补丁,很多人也只会帮忙检查符合风格 的代码。 - Documentation/SubmittingPatches - Documentation/SubmittingDrivers + Documentation/process/submitting-patches.rst + Documentation/process/submitting-drivers.rst 这两份文档明确描述如何创建和发送补丁,其中包括(但不仅限于): - 邮件内容 - 邮件格式 @@ -116,7 +116,7 @@ Linux内核代码中包含有大量的文档。这些文档对于学习如何与 "Linux kernel patch submission format" http://linux.yyz.us/patch-format.html - Documentation/stable_api_nonsense.txt + Documentation/process/stable-api-nonsense.rst 论证内核为什么特意不包括稳定的内核内部API,也就是说不包括像这样的特 性: - 子系统中间层(为了兼容性?) 
@@ -125,23 +125,23 @@ Linux内核代码中包含有大量的文档。这些文档对于学习如何与 这篇文档对于理解Linux的开发哲学至关重要。对于将开发平台从其他操作系 统转移到Linux的人来说也很重要。 - Documentation/SecurityBugs + Documentation/admin-guide/security-bugs.rst 如果你认为自己发现了Linux内核的安全性问题,请根据这篇文档中的步骤来 提醒其他内核开发者并帮助解决这个问题。 - Documentation/ManagementStyle + Documentation/process/management-style.rst 描述内核维护者的工作方法及其共有特点。这对于刚刚接触内核开发(或者对 它感到好奇)的人来说很重要,因为它解释了很多对于内核维护者独特行为的 普遍误解与迷惑。 - Documentation/stable_kernel_rules.txt + Documentation/process/stable-kernel-rules.rst 解释了稳定版内核发布的规则,以及如何将改动放入这些版本的步骤。 - Documentation/kernel-docs.txt + Documentation/process/kernel-docs.rst 有助于内核开发的外部文档列表。如果你在内核自带的文档中没有找到你想找 的内容,可以查看这些文档。 - Documentation/applying-patches.txt + Documentation/process/applying-patches.rst 关于补丁是什么以及如何将它打在不同内核开发分支上的好介绍 内核还拥有大量从代码自动生成的文档。它包含内核内部API的全面介绍以及如何 @@ -238,7 +238,7 @@ kernel.org网站的pub/linux/kernel/v2.6/目录下找到它。它的开发遵循 2.6.x.y版本由“稳定版”小组(邮件地址)维护,一般隔周发 布新版本。 -内核源码中的Documentation/stable_kernel_rules.txt文件具体描述了可被稳定 +内核源码中的Documentation/process/stable-kernel-rules.rst文件具体描述了可被稳定 版内核接受的修改类型以及发布的流程。 @@ -329,7 +329,7 @@ bugzilla.kernel.org是Linux内核开发者们用来跟踪内核Bug的网站。 户在这个工具中报告找到的所有bug。如何使用内核bugzilla的细节请访问: http://test.kernel.org/bugzilla/faq.html -内核源码主目录中的REPORTING-BUGS文件里有一个很好的模板。它指导用户如何报 +内核源码主目录中的admin-guide/reporting-bugs.rst文件里有一个很好的模板。它指导用户如何报 告可能的内核bug以及需要提供哪些信息来帮助内核开发者们找到问题的根源。 @@ -380,7 +380,7 @@ MAINTAINERS文件中可以找到不同话题对应的邮件列表。 这几行。将你的评论加在被引用的段落之间而不要放在邮件的顶部。 如果你在邮件中附带补丁,请确认它们是可以直接阅读的纯文本(如 -Documentation/SubmittingPatches文档中所述)。内核开发者们不希望遇到附件 +Documentation/process/submitting-patches.rst文档中所述)。内核开发者们不希望遇到附件 或者被压缩了的补丁。只有这样才能保证他们可以直接评论你的每行代码。请确保 你使用的邮件发送程序不会修改空格和制表符。一个防范性的测试方法是先将邮件 发送给自己,然后自己尝试是否可以顺利地打上收到的补丁。如果测试不成功,请 diff --git a/Documentation/zh_CN/SecurityBugs b/Documentation/zh_CN/SecurityBugs index d21eb07fe943..2d0fffd122ce 100644 --- a/Documentation/zh_CN/SecurityBugs +++ b/Documentation/zh_CN/SecurityBugs @@ -1,4 +1,4 @@ -Chinese translated version of Documentation/SecurityBugs +Chinese translated version of 
Documentation/admin-guide/security-bugs.rst If you have any comment or update to the content, please contact the original document maintainer directly. However, if you have a problem @@ -8,7 +8,7 @@ or if there is a problem with the translation. Chinese maintainer: Harry Wei --------------------------------------------------------------------- -Documentation/SecurityBugs 的中文翻译 +Documentation/admin-guide/security-bugs.rst 的中文翻译 如果想评论或更新本文的内容,请直接联系原文档的维护者。如果你使用英文 交流有困难的话,也可以向中文版维护者求助。如果本翻译更新不及时或者翻 @@ -31,7 +31,7 @@ linux内核安全团队可以通过email来联系。这是 一组独立的安全工作人员,可以帮助改善漏洞报告并且公布和取消一个修复。安 全团队有可能会从部分的维护者那里引进额外的帮助来了解并且修复安全漏洞。 当遇到任何漏洞,所能提供的信息越多就越能诊断和修复。如果你不清楚什么 -是有帮助的信息,那就请重温一下REPORTING-BUGS文件中的概述过程。任 +是有帮助的信息,那就请重温一下admin-guide/reporting-bugs.rst文件中的概述过程。任 何攻击性的代码都是非常有用的,未经报告者的同意不会被取消,除非它已经 被公布于众。 diff --git a/Documentation/zh_CN/SubmittingDrivers b/Documentation/zh_CN/SubmittingDrivers index d313f5d8448d..929385e4b194 100644 --- a/Documentation/zh_CN/SubmittingDrivers +++ b/Documentation/zh_CN/SubmittingDrivers @@ -1,4 +1,4 @@ -Chinese translated version of Documentation/SubmittingDrivers +Chinese translated version of Documentation/process/submitting-drivers.rst If you have any comment or update to the content, please contact the original document maintainer directly. However, if you have a problem @@ -8,7 +8,7 @@ or if there is a problem with the translation. 
Chinese maintainer: Li Yang --------------------------------------------------------------------- -Documentation/SubmittingDrivers 的中文翻译 +Documentation/process/submitting-drivers.rst 的中文翻译 如果想评论或更新本文的内容,请直接联系原文档的维护者。如果你使用英文 交流有困难的话,也可以向中文版维护者求助。如果本翻译更新不及时或者翻 @@ -30,7 +30,7 @@ Documentation/SubmittingDrivers 的中文翻译 兴趣的是显卡驱动程序,你也许应该访问 XFree86 项目(http://www.xfree86.org/) 和/或 X.org 项目 (http://x.org)。 -另请参阅 Documentation/SubmittingPatches 文档。 +另请参阅 Documentation/process/submitting-patches.rst 文档。 分配设备号 @@ -39,7 +39,7 @@ Documentation/SubmittingDrivers 的中文翻译 块设备和字符设备的主设备号与从设备号是由 Linux 命名编号分配权威 LANANA( 现在是 Torben Mathiasen)负责分配。申请的网址是 http://www.lanana.org/。 即使不准备提交到主流内核的设备驱动也需要在这里分配设备号。有关详细信息, -请参阅 Documentation/devices.txt。 +请参阅 Documentation/admin-guide/devices.rst。 如果你使用的不是已经分配的设备号,那么当你提交设备驱动的时候,它将会被强 制分配一个新的设备号,即便这个设备号和你之前发给客户的截然不同。 @@ -81,7 +81,7 @@ Linux 2.6: 如果你需要一个 Linux 和 NT 的通用驱动接口,那么请在用 户空间实现它。 -代码: 请使用 Documentation/CodingStyle 中所描述的 Linux 代码风 +代码: 请使用 Documentation/process/coding-style.rst 中所描述的 Linux 代码风 格。如果你的某些代码段(例如那些与 Windows 驱动程序包共 享的代码段)需要使用其他格式,而你却只希望维护一份代码, 那么请将它们很好地区分出来,并且注明原因。 @@ -107,7 +107,7 @@ Linux 2.6: 程序测试的指导,请参阅 Documentation/power/drivers-testing.txt。有关驱动程序电 源管理问题相对全面的概述,请参阅 - Documentation/power/devices.txt。 + Documentation/power/devices.txt。 管理: 如果一个驱动程序的作者还在进行有效的维护,那么通常除了那 些明显正确且不需要任何检查的补丁以外,其他所有的补丁都会 diff --git a/Documentation/zh_CN/SubmittingPatches b/Documentation/zh_CN/SubmittingPatches index 1d3a10f8746b..e9098da8f1a4 100644 --- a/Documentation/zh_CN/SubmittingPatches +++ b/Documentation/zh_CN/SubmittingPatches @@ -1,4 +1,4 @@ -Chinese translated version of Documentation/SubmittingPatches +Chinese translated version of Documentation/process/submitting-patches.rst If you have any comment or update to the content, please contact the original document maintainer directly. However, if you have a problem @@ -8,7 +8,7 @@ or if there is a problem with the translation. 
Chinese maintainer: TripleX Chung --------------------------------------------------------------------- -Documentation/SubmittingPatches 的中文翻译 +Documentation/process/submitting-patches.rst 的中文翻译 如果想评论或更新本文的内容,请直接联系原文档的维护者。如果你使用英文 交流有困难的话,也可以向中文版维护者求助。如果本翻译更新不及时或者翻 @@ -30,9 +30,9 @@ Documentation/SubmittingPatches 的中文翻译 对于想要将改动提交到 Linux 内核的个人或者公司来说,如果不熟悉“规矩”, 提交的流程会让人畏惧。本文档收集了一系列建议,这些建议可以大大的提高你 的改动被接受的机会。 -阅读 Documentation/SubmitChecklist 来获得在提交代码前需要检查的项目的列 +阅读 Documentation/process/submit-checklist.rst 来获得在提交代码前需要检查的项目的列 表。如果你在提交一个驱动程序,那么同时阅读一下 -Documentation/SubmittingDrivers 。 +Documentation/process/submitting-drivers.rst 。 -------------------------- @@ -338,7 +338,7 @@ e-mail 标题中的“一句话概述”扼要的描述 e-mail 中的补丁。 本节包含很多和提交到内核的代码有关的通常的"规则"。事情永远有例外...但是 你必须真的有好的理由这样做。你可以把本节叫做Linus的计算机科学入门课。 -1) 读 Document/CodingStyle +1) 读 Document/process/coding-style.rst Nuff 说过,如果你的代码和这个偏离太多,那么它有可能会被拒绝,没有更多的 审查,没有更多的评价。 @@ -404,8 +404,8 @@ Greg Kroah-Hartman, "How to piss off a kernel subsystem maintainer". NO!!!! No more huge patch bombs to linux-kernel@vger.kernel.org people! 
-Kernel Documentation/CodingStyle: - +Kernel Documentation/process/coding-style.rst: + Linus Torvalds's mail on the canonical patch format: diff --git a/Documentation/zh_CN/arm/Booting b/Documentation/zh_CN/arm/Booting index 6158a64df80c..1fe866f8218f 100644 --- a/Documentation/zh_CN/arm/Booting +++ b/Documentation/zh_CN/arm/Booting @@ -68,7 +68,7 @@ RAM,或可能使用对这个设备已知的 RAM 信息,还可能使用任何 作为替代方案,引导加载程序也可以通过标签列表传递相关的'console=' 选项给内核以指定某个串口,而串口数据格式的选项在以下文档中描述: - Documentation/kernel-parameters.txt。 + Documentation/admin-guide/kernel-parameters.rst。 3、检测机器类型 diff --git a/Documentation/zh_CN/email-clients.txt b/Documentation/zh_CN/email-clients.txt index b9a1a3e6c78d..ec31d97e8d0e 100644 --- a/Documentation/zh_CN/email-clients.txt +++ b/Documentation/zh_CN/email-clients.txt @@ -1,4 +1,4 @@ -Chinese translated version of Documentation/email-clients.txt +Chinese translated version of Documentation/process/email-clients.rst If you have any comment or update to the content, please contact the original document maintainer directly. However, if you have a problem @@ -8,7 +8,7 @@ or if there is a problem with the translation. Chinese maintainer: Harry Wei --------------------------------------------------------------------- -Documentation/email-clients.txt 的中文翻译 +Documentation/process/email-clients.rst 的中文翻译 如果想评论或更新本文的内容,请直接联系原文档的维护者。如果你使用英文 交流有困难的话,也可以向中文版维护者求助。如果本翻译更新不及时或者翻 diff --git a/Documentation/zh_CN/oops-tracing.txt b/Documentation/zh_CN/oops-tracing.txt index 9312608ffb8d..41ab53cc0e83 100644 --- a/Documentation/zh_CN/oops-tracing.txt +++ b/Documentation/zh_CN/oops-tracing.txt @@ -1,4 +1,4 @@ -Chinese translated version of Documentation/oops-tracing.txt +Chinese translated version of Documentation/admin-guide/oops-tracing.rst If you have any comment or update to the content, please contact the original document maintainer directly. However, if you have a problem @@ -8,7 +8,7 @@ or if there is a problem with the translation. 
Chinese maintainer: Dave Young --------------------------------------------------------------------- -Documentation/oops-tracing.txt 的中文翻译 +Documentation/admin-guide/oops-tracing.rst 的中文翻译 如果想评论或更新本文的内容,请直接联系原文档的维护者。如果你使用英文 交流有困难的话,也可以向中文版维护者求助。如果本翻译更新不及时或者翻 @@ -50,7 +50,7 @@ cat /proc/kmsg > file, 然而你必须介入中止传输, kmsg是一个“ 息滚动到了终端的上面,你会发现以高分辩率启动(比如,vga=791)会让你读到更多的文 本。(注意:这需要vesafb,所以对‘早期’的oops没有帮助) -(2)用串口终端启动(请参看Documentation/serial-console.txt),运行一个null +(2)用串口终端启动(请参看Documentation/admin-guide/serial-console.rst),运行一个null modem到另一台机器并用你喜欢的通讯工具获取输出。Minicom工作地很好。 (3)使用Kdump(请参看Documentation/kdump/kdump.txt), diff --git a/Documentation/zh_CN/stable_api_nonsense.txt b/Documentation/zh_CN/stable_api_nonsense.txt index c26a27d1ee7d..a2b27fab382c 100644 --- a/Documentation/zh_CN/stable_api_nonsense.txt +++ b/Documentation/zh_CN/stable_api_nonsense.txt @@ -1,4 +1,4 @@ -Chinese translated version of Documentation/stable_api_nonsense.txt +Chinese translated version of Documentation/process/stable-api-nonsense.rst If you have any comment or update to the content, please contact the original document maintainer directly. However, if you have problem @@ -9,7 +9,7 @@ is problem with translation. 
Maintainer: Greg Kroah-Hartman Chinese maintainer: TripleX Chung --------------------------------------------------------------------- -Documentation/stable_api_nonsense.txt 的中文翻译 +Documentation/process/stable-api-nonsense.rst 的中文翻译 如果想评论或更新本文的内容,请直接联系原文档的维护者。如果你使用英文 交流有困难的话,也可以向中文版维护者求助。如果本翻译更新不及时或者翻 diff --git a/Documentation/zh_CN/stable_kernel_rules.txt b/Documentation/zh_CN/stable_kernel_rules.txt index 26ea5ed7cd9c..db4ba5a0c39a 100644 --- a/Documentation/zh_CN/stable_kernel_rules.txt +++ b/Documentation/zh_CN/stable_kernel_rules.txt @@ -1,4 +1,4 @@ -Chinese translated version of Documentation/stable_kernel_rules.txt +Chinese translated version of Documentation/process/stable-kernel-rules.rst If you have any comment or update to the content, please contact the original document maintainer directly. However, if you have a problem @@ -8,7 +8,7 @@ or if there is a problem with the translation. Chinese maintainer: TripleX Chung --------------------------------------------------------------------- -Documentation/stable_kernel_rules.txt 的中文翻译 +Documentation/process/stable-kernel-rules.rst 的中文翻译 如果想评论或更新本文的内容,请直接联系原文档的维护者。如果你使用英文 交流有困难的话,也可以向中文版维护者求助。如果本翻译更新不及时或者翻 @@ -38,7 +38,7 @@ Documentation/stable_kernel_rules.txt 的中文翻译 - 没有“理论上的竞争条件”,除非能给出竞争条件如何被利用的解释。 - 不能存在任何的“琐碎的”修正(拼写修正,去掉多余空格之类的)。 - 必须被相关子系统的维护者接受。 - - 必须遵循Documentation/SubmittingPatches里的规则。 + - 必须遵循Documentation/process/submitting-patches.rst里的规则。 向稳定版代码树提交补丁的过程: diff --git a/Documentation/zh_CN/volatile-considered-harmful.txt b/Documentation/zh_CN/volatile-considered-harmful.txt index ba8149d2233a..475125967197 100644 --- a/Documentation/zh_CN/volatile-considered-harmful.txt +++ b/Documentation/zh_CN/volatile-considered-harmful.txt @@ -1,4 +1,4 @@ -Chinese translated version of Documentation/volatile-considered-harmful.txt +Chinese translated version of Documentation/process/volatile-considered-harmful.rst If you have any comment or update to the content, please contact the original document maintainer 
directly. However, if you have a problem @@ -9,7 +9,7 @@ or if there is a problem with the translation. Maintainer: Jonathan Corbet Chinese maintainer: Bryan Wu --------------------------------------------------------------------- -Documentation/volatile-considered-harmful.txt 的中文翻译 +Documentation/process/volatile-considered-harmful.rst 的中文翻译 如果想评论或更新本文的内容,请直接联系原文档的维护者。如果你使用英文 交流有困难的话,也可以向中文版维护者求助。如果本翻译更新不及时或者翻 diff --git a/MAINTAINERS b/MAINTAINERS index de0451df542f..69820b75b2e0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -35,13 +35,13 @@ trivial patch so apply some common sense. PLEASE check your patch with the automated style checker (scripts/checkpatch.pl) to catch trivial style violations. - See Documentation/CodingStyle for guidance here. + See Documentation/process/coding-style.rst for guidance here. PLEASE CC: the maintainers and mailing lists that are generated by scripts/get_maintainer.pl. The results returned by the script will be best if you have git installed and are making your changes in a branch derived from Linus' latest git tree. - See Documentation/SubmittingPatches for details. + See Documentation/process/submitting-patches.rst for details. PLEASE try to include any credit lines you want added with the patch. It avoids people being missed off by mistake and makes @@ -54,7 +54,7 @@ trivial patch so apply some common sense. of the Linux Foundation certificate of contribution and should include a Signed-off-by: line. The current version of this "Developer's Certificate of Origin" (DCO) is listed in the file - Documentation/SubmittingPatches. + Documentation/process/submitting-patches.rst. 6. Make sure you have the right to send any changes you make. 
If you do changes at work you may find your employer owns the patch @@ -2924,7 +2924,7 @@ CAPELLA MICROSYSTEMS LIGHT SENSOR DRIVER M: Kevin Tsai S: Maintained F: drivers/iio/light/cm* -F: Documentation/devicetree/bindings/i2c/trivial-devices.txt +F: Documentation/devicetree/bindings/i2c/trivial-devices.txt CAVIUM I2C DRIVER M: Jan Glauber @@ -11438,7 +11438,7 @@ STABLE BRANCH M: Greg Kroah-Hartman L: stable@vger.kernel.org S: Supported -F: Documentation/stable_kernel_rules.txt +F: Documentation/process/stable-kernel-rules.rst STAGING SUBSYSTEM M: Greg Kroah-Hartman diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index bada636d1065..19d237b0737d 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1525,7 +1525,7 @@ config X86_CHECK_BIOS_CORRUPTION line. By default it scans the low 64k of memory every 60 seconds; see the memory_corruption_check_size and memory_corruption_check_period parameters in - Documentation/kernel-parameters.txt to adjust this. + Documentation/admin-guide/kernel-parameters.rst to adjust this. When enabled with the default parameters, this option has almost no overhead, as it reserves a relatively small amount diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index 535e7828445a..c5f9cbe0ae21 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -342,7 +342,7 @@ config ACPI_DEBUG Use the acpi.debug_layer and acpi.debug_level kernel command-line parameters documented in Documentation/acpi/debug.txt and - Documentation/kernel-parameters.txt to control the type and + Documentation/admin-guide/kernel-parameters.rst to control the type and amount of debug output. 
config ACPI_PCI_SLOT diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 223a770f78f3..59ce0dd50701 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -129,7 +129,7 @@ static int ata_force_tbl_size; static char ata_force_param_buf[PAGE_SIZE] __initdata; /* param_buf is thrown away after initialization, disallow read */ module_param_string(force, ata_force_param_buf, sizeof(ata_force_param_buf), 0); -MODULE_PARM_DESC(force, "Force ATA configurations including cable type, link speed and transfer mode (see Documentation/kernel-parameters.txt for details)"); +MODULE_PARM_DESC(force, "Force ATA configurations including cable type, link speed and transfer mode (see Documentation/admin-guide/kernel-parameters.rst for details)"); static int atapi_enabled = 1; module_param(atapi_enabled, int, 0444); diff --git a/drivers/char/pcmcia/cm4000_cs.c b/drivers/char/pcmcia/cm4000_cs.c index c115217c79ae..e051fc8aa7d7 100644 --- a/drivers/char/pcmcia/cm4000_cs.c +++ b/drivers/char/pcmcia/cm4000_cs.c @@ -14,7 +14,7 @@ * (C) 2000,2001,2002,2003,2004 Omnikey AG * * (C) 2005-2006 Harald Welte - * - Adhere to Kernel CodingStyle + * - Adhere to Kernel Documentation/process/coding-style.rst * - Port to 2.6.13 "new" style PCMCIA * - Check for copy_{from,to}_user return values * - Use nonseekable_open() @@ -151,7 +151,7 @@ static struct pcmcia_device *dev_table[CM4000_MAX_DEV]; static struct class *cmm_class; /* This table doesn't use spaces after the comma between fields and thus - * violates CodingStyle. However, I don't really think wrapping it around will + * violates Documentation/process/coding-style.rst. 
However, I don't really think wrapping it around will * make it any clearer to read -HW */ static unsigned char fi_di_table[10][14] = { /*FI 00 01 02 03 04 05 06 07 08 09 10 11 12 13 */ diff --git a/drivers/net/can/grcan.c b/drivers/net/can/grcan.c index db9538d4b358..a7be12d9a139 100644 --- a/drivers/net/can/grcan.c +++ b/drivers/net/can/grcan.c @@ -15,7 +15,7 @@ * See "Documentation/ABI/testing/sysfs-class-net-grcan" for information on the * sysfs interface. * - * See "Documentation/kernel-parameters.txt" for information on the module + * See "Documentation/admin-guide/kernel-parameters.rst" for information on the module * parameters. * * This program is free software; you can redistribute it and/or modify it diff --git a/drivers/nvdimm/Kconfig b/drivers/nvdimm/Kconfig index 8b2b740d6679..b20ce7da1ee4 100644 --- a/drivers/nvdimm/Kconfig +++ b/drivers/nvdimm/Kconfig @@ -28,7 +28,7 @@ config BLK_DEV_PMEM non-standard OEM-specific E820 memory type (type-12, see CONFIG_X86_PMEM_LEGACY), or it is manually specified by the 'memmap=nn[KMG]!ss[KMG]' kernel command line (see - Documentation/kernel-parameters.txt). This driver converts + Documentation/admin-guide/kernel-parameters.rst). This driver converts these persistent memory ranges into block devices that are capable of DAX (direct-access) file system mappings. See Documentation/nvdimm/nvdimm.txt for more details. 
diff --git a/drivers/staging/vme/devices/vme_user.c b/drivers/staging/vme/devices/vme_user.c index 5dd430f8f921..d84dffb894f4 100644 --- a/drivers/staging/vme/devices/vme_user.c +++ b/drivers/staging/vme/devices/vme_user.c @@ -47,7 +47,7 @@ static const char driver_name[] = "vme_user"; static int bus[VME_USER_BUS_MAX]; static unsigned int bus_num; -/* Currently Documentation/devices.txt defines the following for VME: +/* Currently Documentation/admin-guide/devices.rst defines the following for VME: * * 221 char VME bus * 0 = /dev/bus/vme/m0 First master image diff --git a/drivers/video/fbdev/skeletonfb.c b/drivers/video/fbdev/skeletonfb.c index f948baa16d82..e219a0a22077 100644 --- a/drivers/video/fbdev/skeletonfb.c +++ b/drivers/video/fbdev/skeletonfb.c @@ -836,7 +836,7 @@ static void xxxfb_remove(struct pci_dev *dev) * @dev: PCI device * @msg: the suspend event code. * - * See Documentation/power/devices.txt for more information + * See Documentation/power/devices.txt for more information */ static int xxxfb_suspend(struct pci_dev *dev, pm_message_t msg) { @@ -851,7 +851,7 @@ static int xxxfb_suspend(struct pci_dev *dev, pm_message_t msg) * xxxfb_resume - Optional but recommended function. Resume the device. * @dev: PCI device * - * See Documentation/power/devices.txt for more information + * See Documentation/power/devices.txt for more information */ static int xxxfb_resume(struct pci_dev *dev) { @@ -915,7 +915,7 @@ static void __exit xxxfb_exit(void) * @dev: platform device * @msg: the suspend event code. * - * See Documentation/power/devices.txt for more information + * See Documentation/power/devices.txt for more information */ static int xxxfb_suspend(struct platform_device *dev, pm_message_t msg) { @@ -930,7 +930,7 @@ static int xxxfb_suspend(struct platform_device *dev, pm_message_t msg) * xxxfb_resume - Optional but recommended function. Resume the device. 
* @dev: platform device * - * See Documentation/power/devices.txt for more information + * See Documentation/power/devices.txt for more information */ static int xxxfb_resume(struct platform_dev *dev) { diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig index 77590320d44c..623f72334fa5 100644 --- a/drivers/virtio/Kconfig +++ b/drivers/virtio/Kconfig @@ -75,7 +75,7 @@ config VIRTIO_MMIO_CMDLINE_DEVICES Allow virtio-mmio devices instantiation via the kernel command line or module parameters. Be aware that using incorrect parameters (base address in particular) can crash your system - you have been warned. - See Documentation/kernel-parameters.txt for details. + See Documentation/admin-guide/kernel-parameters.rst for details. If unsure, say 'N'. diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt index 4c09d93d9569..b2f82cf6bf86 100644 --- a/fs/Kconfig.binfmt +++ b/fs/Kconfig.binfmt @@ -170,8 +170,8 @@ config BINFMT_MISC You can do other nice things, too. Read the file to learn how to use this - feature, for information about how - to include Java support. and for + feature, for information about how + to include Java support. and for information about how to include Mono-based .NET support. To use binfmt_misc, you will need to mount it: diff --git a/fs/pstore/Kconfig b/fs/pstore/Kconfig index be40813eff52..b42e5bd6d8ff 100644 --- a/fs/pstore/Kconfig +++ b/fs/pstore/Kconfig @@ -86,4 +86,4 @@ config PSTORE_RAM Note that for historical reasons, the module will be named "ramoops.ko". - For more information, see Documentation/ramoops.txt. + For more information, see Documentation/admin-guide/ramoops.rst. diff --git a/include/linux/device.h b/include/linux/device.h index bc41e87a969b..36d3a9867da9 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -733,7 +733,7 @@ struct device_dma_parameters { * minimizes board-specific #ifdefs in drivers. * @driver_data: Private pointer for driver specific info. * @power: For device power management. 
- * See Documentation/power/devices.txt for details. + * See Documentation/power/admin-guide/devices.rst for details. * @pm_domain: Provide callbacks that are executed during system suspend, * hibernation, system resume and during runtime PM transitions * along with subsystem-level and driver-level callbacks. diff --git a/include/linux/pm.h b/include/linux/pm.h index 06eb353182ab..efa67b2dfee9 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -258,7 +258,7 @@ typedef struct pm_message { * example, if it detects that a child was unplugged while the system was * asleep). * - * Refer to Documentation/power/devices.txt for more information about the role + * Refer to Documentation/power/admin-guide/devices.rst for more information about the role * of the above callbacks in the system suspend process. * * There also are callbacks related to runtime power management of devices. diff --git a/include/uapi/linux/major.h b/include/uapi/linux/major.h index 620252e69b44..19e195bee990 100644 --- a/include/uapi/linux/major.h +++ b/include/uapi/linux/major.h @@ -3,7 +3,7 @@ /* * This file has definitions for major device numbers. - * For the device number assignments, see Documentation/devices.txt. + * For the device number assignments, see Documentation/admin-guide/devices.rst. */ #define UNNAMED_MAJOR 0 diff --git a/init/Kconfig b/init/Kconfig index 34407f15e6d3..172f80ea0d58 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1306,7 +1306,7 @@ config BLK_DEV_INITRD boot loader (loadlin or lilo) and that is mounted as root before the normal boot procedure. It is typically used to load modules needed to mount the "real" root file system, - etc. See for details. + etc. See for details. 
If RAM disk support (BLK_DEV_RAM) is also included, this also enables initial RAM disk (initrd) support and adds diff --git a/init/main.c b/init/main.c index 2858be732f6d..691eb9351a83 100644 --- a/init/main.c +++ b/init/main.c @@ -980,7 +980,7 @@ static int __ref kernel_init(void *unused) return 0; panic("No working init found. Try passing init= option to kernel. " - "See Linux Documentation/init.txt for guidance."); + "See Linux Documentation/admin-guide/init.rst for guidance."); } static noinline void __init kernel_init_freeable(void) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 33bc56cf60d7..d2df3a93284b 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -13,7 +13,7 @@ config PRINTK_TIME be included, not that the timestamp is recorded. The behavior is also controlled by the kernel command line - parameter printk.time=1. See Documentation/kernel-parameters.txt + parameter printk.time=1. See Documentation/admin-guide/kernel-parameters.rst config MESSAGE_LOGLEVEL_DEFAULT int "Default message log level (1-7)" diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index a8368d1c4348..d0c729ccec20 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -2187,7 +2187,7 @@ sub process { if ($rawline=~/^\+\+\+\s+(\S+)/) { $setup_docs = 0; - if ($1 =~ m@Documentation/kernel-parameters.txt$@) { + if ($1 =~ m@Documentation/admin-guide/kernel-parameters.rst$@) { $setup_docs = 1; } #next; @@ -5102,7 +5102,7 @@ sub process { my $asm_volatile = qr{\b(__asm__|asm)\s+(__volatile__|volatile)\b}; if ($line =~ /\bvolatile\b/ && $line !~ /$asm_volatile/) { WARN("VOLATILE", - "Use of volatile is usually wrong: see Documentation/volatile-considered-harmful.txt\n" . $herecurr); + "Use of volatile is usually wrong: see Documentation/process/volatile-considered-harmful.rst\n" . 
$herecurr); } # Check for user-visible strings broken across lines, which breaks the ability @@ -5817,7 +5817,7 @@ sub process { if (!grep(/$name/, @setup_docs)) { CHK("UNDOCUMENTED_SETUP", - "__setup appears un-documented -- check Documentation/kernel-parameters.txt\n" . $herecurr); + "__setup appears un-documented -- check Documentation/admin-guide/kernel-parameters.rst\n" . $herecurr); } } diff --git a/tools/testing/selftests/futex/README b/tools/testing/selftests/futex/README index 0558bb9ce0a6..f3926c33ed4c 100644 --- a/tools/testing/selftests/futex/README +++ b/tools/testing/selftests/futex/README @@ -59,4 +59,4 @@ o FIXME: decide on a sane test naming scheme. Currently the tests are named Coding Style ------------ o The Futex Test project adheres to the coding standards set forth by Linux - kernel as defined in the Linux source Documentation/CodingStyle. + kernel as defined in the Linux source Documentation/process/coding-style.rst. -- cgit v1.2.3 From 864e2fe935228c5c551da8638b5fdd07f82d04a7 Mon Sep 17 00:00:00 2001 From: Amitesh Singh Date: Fri, 30 Sep 2016 22:40:57 +0530 Subject: usb: fix a typo in usb_class_driver documentation replace usb_unregister_dev by usb_deregister_dev Signed-off-by: Amitesh Singh Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index eba1f10e8cfd..7e68259360de 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -1160,7 +1160,7 @@ extern struct bus_type usb_bus_type; * @minor_base: the start of the minor range for this driver. * * This structure is used for the usb_register_dev() and - * usb_unregister_dev() functions, to consolidate a number of the + * usb_deregister_dev() functions, to consolidate a number of the * parameters used for them. 
*/ struct usb_class_driver { -- cgit v1.2.3 From 08bcd3edec2559833aa0ed0213cc300fc9705dd6 Mon Sep 17 00:00:00 2001 From: Anthony Best Date: Tue, 4 Oct 2016 14:15:42 -0600 Subject: gpio: fix struct gpio_chip comment It should have been @reg_clr instead of @reg_clk Signed-off-by: Anthony Best Signed-off-by: Linus Walleij --- include/linux/gpio/driver.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 24e2cc56beb1..2dfcf25b1724 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -91,7 +91,7 @@ enum single_ended_mode { * bit. This callback assigns the right bit mask. * @reg_dat: data (in) register for generic GPIO * @reg_set: output set register (out=high) for generic GPIO - * @reg_clk: output clear register (out=low) for generic GPIO + * @reg_clr: output clear register (out=low) for generic GPIO * @reg_dir: direction setting register for generic GPIO * @bgpio_bits: number of register bits used for a generic GPIO i.e. * * 8 -- cgit v1.2.3 From daa6e41ce2b594e7d622b4cd3978defca1139666 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Tue, 6 Sep 2016 15:18:29 -0700 Subject: soc: qcom: wcnss_ctrl: Stub wcnss_ctrl API Stub the wcnss_ctrl API to allow compile testing wcnss function drivers. 
Cc: Marcel Holtmann Signed-off-by: Bjorn Andersson Acked-by: Andy Gross Signed-off-by: Andy Gross --- include/linux/soc/qcom/wcnss_ctrl.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soc/qcom/wcnss_ctrl.h b/include/linux/soc/qcom/wcnss_ctrl.h index a37bc5538f19..eab64976a73b 100644 --- a/include/linux/soc/qcom/wcnss_ctrl.h +++ b/include/linux/soc/qcom/wcnss_ctrl.h @@ -3,6 +3,19 @@ #include +#if IS_ENABLED(CONFIG_QCOM_WCNSS_CTRL) + struct qcom_smd_channel *qcom_wcnss_open_channel(void *wcnss, const char *name, qcom_smd_cb_t cb); +#else + +static inline struct qcom_smd_channel* +qcom_wcnss_open_channel(void *wcnss, const char *name, qcom_smd_cb_t cb) +{ + WARN_ON(1); + return ERR_PTR(-ENXIO); +} + +#endif + #endif -- cgit v1.2.3 From c8d283ff8b0b6b2061dfc137afd6c56608a34bcb Mon Sep 17 00:00:00 2001 From: Paul Bolle Date: Thu, 20 Oct 2016 21:20:59 +0200 Subject: crypto: ccp - fix typo "CPP" The abbreviation for Cryptographic Coprocessor is "CCP". Signed-off-by: Paul Bolle Acked-by: Gary R Hook Signed-off-by: Herbert Xu --- include/linux/ccp.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ccp.h b/include/linux/ccp.h index a7653339fedb..c71dd8fa5764 100644 --- a/include/linux/ccp.h +++ b/include/linux/ccp.h @@ -11,8 +11,8 @@ * published by the Free Software Foundation. 
*/ -#ifndef __CPP_H__ -#define __CPP_H__ +#ifndef __CCP_H__ +#define __CCP_H__ #include #include @@ -553,7 +553,7 @@ enum ccp_engine { #define CCP_CMD_PASSTHRU_NO_DMA_MAP 0x00000002 /** - * struct ccp_cmd - CPP operation request + * struct ccp_cmd - CCP operation request * @entry: list element (ccp driver use only) * @work: work element used for callbacks (ccp driver use only) * @ccp: CCP device to be run on (ccp driver use only) -- cgit v1.2.3 From 2ebda74fd6c9d3fc3b9f0234fc519795e23025a5 Mon Sep 17 00:00:00 2001 From: Giovanni Cabiddu Date: Fri, 21 Oct 2016 13:19:47 +0100 Subject: crypto: acomp - add asynchronous compression api Add acomp, an asynchronous compression api that uses scatterlist buffers. Signed-off-by: Giovanni Cabiddu Signed-off-by: Herbert Xu --- crypto/Kconfig | 10 ++ crypto/Makefile | 2 + crypto/acompress.c | 118 +++++++++++++++ crypto/crypto_user.c | 19 +++ include/crypto/acompress.h | 281 ++++++++++++++++++++++++++++++++++++ include/crypto/internal/acompress.h | 66 +++++++++ include/linux/crypto.h | 1 + include/uapi/linux/cryptouser.h | 5 + 8 files changed, 502 insertions(+) create mode 100644 crypto/acompress.c create mode 100644 include/crypto/acompress.h create mode 100644 include/crypto/internal/acompress.h (limited to 'include/linux') diff --git a/crypto/Kconfig b/crypto/Kconfig index fd288053b1c5..9950c47c9d27 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -102,6 +102,15 @@ config CRYPTO_KPP select CRYPTO_ALGAPI select CRYPTO_KPP2 +config CRYPTO_ACOMP2 + tristate + select CRYPTO_ALGAPI2 + +config CRYPTO_ACOMP + tristate + select CRYPTO_ALGAPI + select CRYPTO_ACOMP2 + config CRYPTO_RSA tristate "RSA algorithm" select CRYPTO_AKCIPHER @@ -138,6 +147,7 @@ config CRYPTO_MANAGER2 select CRYPTO_BLKCIPHER2 select CRYPTO_AKCIPHER2 select CRYPTO_KPP2 + select CRYPTO_ACOMP2 config CRYPTO_USER tristate "Userspace cryptographic algorithm configuration" diff --git a/crypto/Makefile b/crypto/Makefile index 99cc64ac70ef..0933dc6bd24c 100644 --- 
a/crypto/Makefile +++ b/crypto/Makefile @@ -50,6 +50,8 @@ rsa_generic-y += rsa_helper.o rsa_generic-y += rsa-pkcs1pad.o obj-$(CONFIG_CRYPTO_RSA) += rsa_generic.o +obj-$(CONFIG_CRYPTO_ACOMP2) += acompress.o + cryptomgr-y := algboss.o testmgr.o obj-$(CONFIG_CRYPTO_MANAGER2) += cryptomgr.o diff --git a/crypto/acompress.c b/crypto/acompress.c new file mode 100644 index 000000000000..4977279476d3 --- /dev/null +++ b/crypto/acompress.c @@ -0,0 +1,118 @@ +/* + * Asynchronous Compression operations + * + * Copyright (c) 2016, Intel Corporation + * Authors: Weigang Li + * Giovanni Cabiddu + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "internal.h" + +#ifdef CONFIG_NET +static int crypto_acomp_report(struct sk_buff *skb, struct crypto_alg *alg) +{ + struct crypto_report_acomp racomp; + + strncpy(racomp.type, "acomp", sizeof(racomp.type)); + + if (nla_put(skb, CRYPTOCFGA_REPORT_ACOMP, + sizeof(struct crypto_report_acomp), &racomp)) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -EMSGSIZE; +} +#else +static int crypto_acomp_report(struct sk_buff *skb, struct crypto_alg *alg) +{ + return -ENOSYS; +} +#endif + +static void crypto_acomp_show(struct seq_file *m, struct crypto_alg *alg) + __attribute__ ((unused)); + +static void crypto_acomp_show(struct seq_file *m, struct crypto_alg *alg) +{ + seq_puts(m, "type : acomp\n"); +} + +static void crypto_acomp_exit_tfm(struct crypto_tfm *tfm) +{ + struct crypto_acomp *acomp = __crypto_acomp_tfm(tfm); + struct acomp_alg *alg = crypto_acomp_alg(acomp); + + alg->exit(acomp); +} + +static int crypto_acomp_init_tfm(struct crypto_tfm *tfm) +{ + struct crypto_acomp *acomp = 
__crypto_acomp_tfm(tfm); + struct acomp_alg *alg = crypto_acomp_alg(acomp); + + if (alg->exit) + acomp->base.exit = crypto_acomp_exit_tfm; + + if (alg->init) + return alg->init(acomp); + + return 0; +} + +static const struct crypto_type crypto_acomp_type = { + .extsize = crypto_alg_extsize, + .init_tfm = crypto_acomp_init_tfm, +#ifdef CONFIG_PROC_FS + .show = crypto_acomp_show, +#endif + .report = crypto_acomp_report, + .maskclear = ~CRYPTO_ALG_TYPE_MASK, + .maskset = CRYPTO_ALG_TYPE_MASK, + .type = CRYPTO_ALG_TYPE_ACOMPRESS, + .tfmsize = offsetof(struct crypto_acomp, base), +}; + +struct crypto_acomp *crypto_alloc_acomp(const char *alg_name, u32 type, + u32 mask) +{ + return crypto_alloc_tfm(alg_name, &crypto_acomp_type, type, mask); +} +EXPORT_SYMBOL_GPL(crypto_alloc_acomp); + +int crypto_register_acomp(struct acomp_alg *alg) +{ + struct crypto_alg *base = &alg->base; + + base->cra_type = &crypto_acomp_type; + base->cra_flags &= ~CRYPTO_ALG_TYPE_MASK; + base->cra_flags |= CRYPTO_ALG_TYPE_ACOMPRESS; + + return crypto_register_alg(base); +} +EXPORT_SYMBOL_GPL(crypto_register_acomp); + +int crypto_unregister_acomp(struct acomp_alg *alg) +{ + return crypto_unregister_alg(&alg->base); +} +EXPORT_SYMBOL_GPL(crypto_unregister_acomp); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Asynchronous compression type"); diff --git a/crypto/crypto_user.c b/crypto/crypto_user.c index 1c5705481c69..a90404a0c5ff 100644 --- a/crypto/crypto_user.c +++ b/crypto/crypto_user.c @@ -112,6 +112,21 @@ nla_put_failure: return -EMSGSIZE; } +static int crypto_report_acomp(struct sk_buff *skb, struct crypto_alg *alg) +{ + struct crypto_report_acomp racomp; + + strncpy(racomp.type, "acomp", sizeof(racomp.type)); + + if (nla_put(skb, CRYPTOCFGA_REPORT_ACOMP, + sizeof(struct crypto_report_acomp), &racomp)) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + static int crypto_report_akcipher(struct sk_buff *skb, struct crypto_alg *alg) { struct crypto_report_akcipher 
rakcipher; @@ -186,7 +201,11 @@ static int crypto_report_one(struct crypto_alg *alg, goto nla_put_failure; break; + case CRYPTO_ALG_TYPE_ACOMPRESS: + if (crypto_report_acomp(skb, alg)) + goto nla_put_failure; + break; case CRYPTO_ALG_TYPE_AKCIPHER: if (crypto_report_akcipher(skb, alg)) goto nla_put_failure; diff --git a/include/crypto/acompress.h b/include/crypto/acompress.h new file mode 100644 index 000000000000..14c70d887160 --- /dev/null +++ b/include/crypto/acompress.h @@ -0,0 +1,281 @@ +/* + * Asynchronous Compression operations + * + * Copyright (c) 2016, Intel Corporation + * Authors: Weigang Li + * Giovanni Cabiddu + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ +#ifndef _CRYPTO_ACOMP_H +#define _CRYPTO_ACOMP_H +#include + +#define CRYPTO_ACOMP_ALLOC_OUTPUT 0x00000001 + +/** + * struct acomp_req - asynchronous (de)compression request + * + * @base: Common attributes for asynchronous crypto requests + * @src: Source Data + * @dst: Destination data + * @slen: Size of the input buffer + * @dlen: Size of the output buffer and number of bytes produced + * @flags: Internal flags + * @__ctx: Start of private context data + */ +struct acomp_req { + struct crypto_async_request base; + struct scatterlist *src; + struct scatterlist *dst; + unsigned int slen; + unsigned int dlen; + u32 flags; + void *__ctx[] CRYPTO_MINALIGN_ATTR; +}; + +/** + * struct crypto_acomp - user-instantiated objects which encapsulate + * algorithms and core processing logic + * + * @base: Common crypto API algorithm data structure + */ +struct crypto_acomp { + struct crypto_tfm base; +}; + +/** + * struct acomp_alg - asynchronous compression algorithm + * + * @compress: Function performs a compress operation + * @decompress: Function performs a de-compress operation + * 
@dst_free: Frees destination buffer if allocated inside the algorithm + * @init: Initialize the cryptographic transformation object. + * This function is used to initialize the cryptographic + * transformation object. This function is called only once at + * the instantiation time, right after the transformation context + * was allocated. In case the cryptographic hardware has some + * special requirements which need to be handled by software, this + * function shall check for the precise requirement of the + * transformation and put any software fallbacks in place. + * @exit: Deinitialize the cryptographic transformation object. This is a + * counterpart to @init, used to remove various changes set in + * @init. + * + * @reqsize: Context size for (de)compression requests + * @base: Common crypto API algorithm data structure + */ +struct acomp_alg { + int (*compress)(struct acomp_req *req); + int (*decompress)(struct acomp_req *req); + void (*dst_free)(struct scatterlist *dst); + int (*init)(struct crypto_acomp *tfm); + void (*exit)(struct crypto_acomp *tfm); + unsigned int reqsize; + struct crypto_alg base; +}; + +/** + * DOC: Asynchronous Compression API + * + * The Asynchronous Compression API is used with the algorithms of type + * CRYPTO_ALG_TYPE_ACOMPRESS (listed as type "acomp" in /proc/crypto) + */ + +/** + * crypto_alloc_acomp() -- allocate ACOMPRESS tfm handle + * @alg_name: is the cra_name / name or cra_driver_name / driver name of the + * compression algorithm e.g. "deflate" + * @type: specifies the type of the algorithm + * @mask: specifies the mask for the algorithm + * + * Allocate a handle for a compression algorithm. The returned struct + * crypto_acomp is the handle that is required for any subsequent + * API invocation for the compression operations. + * + * Return: allocated handle in case of success; IS_ERR() is true in case + * of an error, PTR_ERR() returns the error code. 
+ */ +struct crypto_acomp *crypto_alloc_acomp(const char *alg_name, u32 type, + u32 mask); + +static inline struct crypto_tfm *crypto_acomp_tfm(struct crypto_acomp *tfm) +{ + return &tfm->base; +} + +static inline struct acomp_alg *__crypto_acomp_alg(struct crypto_alg *alg) +{ + return container_of(alg, struct acomp_alg, base); +} + +static inline struct crypto_acomp *__crypto_acomp_tfm(struct crypto_tfm *tfm) +{ + return container_of(tfm, struct crypto_acomp, base); +} + +static inline struct acomp_alg *crypto_acomp_alg(struct crypto_acomp *tfm) +{ + return __crypto_acomp_alg(crypto_acomp_tfm(tfm)->__crt_alg); +} + +static inline unsigned int crypto_acomp_reqsize(struct crypto_acomp *tfm) +{ + return crypto_acomp_alg(tfm)->reqsize; +} + +static inline void acomp_request_set_tfm(struct acomp_req *req, + struct crypto_acomp *tfm) +{ + req->base.tfm = crypto_acomp_tfm(tfm); +} + +static inline struct crypto_acomp *crypto_acomp_reqtfm(struct acomp_req *req) +{ + return __crypto_acomp_tfm(req->base.tfm); +} + +/** + * crypto_free_acomp() -- free ACOMPRESS tfm handle + * + * @tfm: ACOMPRESS tfm handle allocated with crypto_alloc_acomp() + */ +static inline void crypto_free_acomp(struct crypto_acomp *tfm) +{ + crypto_destroy_tfm(tfm, crypto_acomp_tfm(tfm)); +} + +static inline int crypto_has_acomp(const char *alg_name, u32 type, u32 mask) +{ + type &= ~CRYPTO_ALG_TYPE_MASK; + type |= CRYPTO_ALG_TYPE_ACOMPRESS; + mask |= CRYPTO_ALG_TYPE_MASK; + + return crypto_has_alg(alg_name, type, mask); +} + +/** + * acomp_request_alloc() -- allocates asynchronous (de)compression request + * + * @tfm: ACOMPRESS tfm handle allocated with crypto_alloc_acomp() + * + * Return: allocated handle in case of success or NULL in case of an error + */ +static inline struct acomp_req *acomp_request_alloc(struct crypto_acomp *tfm) +{ + struct acomp_req *req; + + req = kzalloc(sizeof(*req) + crypto_acomp_reqsize(tfm), GFP_KERNEL); + if (likely(req)) + acomp_request_set_tfm(req, tfm); + + return 
req; +} + +/** + * acomp_request_free() -- zeroize and free asynchronous (de)compression + * request as well as the output buffer if allocated + * inside the algorithm + * + * @req: request to free + */ +static inline void acomp_request_free(struct acomp_req *req) +{ + struct crypto_acomp *tfm = crypto_acomp_reqtfm(req); + struct acomp_alg *alg = crypto_acomp_alg(tfm); + + if (req->flags & CRYPTO_ACOMP_ALLOC_OUTPUT) { + alg->dst_free(req->dst); + req->dst = NULL; + } + kzfree(req); +} + +/** + * acomp_request_set_callback() -- Sets an asynchronous callback + * + * Callback will be called when an asynchronous operation on a given + * request is finished. + * + * @req: request that the callback will be set for + * @flgs: specify for instance if the operation may backlog + * @cmpl: callback which will be called + * @data: private data used by the caller + */ +static inline void acomp_request_set_callback(struct acomp_req *req, + u32 flgs, + crypto_completion_t cmpl, + void *data) +{ + req->base.complete = cmpl; + req->base.data = data; + req->base.flags = flgs; +} + +/** + * acomp_request_set_params() -- Sets request parameters + * + * Sets parameters required by an acomp operation + * + * @req: asynchronous compress request + * @src: pointer to input buffer scatterlist + * @dst: pointer to output buffer scatterlist. If this is NULL, the + * acomp layer will allocate the output memory + * @slen: size of the input buffer + * @dlen: size of the output buffer.
If dst is NULL, this can be used by + * the user to specify the maximum amount of memory to allocate + */ +static inline void acomp_request_set_params(struct acomp_req *req, + struct scatterlist *src, + struct scatterlist *dst, + unsigned int slen, + unsigned int dlen) +{ + req->src = src; + req->dst = dst; + req->slen = slen; + req->dlen = dlen; + + if (!req->dst) + req->flags |= CRYPTO_ACOMP_ALLOC_OUTPUT; +} + +/** + * crypto_acomp_compress() -- Invoke asynchronous compress operation + * + * Function invokes the asynchronous compress operation + * + * @req: asynchronous compress request + * + * Return: zero on success; error code in case of error + */ +static inline int crypto_acomp_compress(struct acomp_req *req) +{ + struct crypto_acomp *tfm = crypto_acomp_reqtfm(req); + struct acomp_alg *alg = crypto_acomp_alg(tfm); + + return alg->compress(req); +} + +/** + * crypto_acomp_decompress() -- Invoke asynchronous decompress operation + * + * Function invokes the asynchronous decompress operation + * + * @req: asynchronous compress request + * + * Return: zero on success; error code in case of error + */ +static inline int crypto_acomp_decompress(struct acomp_req *req) +{ + struct crypto_acomp *tfm = crypto_acomp_reqtfm(req); + struct acomp_alg *alg = crypto_acomp_alg(tfm); + + return alg->decompress(req); +} + +#endif diff --git a/include/crypto/internal/acompress.h b/include/crypto/internal/acompress.h new file mode 100644 index 000000000000..a9a9000d1aea --- /dev/null +++ b/include/crypto/internal/acompress.h @@ -0,0 +1,66 @@ +/* + * Asynchronous Compression operations + * + * Copyright (c) 2016, Intel Corporation + * Authors: Weigang Li + * Giovanni Cabiddu + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. 
+ * + */ +#ifndef _CRYPTO_ACOMP_INT_H +#define _CRYPTO_ACOMP_INT_H +#include + +/* + * Transform internal helpers. + */ +static inline void *acomp_request_ctx(struct acomp_req *req) +{ + return req->__ctx; +} + +static inline void *acomp_tfm_ctx(struct crypto_acomp *tfm) +{ + return tfm->base.__crt_ctx; +} + +static inline void acomp_request_complete(struct acomp_req *req, + int err) +{ + req->base.complete(&req->base, err); +} + +static inline const char *acomp_alg_name(struct crypto_acomp *tfm) +{ + return crypto_acomp_tfm(tfm)->__crt_alg->cra_name; +} + +/** + * crypto_register_acomp() -- Register asynchronous compression algorithm + * + * Function registers an implementation of an asynchronous + * compression algorithm + * + * @alg: algorithm definition + * + * Return: zero on success; error code in case of error + */ +int crypto_register_acomp(struct acomp_alg *alg); + +/** + * crypto_unregister_acomp() -- Unregister asynchronous compression algorithm + * + * Function unregisters an implementation of an asynchronous + * compression algorithm + * + * @alg: algorithm definition + * + * Return: zero on success; error code in case of error + */ +int crypto_unregister_acomp(struct acomp_alg *alg); + +#endif diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 7cee5551625b..dc57a0505505 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -50,6 +50,7 @@ #define CRYPTO_ALG_TYPE_SKCIPHER 0x00000005 #define CRYPTO_ALG_TYPE_GIVCIPHER 0x00000006 #define CRYPTO_ALG_TYPE_KPP 0x00000008 +#define CRYPTO_ALG_TYPE_ACOMPRESS 0x0000000a #define CRYPTO_ALG_TYPE_RNG 0x0000000c #define CRYPTO_ALG_TYPE_AKCIPHER 0x0000000d #define CRYPTO_ALG_TYPE_DIGEST 0x0000000e diff --git a/include/uapi/linux/cryptouser.h b/include/uapi/linux/cryptouser.h index 79b5ded2001a..11d21fce14d6 100644 --- a/include/uapi/linux/cryptouser.h +++ b/include/uapi/linux/cryptouser.h @@ -46,6 +46,7 @@ enum crypto_attr_type_t { CRYPTOCFGA_REPORT_CIPHER, /* struct crypto_report_cipher 
*/ CRYPTOCFGA_REPORT_AKCIPHER, /* struct crypto_report_akcipher */ CRYPTOCFGA_REPORT_KPP, /* struct crypto_report_kpp */ + CRYPTOCFGA_REPORT_ACOMP, /* struct crypto_report_acomp */ __CRYPTOCFGA_MAX #define CRYPTOCFGA_MAX (__CRYPTOCFGA_MAX - 1) @@ -112,5 +113,9 @@ struct crypto_report_kpp { char type[CRYPTO_MAX_NAME]; }; +struct crypto_report_acomp { + char type[CRYPTO_MAX_NAME]; +}; + #define CRYPTO_REPORT_MAXSIZE (sizeof(struct crypto_user_alg) + \ sizeof(struct crypto_report_blkcipher)) -- cgit v1.2.3 From 1ab53a77b772bf7369464a0e4fa6fd6499acf8f1 Mon Sep 17 00:00:00 2001 From: Giovanni Cabiddu Date: Fri, 21 Oct 2016 13:19:48 +0100 Subject: crypto: acomp - add driver-side scomp interface Add a synchronous back-end (scomp) to acomp. This allows to easily expose the already present compression algorithms in LKCF via acomp. Signed-off-by: Giovanni Cabiddu Signed-off-by: Herbert Xu --- crypto/Makefile | 1 + crypto/acompress.c | 55 +++++- crypto/scompress.c | 356 ++++++++++++++++++++++++++++++++++++ include/crypto/acompress.h | 42 ++--- include/crypto/internal/acompress.h | 15 ++ include/crypto/internal/scompress.h | 136 ++++++++++++++ include/linux/crypto.h | 2 + 7 files changed, 578 insertions(+), 29 deletions(-) create mode 100644 crypto/scompress.c create mode 100644 include/crypto/internal/scompress.h (limited to 'include/linux') diff --git a/crypto/Makefile b/crypto/Makefile index 0933dc6bd24c..5c83f3dea119 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -51,6 +51,7 @@ rsa_generic-y += rsa-pkcs1pad.o obj-$(CONFIG_CRYPTO_RSA) += rsa_generic.o obj-$(CONFIG_CRYPTO_ACOMP2) += acompress.o +obj-$(CONFIG_CRYPTO_ACOMP2) += scompress.o cryptomgr-y := algboss.o testmgr.o diff --git a/crypto/acompress.c b/crypto/acompress.c index 4977279476d3..887783d8e9a9 100644 --- a/crypto/acompress.c +++ b/crypto/acompress.c @@ -22,8 +22,11 @@ #include #include #include +#include #include "internal.h" +static const struct crypto_type crypto_acomp_type; + #ifdef CONFIG_NET static 
int crypto_acomp_report(struct sk_buff *skb, struct crypto_alg *alg) { @@ -67,6 +70,14 @@ static int crypto_acomp_init_tfm(struct crypto_tfm *tfm) struct crypto_acomp *acomp = __crypto_acomp_tfm(tfm); struct acomp_alg *alg = crypto_acomp_alg(acomp); + if (tfm->__crt_alg->cra_type != &crypto_acomp_type) + return crypto_init_scomp_ops_async(tfm); + + acomp->compress = alg->compress; + acomp->decompress = alg->decompress; + acomp->dst_free = alg->dst_free; + acomp->reqsize = alg->reqsize; + if (alg->exit) acomp->base.exit = crypto_acomp_exit_tfm; @@ -76,15 +87,25 @@ static int crypto_acomp_init_tfm(struct crypto_tfm *tfm) return 0; } +static unsigned int crypto_acomp_extsize(struct crypto_alg *alg) +{ + int extsize = crypto_alg_extsize(alg); + + if (alg->cra_type != &crypto_acomp_type) + extsize += sizeof(struct crypto_scomp *); + + return extsize; +} + static const struct crypto_type crypto_acomp_type = { - .extsize = crypto_alg_extsize, + .extsize = crypto_acomp_extsize, .init_tfm = crypto_acomp_init_tfm, #ifdef CONFIG_PROC_FS .show = crypto_acomp_show, #endif .report = crypto_acomp_report, .maskclear = ~CRYPTO_ALG_TYPE_MASK, - .maskset = CRYPTO_ALG_TYPE_MASK, + .maskset = CRYPTO_ALG_TYPE_ACOMPRESS_MASK, .type = CRYPTO_ALG_TYPE_ACOMPRESS, .tfmsize = offsetof(struct crypto_acomp, base), }; @@ -96,6 +117,36 @@ struct crypto_acomp *crypto_alloc_acomp(const char *alg_name, u32 type, } EXPORT_SYMBOL_GPL(crypto_alloc_acomp); +struct acomp_req *acomp_request_alloc(struct crypto_acomp *acomp) +{ + struct crypto_tfm *tfm = crypto_acomp_tfm(acomp); + struct acomp_req *req; + + req = __acomp_request_alloc(acomp); + if (req && (tfm->__crt_alg->cra_type != &crypto_acomp_type)) + return crypto_acomp_scomp_alloc_ctx(req); + + return req; +} +EXPORT_SYMBOL_GPL(acomp_request_alloc); + +void acomp_request_free(struct acomp_req *req) +{ + struct crypto_acomp *acomp = crypto_acomp_reqtfm(req); + struct crypto_tfm *tfm = crypto_acomp_tfm(acomp); + + if (tfm->__crt_alg->cra_type != 
&crypto_acomp_type) + crypto_acomp_scomp_free_ctx(req); + + if (req->flags & CRYPTO_ACOMP_ALLOC_OUTPUT) { + acomp->dst_free(req->dst); + req->dst = NULL; + } + + __acomp_request_free(req); +} +EXPORT_SYMBOL_GPL(acomp_request_free); + int crypto_register_acomp(struct acomp_alg *alg) { struct crypto_alg *base = &alg->base; diff --git a/crypto/scompress.c b/crypto/scompress.c new file mode 100644 index 000000000000..35e396d154b7 --- /dev/null +++ b/crypto/scompress.c @@ -0,0 +1,356 @@ +/* + * Synchronous Compression operations + * + * Copyright 2015 LG Electronics Inc. + * Copyright (c) 2016, Intel Corporation + * Author: Giovanni Cabiddu + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "internal.h" + +static const struct crypto_type crypto_scomp_type; +static void * __percpu *scomp_src_scratches; +static void * __percpu *scomp_dst_scratches; +static int scomp_scratch_users; +static DEFINE_MUTEX(scomp_lock); + +#ifdef CONFIG_NET +static int crypto_scomp_report(struct sk_buff *skb, struct crypto_alg *alg) +{ + struct crypto_report_comp rscomp; + + strncpy(rscomp.type, "scomp", sizeof(rscomp.type)); + + if (nla_put(skb, CRYPTOCFGA_REPORT_COMPRESS, + sizeof(struct crypto_report_comp), &rscomp)) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -EMSGSIZE; +} +#else +static int crypto_scomp_report(struct sk_buff *skb, struct crypto_alg *alg) +{ + return -ENOSYS; +} +#endif + +static void crypto_scomp_show(struct seq_file *m, struct crypto_alg *alg) + __attribute__ ((unused)); + +static void crypto_scomp_show(struct seq_file *m, struct crypto_alg *alg) +{ + seq_puts(m, "type : 
scomp\n"); +} + +static int crypto_scomp_init_tfm(struct crypto_tfm *tfm) +{ + return 0; +} + +static void crypto_scomp_free_scratches(void * __percpu *scratches) +{ + int i; + + if (!scratches) + return; + + for_each_possible_cpu(i) + vfree(*per_cpu_ptr(scratches, i)); + + free_percpu(scratches); +} + +static void * __percpu *crypto_scomp_alloc_scratches(void) +{ + void * __percpu *scratches; + int i; + + scratches = alloc_percpu(void *); + if (!scratches) + return NULL; + + for_each_possible_cpu(i) { + void *scratch; + + scratch = vmalloc_node(SCOMP_SCRATCH_SIZE, cpu_to_node(i)); + if (!scratch) + goto error; + *per_cpu_ptr(scratches, i) = scratch; + } + + return scratches; + +error: + crypto_scomp_free_scratches(scratches); + return NULL; +} + +static void crypto_scomp_free_all_scratches(void) +{ + if (!--scomp_scratch_users) { + crypto_scomp_free_scratches(scomp_src_scratches); + crypto_scomp_free_scratches(scomp_dst_scratches); + scomp_src_scratches = NULL; + scomp_dst_scratches = NULL; + } +} + +static int crypto_scomp_alloc_all_scratches(void) +{ + if (!scomp_scratch_users++) { + scomp_src_scratches = crypto_scomp_alloc_scratches(); + if (!scomp_src_scratches) + return -ENOMEM; + scomp_dst_scratches = crypto_scomp_alloc_scratches(); + if (!scomp_dst_scratches) + return -ENOMEM; + } + return 0; +} + +static void crypto_scomp_sg_free(struct scatterlist *sgl) +{ + int i, n; + struct page *page; + + if (!sgl) + return; + + n = sg_nents(sgl); + for_each_sg(sgl, sgl, n, i) { + page = sg_page(sgl); + if (page) + __free_page(page); + } + + kfree(sgl); +} + +static struct scatterlist *crypto_scomp_sg_alloc(size_t size, gfp_t gfp) +{ + struct scatterlist *sgl; + struct page *page; + int i, n; + + n = ((size - 1) >> PAGE_SHIFT) + 1; + + sgl = kmalloc_array(n, sizeof(struct scatterlist), gfp); + if (!sgl) + return NULL; + + sg_init_table(sgl, n); + + for (i = 0; i < n; i++) { + page = alloc_page(gfp); + if (!page) + goto err; + sg_set_page(sgl + i, page, PAGE_SIZE, 0); 
+ } + + return sgl; + +err: + sg_mark_end(sgl + i); + crypto_scomp_sg_free(sgl); + return NULL; +} + +static int scomp_acomp_comp_decomp(struct acomp_req *req, int dir) +{ + struct crypto_acomp *tfm = crypto_acomp_reqtfm(req); + void **tfm_ctx = acomp_tfm_ctx(tfm); + struct crypto_scomp *scomp = *tfm_ctx; + void **ctx = acomp_request_ctx(req); + const int cpu = get_cpu(); + u8 *scratch_src = *per_cpu_ptr(scomp_src_scratches, cpu); + u8 *scratch_dst = *per_cpu_ptr(scomp_dst_scratches, cpu); + int ret; + + if (!req->src || !req->slen || req->slen > SCOMP_SCRATCH_SIZE) { + ret = -EINVAL; + goto out; + } + + if (req->dst && !req->dlen) { + ret = -EINVAL; + goto out; + } + + if (!req->dlen || req->dlen > SCOMP_SCRATCH_SIZE) + req->dlen = SCOMP_SCRATCH_SIZE; + + scatterwalk_map_and_copy(scratch_src, req->src, 0, req->slen, 0); + if (dir) + ret = crypto_scomp_compress(scomp, scratch_src, req->slen, + scratch_dst, &req->dlen, *ctx); + else + ret = crypto_scomp_decompress(scomp, scratch_src, req->slen, + scratch_dst, &req->dlen, *ctx); + if (!ret) { + if (!req->dst) { + req->dst = crypto_scomp_sg_alloc(req->dlen, + req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? 
+ GFP_KERNEL : GFP_ATOMIC); + if (!req->dst) + goto out; + } + scatterwalk_map_and_copy(scratch_dst, req->dst, 0, req->dlen, + 1); + } +out: + put_cpu(); + return ret; +} + +static int scomp_acomp_compress(struct acomp_req *req) +{ + return scomp_acomp_comp_decomp(req, 1); +} + +static int scomp_acomp_decompress(struct acomp_req *req) +{ + return scomp_acomp_comp_decomp(req, 0); +} + +static void crypto_exit_scomp_ops_async(struct crypto_tfm *tfm) +{ + struct crypto_scomp **ctx = crypto_tfm_ctx(tfm); + + crypto_free_scomp(*ctx); +} + +int crypto_init_scomp_ops_async(struct crypto_tfm *tfm) +{ + struct crypto_alg *calg = tfm->__crt_alg; + struct crypto_acomp *crt = __crypto_acomp_tfm(tfm); + struct crypto_scomp **ctx = crypto_tfm_ctx(tfm); + struct crypto_scomp *scomp; + + if (!crypto_mod_get(calg)) + return -EAGAIN; + + scomp = crypto_create_tfm(calg, &crypto_scomp_type); + if (IS_ERR(scomp)) { + crypto_mod_put(calg); + return PTR_ERR(scomp); + } + + *ctx = scomp; + tfm->exit = crypto_exit_scomp_ops_async; + + crt->compress = scomp_acomp_compress; + crt->decompress = scomp_acomp_decompress; + crt->dst_free = crypto_scomp_sg_free; + crt->reqsize = sizeof(void *); + + return 0; +} + +struct acomp_req *crypto_acomp_scomp_alloc_ctx(struct acomp_req *req) +{ + struct crypto_acomp *acomp = crypto_acomp_reqtfm(req); + struct crypto_tfm *tfm = crypto_acomp_tfm(acomp); + struct crypto_scomp **tfm_ctx = crypto_tfm_ctx(tfm); + struct crypto_scomp *scomp = *tfm_ctx; + void *ctx; + + ctx = crypto_scomp_alloc_ctx(scomp); + if (IS_ERR(ctx)) { + kfree(req); + return NULL; + } + + *req->__ctx = ctx; + + return req; +} + +void crypto_acomp_scomp_free_ctx(struct acomp_req *req) +{ + struct crypto_acomp *acomp = crypto_acomp_reqtfm(req); + struct crypto_tfm *tfm = crypto_acomp_tfm(acomp); + struct crypto_scomp **tfm_ctx = crypto_tfm_ctx(tfm); + struct crypto_scomp *scomp = *tfm_ctx; + void *ctx = *req->__ctx; + + if (ctx) + crypto_scomp_free_ctx(scomp, ctx); +} + +static const struct 
crypto_type crypto_scomp_type = { + .extsize = crypto_alg_extsize, + .init_tfm = crypto_scomp_init_tfm, +#ifdef CONFIG_PROC_FS + .show = crypto_scomp_show, +#endif + .report = crypto_scomp_report, + .maskclear = ~CRYPTO_ALG_TYPE_MASK, + .maskset = CRYPTO_ALG_TYPE_MASK, + .type = CRYPTO_ALG_TYPE_SCOMPRESS, + .tfmsize = offsetof(struct crypto_scomp, base), +}; + +int crypto_register_scomp(struct scomp_alg *alg) +{ + struct crypto_alg *base = &alg->base; + int ret = -ENOMEM; + + mutex_lock(&scomp_lock); + if (crypto_scomp_alloc_all_scratches()) + goto error; + + base->cra_type = &crypto_scomp_type; + base->cra_flags &= ~CRYPTO_ALG_TYPE_MASK; + base->cra_flags |= CRYPTO_ALG_TYPE_SCOMPRESS; + + ret = crypto_register_alg(base); + if (ret) + goto error; + + mutex_unlock(&scomp_lock); + return ret; + +error: + crypto_scomp_free_all_scratches(); + mutex_unlock(&scomp_lock); + return ret; +} +EXPORT_SYMBOL_GPL(crypto_register_scomp); + +int crypto_unregister_scomp(struct scomp_alg *alg) +{ + int ret; + + mutex_lock(&scomp_lock); + ret = crypto_unregister_alg(&alg->base); + crypto_scomp_free_all_scratches(); + mutex_unlock(&scomp_lock); + + return ret; +} +EXPORT_SYMBOL_GPL(crypto_unregister_scomp); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Synchronous compression type"); diff --git a/include/crypto/acompress.h b/include/crypto/acompress.h index 14c70d887160..e328b52425a8 100644 --- a/include/crypto/acompress.h +++ b/include/crypto/acompress.h @@ -42,9 +42,18 @@ struct acomp_req { * struct crypto_acomp - user-instantiated objects which encapsulate * algorithms and core processing logic * - * @base: Common crypto API algorithm data structure + * @compress: Function performs a compress operation + * @decompress: Function performs a de-compress operation + * @dst_free: Frees destination buffer if allocated inside the + * algorithm + * @reqsize: Context size for (de)compression requests + * @base: Common crypto API algorithm data structure */ struct crypto_acomp { + int 
(*compress)(struct acomp_req *req); + int (*decompress)(struct acomp_req *req); + void (*dst_free)(struct scatterlist *dst); + unsigned int reqsize; struct crypto_tfm base; }; @@ -125,7 +134,7 @@ static inline struct acomp_alg *crypto_acomp_alg(struct crypto_acomp *tfm) static inline unsigned int crypto_acomp_reqsize(struct crypto_acomp *tfm) { - return crypto_acomp_alg(tfm)->reqsize; + return tfm->reqsize; } static inline void acomp_request_set_tfm(struct acomp_req *req, @@ -165,16 +174,7 @@ static inline int crypto_has_acomp(const char *alg_name, u32 type, u32 mask) * * Return: allocated handle in case of success or NULL in case of an error */ -static inline struct acomp_req *acomp_request_alloc(struct crypto_acomp *tfm) -{ - struct acomp_req *req; - - req = kzalloc(sizeof(*req) + crypto_acomp_reqsize(tfm), GFP_KERNEL); - if (likely(req)) - acomp_request_set_tfm(req, tfm); - - return req; -} +struct acomp_req *acomp_request_alloc(struct crypto_acomp *tfm); /** * acomp_request_free() -- zeroize and free asynchronous (de)compression @@ -183,17 +183,7 @@ static inline struct acomp_req *acomp_request_alloc(struct crypto_acomp *tfm) * * @req: request to free */ -static inline void acomp_request_free(struct acomp_req *req) -{ - struct crypto_acomp *tfm = crypto_acomp_reqtfm(req); - struct acomp_alg *alg = crypto_acomp_alg(tfm); - - if (req->flags & CRYPTO_ACOMP_ALLOC_OUTPUT) { - alg->dst_free(req->dst); - req->dst = NULL; - } - kzfree(req); -} +void acomp_request_free(struct acomp_req *req); /** * acomp_request_set_callback() -- Sets an asynchronous callback @@ -256,9 +246,8 @@ static inline void acomp_request_set_params(struct acomp_req *req, static inline int crypto_acomp_compress(struct acomp_req *req) { struct crypto_acomp *tfm = crypto_acomp_reqtfm(req); - struct acomp_alg *alg = crypto_acomp_alg(tfm); - return alg->compress(req); + return tfm->compress(req); } /** @@ -273,9 +262,8 @@ static inline int crypto_acomp_compress(struct acomp_req *req) static inline int 
crypto_acomp_decompress(struct acomp_req *req) { struct crypto_acomp *tfm = crypto_acomp_reqtfm(req); - struct acomp_alg *alg = crypto_acomp_alg(tfm); - return alg->decompress(req); + return tfm->decompress(req); } #endif diff --git a/include/crypto/internal/acompress.h b/include/crypto/internal/acompress.h index a9a9000d1aea..1de2b5af12d7 100644 --- a/include/crypto/internal/acompress.h +++ b/include/crypto/internal/acompress.h @@ -39,6 +39,21 @@ static inline const char *acomp_alg_name(struct crypto_acomp *tfm) return crypto_acomp_tfm(tfm)->__crt_alg->cra_name; } +static inline struct acomp_req *__acomp_request_alloc(struct crypto_acomp *tfm) +{ + struct acomp_req *req; + + req = kzalloc(sizeof(*req) + crypto_acomp_reqsize(tfm), GFP_KERNEL); + if (likely(req)) + acomp_request_set_tfm(req, tfm); + return req; +} + +static inline void __acomp_request_free(struct acomp_req *req) +{ + kzfree(req); +} + /** * crypto_register_acomp() -- Register asynchronous compression algorithm * diff --git a/include/crypto/internal/scompress.h b/include/crypto/internal/scompress.h new file mode 100644 index 000000000000..3fda3c5655a0 --- /dev/null +++ b/include/crypto/internal/scompress.h @@ -0,0 +1,136 @@ +/* + * Synchronous Compression operations + * + * Copyright 2015 LG Electronics Inc. + * Copyright (c) 2016, Intel Corporation + * Author: Giovanni Cabiddu + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. 
+ * + */ +#ifndef _CRYPTO_SCOMP_INT_H +#define _CRYPTO_SCOMP_INT_H +#include + +#define SCOMP_SCRATCH_SIZE 131072 + +struct crypto_scomp { + struct crypto_tfm base; +}; + +/** + * struct scomp_alg - synchronous compression algorithm + * + * @alloc_ctx: Function allocates algorithm specific context + * @free_ctx: Function frees context allocated with alloc_ctx + * @compress: Function performs a compress operation + * @decompress: Function performs a de-compress operation + * @init: Initialize the cryptographic transformation object. + * This function is used to initialize the cryptographic + * transformation object. This function is called only once at + * the instantiation time, right after the transformation context + * was allocated. In case the cryptographic hardware has some + * special requirements which need to be handled by software, this + * function shall check for the precise requirement of the + * transformation and put any software fallbacks in place. + * @exit: Deinitialize the cryptographic transformation object. This is a + * counterpart to @init, used to remove various changes set in + * @init. 
+ * @base: Common crypto API algorithm data structure + */ +struct scomp_alg { + void *(*alloc_ctx)(struct crypto_scomp *tfm); + void (*free_ctx)(struct crypto_scomp *tfm, void *ctx); + int (*compress)(struct crypto_scomp *tfm, const u8 *src, + unsigned int slen, u8 *dst, unsigned int *dlen, + void *ctx); + int (*decompress)(struct crypto_scomp *tfm, const u8 *src, + unsigned int slen, u8 *dst, unsigned int *dlen, + void *ctx); + struct crypto_alg base; +}; + +static inline struct scomp_alg *__crypto_scomp_alg(struct crypto_alg *alg) +{ + return container_of(alg, struct scomp_alg, base); +} + +static inline struct crypto_scomp *__crypto_scomp_tfm(struct crypto_tfm *tfm) +{ + return container_of(tfm, struct crypto_scomp, base); +} + +static inline struct crypto_tfm *crypto_scomp_tfm(struct crypto_scomp *tfm) +{ + return &tfm->base; +} + +static inline void crypto_free_scomp(struct crypto_scomp *tfm) +{ + crypto_destroy_tfm(tfm, crypto_scomp_tfm(tfm)); +} + +static inline struct scomp_alg *crypto_scomp_alg(struct crypto_scomp *tfm) +{ + return __crypto_scomp_alg(crypto_scomp_tfm(tfm)->__crt_alg); +} + +static inline void *crypto_scomp_alloc_ctx(struct crypto_scomp *tfm) +{ + return crypto_scomp_alg(tfm)->alloc_ctx(tfm); +} + +static inline void crypto_scomp_free_ctx(struct crypto_scomp *tfm, + void *ctx) +{ + return crypto_scomp_alg(tfm)->free_ctx(tfm, ctx); +} + +static inline int crypto_scomp_compress(struct crypto_scomp *tfm, + const u8 *src, unsigned int slen, + u8 *dst, unsigned int *dlen, void *ctx) +{ + return crypto_scomp_alg(tfm)->compress(tfm, src, slen, dst, dlen, ctx); +} + +static inline int crypto_scomp_decompress(struct crypto_scomp *tfm, + const u8 *src, unsigned int slen, + u8 *dst, unsigned int *dlen, + void *ctx) +{ + return crypto_scomp_alg(tfm)->decompress(tfm, src, slen, dst, dlen, + ctx); +} + +int crypto_init_scomp_ops_async(struct crypto_tfm *tfm); +struct acomp_req *crypto_acomp_scomp_alloc_ctx(struct acomp_req *req); +void 
crypto_acomp_scomp_free_ctx(struct acomp_req *req); + +/** + * crypto_register_scomp() -- Register synchronous compression algorithm + * + * Function registers an implementation of a synchronous + * compression algorithm + * + * @alg: algorithm definition + * + * Return: zero on success; error code in case of error + */ +int crypto_register_scomp(struct scomp_alg *alg); + +/** + * crypto_unregister_scomp() -- Unregister synchronous compression algorithm + * + * Function unregisters an implementation of a synchronous + * compression algorithm + * + * @alg: algorithm definition + * + * Return: zero on success; error code in case of error + */ +int crypto_unregister_scomp(struct scomp_alg *alg); + +#endif diff --git a/include/linux/crypto.h b/include/linux/crypto.h index dc57a0505505..8348d83d8b5e 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -51,6 +51,7 @@ #define CRYPTO_ALG_TYPE_GIVCIPHER 0x00000006 #define CRYPTO_ALG_TYPE_KPP 0x00000008 #define CRYPTO_ALG_TYPE_ACOMPRESS 0x0000000a +#define CRYPTO_ALG_TYPE_SCOMPRESS 0x0000000b #define CRYPTO_ALG_TYPE_RNG 0x0000000c #define CRYPTO_ALG_TYPE_AKCIPHER 0x0000000d #define CRYPTO_ALG_TYPE_DIGEST 0x0000000e @@ -61,6 +62,7 @@ #define CRYPTO_ALG_TYPE_HASH_MASK 0x0000000e #define CRYPTO_ALG_TYPE_AHASH_MASK 0x0000000e #define CRYPTO_ALG_TYPE_BLKCIPHER_MASK 0x0000000c +#define CRYPTO_ALG_TYPE_ACOMPRESS_MASK 0x0000000e #define CRYPTO_ALG_LARVAL 0x00000010 #define CRYPTO_ALG_DEAD 0x00000020 -- cgit v1.2.3 From a225023828038a1aaea876a65313c863ec23fa44 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 19 Oct 2016 15:45:27 +0200 Subject: sched/core: Explain sleep/wakeup in a better way There were a few questions wrt. how sleep-wakeup works. Try and explain it more. 
Requested-by: Will Deacon Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched.h | 52 +++++++++++++++++++++++++++++++++++---------------- kernel/sched/core.c | 17 +++++++++-------- 2 files changed, 45 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 348f51b0ec92..3762fe4e3a80 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -262,20 +262,9 @@ extern char ___assert_task_state[1 - 2*!!( #define set_task_state(tsk, state_value) \ do { \ (tsk)->task_state_change = _THIS_IP_; \ - smp_store_mb((tsk)->state, (state_value)); \ + smp_store_mb((tsk)->state, (state_value)); \ } while (0) -/* - * set_current_state() includes a barrier so that the write of current->state - * is correctly serialised wrt the caller's subsequent test of whether to - * actually sleep: - * - * set_current_state(TASK_UNINTERRUPTIBLE); - * if (do_i_need_to_sleep()) - * schedule(); - * - * If the caller does not need such serialisation then use __set_current_state() - */ #define __set_current_state(state_value) \ do { \ current->task_state_change = _THIS_IP_; \ @@ -284,11 +273,19 @@ extern char ___assert_task_state[1 - 2*!!( #define set_current_state(state_value) \ do { \ current->task_state_change = _THIS_IP_; \ - smp_store_mb(current->state, (state_value)); \ + smp_store_mb(current->state, (state_value)); \ } while (0) #else +/* + * @tsk had better be current, or you get to keep the pieces. + * + * The only reason is that computing current can be more expensive than + * using a pointer that's already available. + * + * Therefore, see set_current_state(). 
+ */ #define __set_task_state(tsk, state_value) \ do { (tsk)->state = (state_value); } while (0) #define set_task_state(tsk, state_value) \ @@ -299,11 +296,34 @@ extern char ___assert_task_state[1 - 2*!!( * is correctly serialised wrt the caller's subsequent test of whether to * actually sleep: * + * for (;;) { * set_current_state(TASK_UNINTERRUPTIBLE); - * if (do_i_need_to_sleep()) - * schedule(); + * if (!need_sleep) + * break; + * + * schedule(); + * } + * __set_current_state(TASK_RUNNING); + * + * If the caller does not need such serialisation (because, for instance, the + * condition test and condition change and wakeup are under the same lock) then + * use __set_current_state(). + * + * The above is typically ordered against the wakeup, which does: + * + * need_sleep = false; + * wake_up_state(p, TASK_UNINTERRUPTIBLE); + * + * Where wake_up_state() (and all other wakeup primitives) imply enough + * barriers to order the store of the variable against wakeup. + * + * Wakeup will do: if (@state & p->state) p->state = TASK_RUNNING, that is, + * once it observes the TASK_UNINTERRUPTIBLE store the waking CPU can issue a + * TASK_RUNNING store which can collide with __set_current_state(TASK_RUNNING). + * + * This is obviously fine, since they both store the exact same value. * - * If the caller does not need such serialisation then use __set_current_state() + * Also see the comments of try_to_wake_up(). */ #define __set_current_state(state_value) \ do { current->state = (state_value); } while (0) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 94732d1ab00a..b8c86ba44ca9 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1995,14 +1995,15 @@ static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags) * @state: the mask of task states that can be woken * @wake_flags: wake modifier flags (WF_*) * - * Put it on the run-queue if it's not already there. 
The "current" - * thread is always on the run-queue (except when the actual - * re-schedule is in progress), and as such you're allowed to do - * the simpler "current->state = TASK_RUNNING" to mark yourself - * runnable without the overhead of this. - * - * Return: %true if @p was woken up, %false if it was already running. - * or @state didn't match @p's state. + * If (@state & @p->state) @p->state = TASK_RUNNING. + * + * If the task was not queued/runnable, also place it back on a runqueue. + * + * Atomic against schedule() which would dequeue a task, also see + * set_current_state(). + * + * Return: %true if @p->state changes (an actual wakeup was done), + * %false otherwise. */ static int try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) -- cgit v1.2.3 From 3ca0ff571b092ee4d807f1168caa428d95b0173b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 23 Aug 2016 13:36:04 +0200 Subject: locking/mutex: Rework mutex::owner The current mutex implementation has an atomic lock word and a non-atomic owner field. This disparity leads to a number of issues with the current mutex code as it means that we can have a locked mutex without an explicit owner (because the owner field has not been set, or already cleared). This leads to a number of weird corner cases, esp. between the optimistic spinning and debug code. Where the optimistic spinning code needs the owner field updated inside the lock region, the debug code is more relaxed because the whole lock is serialized by the wait_lock. Also, the spinning code itself has a few corner cases where we need to deal with a held lock without an owner field. Furthermore, it becomes even more of a problem when trying to fix starvation cases in the current code. We end up stacking special case on special case. To solve this rework the basic mutex implementation to be a single atomic word that contains the owner and uses the low bits for extra state. This matches how PI futexes and rt_mutex already work. 
By having the owner an integral part of the lock state a lot of the problems disappear and we get a better option to deal with starvation cases, direct owner handoff. Changing the basic mutex does however invalidate all the arch specific mutex code; this patch leaves that unused in-place, a later patch will remove that. Tested-by: Jason Low Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Will Deacon Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/mutex-debug.h | 24 --- include/linux/mutex.h | 46 ++++-- kernel/locking/mutex-debug.c | 13 -- kernel/locking/mutex-debug.h | 10 -- kernel/locking/mutex.c | 371 ++++++++++++++++++------------------------- kernel/locking/mutex.h | 26 --- kernel/sched/core.c | 2 +- 7 files changed, 187 insertions(+), 305 deletions(-) delete mode 100644 include/linux/mutex-debug.h (limited to 'include/linux') diff --git a/include/linux/mutex-debug.h b/include/linux/mutex-debug.h deleted file mode 100644 index 4ac8b1977b73..000000000000 --- a/include/linux/mutex-debug.h +++ /dev/null @@ -1,24 +0,0 @@ -#ifndef __LINUX_MUTEX_DEBUG_H -#define __LINUX_MUTEX_DEBUG_H - -#include -#include -#include - -/* - * Mutexes - debugging helpers: - */ - -#define __DEBUG_MUTEX_INITIALIZER(lockname) \ - , .magic = &lockname - -#define mutex_init(mutex) \ -do { \ - static struct lock_class_key __key; \ - \ - __mutex_init((mutex), #mutex, &__key); \ -} while (0) - -extern void mutex_destroy(struct mutex *lock); - -#endif diff --git a/include/linux/mutex.h b/include/linux/mutex.h index 2cb7531e7d7a..4d3bccabbea5 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -18,6 +18,7 @@ #include #include #include +#include /* * Simple, straightforward mutexes with strict semantics: @@ -48,16 +49,12 @@ * locks and tasks (and only those tasks) */ struct mutex { - /* 1: unlocked, 0: locked, negative: locked, possible waiters */ - 
atomic_t count; + atomic_long_t owner; spinlock_t wait_lock; - struct list_head wait_list; -#if defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_MUTEX_SPIN_ON_OWNER) - struct task_struct *owner; -#endif #ifdef CONFIG_MUTEX_SPIN_ON_OWNER struct optimistic_spin_queue osq; /* Spinner MCS lock */ #endif + struct list_head wait_list; #ifdef CONFIG_DEBUG_MUTEXES void *magic; #endif @@ -66,6 +63,11 @@ struct mutex { #endif }; +static inline struct task_struct *__mutex_owner(struct mutex *lock) +{ + return (struct task_struct *)(atomic_long_read(&lock->owner) & ~0x03); +} + /* * This is the control structure for tasks blocked on mutex, * which resides on the blocked task's kernel stack: @@ -79,9 +81,20 @@ struct mutex_waiter { }; #ifdef CONFIG_DEBUG_MUTEXES -# include + +#define __DEBUG_MUTEX_INITIALIZER(lockname) \ + , .magic = &lockname + +extern void mutex_destroy(struct mutex *lock); + #else + # define __DEBUG_MUTEX_INITIALIZER(lockname) + +static inline void mutex_destroy(struct mutex *lock) {} + +#endif + /** * mutex_init - initialize the mutex * @mutex: the mutex to be initialized @@ -90,14 +103,12 @@ struct mutex_waiter { * * It is not allowed to initialize an already locked mutex. 
*/ -# define mutex_init(mutex) \ -do { \ - static struct lock_class_key __key; \ - \ - __mutex_init((mutex), #mutex, &__key); \ +#define mutex_init(mutex) \ +do { \ + static struct lock_class_key __key; \ + \ + __mutex_init((mutex), #mutex, &__key); \ } while (0) -static inline void mutex_destroy(struct mutex *lock) {} -#endif #ifdef CONFIG_DEBUG_LOCK_ALLOC # define __DEP_MAP_MUTEX_INITIALIZER(lockname) \ @@ -107,7 +118,7 @@ static inline void mutex_destroy(struct mutex *lock) {} #endif #define __MUTEX_INITIALIZER(lockname) \ - { .count = ATOMIC_INIT(1) \ + { .owner = ATOMIC_LONG_INIT(0) \ , .wait_lock = __SPIN_LOCK_UNLOCKED(lockname.wait_lock) \ , .wait_list = LIST_HEAD_INIT(lockname.wait_list) \ __DEBUG_MUTEX_INITIALIZER(lockname) \ @@ -127,7 +138,10 @@ extern void __mutex_init(struct mutex *lock, const char *name, */ static inline int mutex_is_locked(struct mutex *lock) { - return atomic_read(&lock->count) != 1; + /* + * XXX think about spin_is_locked + */ + return __mutex_owner(lock) != NULL; } /* diff --git a/kernel/locking/mutex-debug.c b/kernel/locking/mutex-debug.c index 9c951fade415..9aa713629387 100644 --- a/kernel/locking/mutex-debug.c +++ b/kernel/locking/mutex-debug.c @@ -73,21 +73,8 @@ void debug_mutex_unlock(struct mutex *lock) { if (likely(debug_locks)) { DEBUG_LOCKS_WARN_ON(lock->magic != lock); - - if (!lock->owner) - DEBUG_LOCKS_WARN_ON(!lock->owner); - else - DEBUG_LOCKS_WARN_ON(lock->owner != current); - DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next); } - - /* - * __mutex_slowpath_needs_to_unlock() is explicitly 0 for debug - * mutexes so that we can do it here after we've verified state. 
- */ - mutex_clear_owner(lock); - atomic_set(&lock->count, 1); } void debug_mutex_init(struct mutex *lock, const char *name, diff --git a/kernel/locking/mutex-debug.h b/kernel/locking/mutex-debug.h index 57a871ae3c81..a459faa48987 100644 --- a/kernel/locking/mutex-debug.h +++ b/kernel/locking/mutex-debug.h @@ -27,16 +27,6 @@ extern void debug_mutex_unlock(struct mutex *lock); extern void debug_mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key); -static inline void mutex_set_owner(struct mutex *lock) -{ - WRITE_ONCE(lock->owner, current); -} - -static inline void mutex_clear_owner(struct mutex *lock) -{ - WRITE_ONCE(lock->owner, NULL); -} - #define spin_lock_mutex(lock, flags) \ do { \ struct mutex *l = container_of(lock, struct mutex, wait_lock); \ diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index a70b90db3909..de1ce0bae0d5 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -27,41 +27,113 @@ #include #include -/* - * In the DEBUG case we are using the "NULL fastpath" for mutexes, - * which forces all calls into the slowpath: - */ #ifdef CONFIG_DEBUG_MUTEXES # include "mutex-debug.h" -# include -/* - * Must be 0 for the debug case so we do not do the unlock outside of the - * wait_lock region. debug_mutex_unlock() will do the actual unlock in this - * case. - */ -# undef __mutex_slowpath_needs_to_unlock -# define __mutex_slowpath_needs_to_unlock() 0 #else # include "mutex.h" -# include #endif void __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key) { - atomic_set(&lock->count, 1); + atomic_long_set(&lock->owner, 0); spin_lock_init(&lock->wait_lock); INIT_LIST_HEAD(&lock->wait_list); - mutex_clear_owner(lock); #ifdef CONFIG_MUTEX_SPIN_ON_OWNER osq_lock_init(&lock->osq); #endif debug_mutex_init(lock, name, key); } - EXPORT_SYMBOL(__mutex_init); +/* + * @owner: contains: 'struct task_struct *' to the current lock owner, + * NULL means not owned. 
Since task_struct pointers are aligned at + * ARCH_MIN_TASKALIGN (which is at least sizeof(void *)), we have low + * bits to store extra state. + * + * Bit0 indicates a non-empty waiter list; unlock must issue a wakeup. + */ +#define MUTEX_FLAG_WAITERS 0x01 + +#define MUTEX_FLAGS 0x03 + +static inline struct task_struct *__owner_task(unsigned long owner) +{ + return (struct task_struct *)(owner & ~MUTEX_FLAGS); +} + +static inline unsigned long __owner_flags(unsigned long owner) +{ + return owner & MUTEX_FLAGS; +} + +/* + * Actual trylock that will work on any unlocked state. + */ +static inline bool __mutex_trylock(struct mutex *lock) +{ + unsigned long owner, curr = (unsigned long)current; + + owner = atomic_long_read(&lock->owner); + for (;;) { /* must loop, can race against a flag */ + unsigned long old; + + if (__owner_task(owner)) + return false; + + old = atomic_long_cmpxchg_acquire(&lock->owner, owner, + curr | __owner_flags(owner)); + if (old == owner) + return true; + + owner = old; + } +} + +#ifndef CONFIG_DEBUG_LOCK_ALLOC +/* + * Lockdep annotations are contained to the slow paths for simplicity. + * There is nothing that would stop spreading the lockdep annotations outwards + * except more code. + */ + +/* + * Optimistic trylock that only works in the uncontended case. Make sure to + * follow with a __mutex_trylock() before failing. 
+ */ +static __always_inline bool __mutex_trylock_fast(struct mutex *lock) +{ + unsigned long curr = (unsigned long)current; + + if (!atomic_long_cmpxchg_acquire(&lock->owner, 0UL, curr)) + return true; + + return false; +} + +static __always_inline bool __mutex_unlock_fast(struct mutex *lock) +{ + unsigned long curr = (unsigned long)current; + + if (atomic_long_cmpxchg_release(&lock->owner, curr, 0UL) == curr) + return true; + + return false; +} +#endif + +static inline void __mutex_set_flag(struct mutex *lock, unsigned long flag) +{ + atomic_long_or(flag, &lock->owner); +} + +static inline void __mutex_clear_flag(struct mutex *lock, unsigned long flag) +{ + atomic_long_andnot(flag, &lock->owner); +} + #ifndef CONFIG_DEBUG_LOCK_ALLOC /* * We split the mutex lock/unlock logic into separate fastpath and @@ -69,7 +141,7 @@ EXPORT_SYMBOL(__mutex_init); * We also put the fastpath first in the kernel image, to make sure the * branch is predicted by the CPU as default-untaken. */ -__visible void __sched __mutex_lock_slowpath(atomic_t *lock_count); +static void __sched __mutex_lock_slowpath(struct mutex *lock); /** * mutex_lock - acquire the mutex @@ -95,14 +167,10 @@ __visible void __sched __mutex_lock_slowpath(atomic_t *lock_count); void __sched mutex_lock(struct mutex *lock) { might_sleep(); - /* - * The locking fastpath is the 1->0 transition from - * 'unlocked' into 'locked' state. - */ - __mutex_fastpath_lock(&lock->count, __mutex_lock_slowpath); - mutex_set_owner(lock); -} + if (!__mutex_trylock_fast(lock)) + __mutex_lock_slowpath(lock); +} EXPORT_SYMBOL(mutex_lock); #endif @@ -149,9 +217,6 @@ static __always_inline void ww_mutex_lock_acquired(struct ww_mutex *ww, /* * After acquiring lock with fastpath or when we lost out in contested * slowpath, set ctx and wake up any waiters so they can recheck. - * - * This function is never called when CONFIG_DEBUG_LOCK_ALLOC is set, - * as the fastpath and opportunistic spinning are disabled in that case. 
*/ static __always_inline void ww_mutex_set_context_fastpath(struct ww_mutex *lock, @@ -176,7 +241,7 @@ ww_mutex_set_context_fastpath(struct ww_mutex *lock, /* * Check if lock is contended, if not there is nobody to wake up */ - if (likely(atomic_read(&lock->base.count) == 0)) + if (likely(!(atomic_long_read(&lock->base.owner) & MUTEX_FLAG_WAITERS))) return; /* @@ -227,7 +292,7 @@ bool mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner) bool ret = true; rcu_read_lock(); - while (lock->owner == owner) { + while (__mutex_owner(lock) == owner) { /* * Ensure we emit the owner->on_cpu, dereference _after_ * checking lock->owner still matches owner. If that fails, @@ -260,26 +325,19 @@ static inline int mutex_can_spin_on_owner(struct mutex *lock) return 0; rcu_read_lock(); - owner = READ_ONCE(lock->owner); + owner = __mutex_owner(lock); if (owner) retval = owner->on_cpu; rcu_read_unlock(); + /* - * if lock->owner is not set, the mutex owner may have just acquired - * it and not set the owner yet or the mutex has been released. + * If lock->owner is not set, the mutex has been released. Return true + * such that we'll trylock in the spin path, which is a faster option + * than the blocking slow path. */ return retval; } -/* - * Atomically try to take the lock when it is available - */ -static inline bool mutex_try_to_acquire(struct mutex *lock) -{ - return !mutex_is_locked(lock) && - (atomic_cmpxchg_acquire(&lock->count, 1, 0) == 1); -} - /* * Optimistic spinning. * @@ -288,13 +346,6 @@ static inline bool mutex_try_to_acquire(struct mutex *lock) * need to reschedule. The rationale is that if the lock owner is * running, it is likely to release the lock soon. * - * Since this needs the lock owner, and this mutex implementation - * doesn't track the owner atomically in the lock field, we need to - * track it non-atomically. - * - * We can't do this for DEBUG_MUTEXES because that relies on wait_lock - * to serialize everything. 
- * * The mutex spinners are queued up using MCS lock so that only one * spinner can compete for the mutex. However, if mutex spinning isn't * going to happen, there is no point in going through the lock/unlock @@ -342,35 +393,16 @@ static bool mutex_optimistic_spin(struct mutex *lock, * If there's an owner, wait for it to either * release the lock or go to sleep. */ - owner = READ_ONCE(lock->owner); + owner = __mutex_owner(lock); if (owner && !mutex_spin_on_owner(lock, owner)) break; /* Try to acquire the mutex if it is unlocked. */ - if (mutex_try_to_acquire(lock)) { - lock_acquired(&lock->dep_map, ip); - - if (use_ww_ctx) { - struct ww_mutex *ww; - ww = container_of(lock, struct ww_mutex, base); - - ww_mutex_set_context_fastpath(ww, ww_ctx); - } - - mutex_set_owner(lock); + if (__mutex_trylock(lock)) { osq_unlock(&lock->osq); return true; } - /* - * When there's no owner, we might have preempted between the - * owner acquiring the lock and setting the owner field. If - * we're an RT task that will live-lock because we won't let - * the owner complete. - */ - if (!owner && (need_resched() || rt_task(task))) - break; - /* * The cpu_relax() call is a compiler barrier which forces * everything in this loop to be re-loaded. 
We don't need @@ -406,8 +438,7 @@ static bool mutex_optimistic_spin(struct mutex *lock, } #endif -__visible __used noinline -void __sched __mutex_unlock_slowpath(atomic_t *lock_count); +static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigned long ip); /** * mutex_unlock - release the mutex @@ -422,21 +453,12 @@ void __sched __mutex_unlock_slowpath(atomic_t *lock_count); */ void __sched mutex_unlock(struct mutex *lock) { - /* - * The unlocking fastpath is the 0->1 transition from 'locked' - * into 'unlocked' state: - */ -#ifndef CONFIG_DEBUG_MUTEXES - /* - * When debugging is enabled we must not clear the owner before time, - * the slow path will always be taken, and that clears the owner field - * after verifying that it was indeed current. - */ - mutex_clear_owner(lock); +#ifndef CONFIG_DEBUG_LOCK_ALLOC + if (__mutex_unlock_fast(lock)) + return; #endif - __mutex_fastpath_unlock(&lock->count, __mutex_unlock_slowpath); + __mutex_unlock_slowpath(lock, _RET_IP_); } - EXPORT_SYMBOL(mutex_unlock); /** @@ -465,15 +487,7 @@ void __sched ww_mutex_unlock(struct ww_mutex *lock) lock->ctx = NULL; } -#ifndef CONFIG_DEBUG_MUTEXES - /* - * When debugging is enabled we must not clear the owner before time, - * the slow path will always be taken, and that clears the owner field - * after verifying that it was indeed current. - */ - mutex_clear_owner(&lock->base); -#endif - __mutex_fastpath_unlock(&lock->base.count, __mutex_unlock_slowpath); + mutex_unlock(&lock->base); } EXPORT_SYMBOL(ww_mutex_unlock); @@ -520,20 +534,24 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, preempt_disable(); mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, ip); - if (mutex_optimistic_spin(lock, ww_ctx, use_ww_ctx)) { + if (__mutex_trylock(lock) || mutex_optimistic_spin(lock, ww_ctx, use_ww_ctx)) { /* got the lock, yay! 
*/ + lock_acquired(&lock->dep_map, ip); + if (use_ww_ctx) { + struct ww_mutex *ww; + ww = container_of(lock, struct ww_mutex, base); + + ww_mutex_set_context_fastpath(ww, ww_ctx); + } preempt_enable(); return 0; } spin_lock_mutex(&lock->wait_lock, flags); - /* - * Once more, try to acquire the lock. Only try-lock the mutex if - * it is unlocked to reduce unnecessary xchg() operations. + * After waiting to acquire the wait_lock, try again. */ - if (!mutex_is_locked(lock) && - (atomic_xchg_acquire(&lock->count, 0) == 1)) + if (__mutex_trylock(lock)) goto skip_wait; debug_mutex_lock_common(lock, &waiter); @@ -543,21 +561,13 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, list_add_tail(&waiter.list, &lock->wait_list); waiter.task = task; + if (list_first_entry(&lock->wait_list, struct mutex_waiter, list) == &waiter) + __mutex_set_flag(lock, MUTEX_FLAG_WAITERS); + lock_contended(&lock->dep_map, ip); for (;;) { - /* - * Lets try to take the lock again - this is needed even if - * we get here for the first time (shortly after failing to - * acquire the lock), to make sure that we get a wakeup once - * it's unlocked. Later on, if we sleep, this is the - * operation that gives us the lock. We xchg it to -1, so - * that when we release the lock, we properly wake up the - * other waiters. We only attempt the xchg if the count is - * non-negative in order to avoid unnecessary xchg operations: - */ - if (atomic_read(&lock->count) >= 0 && - (atomic_xchg_acquire(&lock->count, -1) == 1)) + if (__mutex_trylock(lock)) break; /* @@ -585,15 +595,14 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, __set_task_state(task, TASK_RUNNING); mutex_remove_waiter(lock, &waiter, task); - /* set it to 0 if there are no waiters left: */ if (likely(list_empty(&lock->wait_list))) - atomic_set(&lock->count, 0); + __mutex_clear_flag(lock, MUTEX_FLAG_WAITERS); + debug_mutex_free_waiter(&waiter); skip_wait: /* got the lock - cleanup and rejoice! 
*/ lock_acquired(&lock->dep_map, ip); - mutex_set_owner(lock); if (use_ww_ctx) { struct ww_mutex *ww = container_of(lock, struct ww_mutex, base); @@ -631,7 +640,6 @@ _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest) __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0, nest, _RET_IP_, NULL, 0); } - EXPORT_SYMBOL_GPL(_mutex_lock_nest_lock); int __sched @@ -650,7 +658,6 @@ mutex_lock_interruptible_nested(struct mutex *lock, unsigned int subclass) return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, subclass, NULL, _RET_IP_, NULL, 0); } - EXPORT_SYMBOL_GPL(mutex_lock_interruptible_nested); static inline int @@ -715,29 +722,22 @@ EXPORT_SYMBOL_GPL(__ww_mutex_lock_interruptible); /* * Release the lock, slowpath: */ -static inline void -__mutex_unlock_common_slowpath(struct mutex *lock, int nested) +static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigned long ip) { - unsigned long flags; + unsigned long owner, flags; WAKE_Q(wake_q); + mutex_release(&lock->dep_map, 1, ip); + /* - * As a performance measurement, release the lock before doing other - * wakeup related duties to follow. This allows other tasks to acquire - * the lock sooner, while still handling cleanups in past unlock calls. - * This can be done as we do not enforce strict equivalence between the - * mutex counter and wait_list. - * - * - * Some architectures leave the lock unlocked in the fastpath failure - * case, others need to leave it locked. In the later case we have to - * unlock it here - as the lock counter is currently 0 or negative. + * Release the lock before (potentially) taking the spinlock + * such that other contenders can get on with things ASAP. 
*/ - if (__mutex_slowpath_needs_to_unlock()) - atomic_set(&lock->count, 1); + owner = atomic_long_fetch_and_release(MUTEX_FLAGS, &lock->owner); + if (!__owner_flags(owner)) + return; spin_lock_mutex(&lock->wait_lock, flags); - mutex_release(&lock->dep_map, nested, _RET_IP_); debug_mutex_unlock(lock); if (!list_empty(&lock->wait_list)) { @@ -754,17 +754,6 @@ __mutex_unlock_common_slowpath(struct mutex *lock, int nested) wake_up_q(&wake_q); } -/* - * Release the lock, slowpath: - */ -__visible void -__mutex_unlock_slowpath(atomic_t *lock_count) -{ - struct mutex *lock = container_of(lock_count, struct mutex, count); - - __mutex_unlock_common_slowpath(lock, 1); -} - #ifndef CONFIG_DEBUG_LOCK_ALLOC /* * Here come the less common (and hence less performance-critical) APIs: @@ -789,38 +778,30 @@ __mutex_lock_interruptible_slowpath(struct mutex *lock); */ int __sched mutex_lock_interruptible(struct mutex *lock) { - int ret; - might_sleep(); - ret = __mutex_fastpath_lock_retval(&lock->count); - if (likely(!ret)) { - mutex_set_owner(lock); + + if (__mutex_trylock_fast(lock)) return 0; - } else - return __mutex_lock_interruptible_slowpath(lock); + + return __mutex_lock_interruptible_slowpath(lock); } EXPORT_SYMBOL(mutex_lock_interruptible); int __sched mutex_lock_killable(struct mutex *lock) { - int ret; - might_sleep(); - ret = __mutex_fastpath_lock_retval(&lock->count); - if (likely(!ret)) { - mutex_set_owner(lock); + + if (__mutex_trylock_fast(lock)) return 0; - } else - return __mutex_lock_killable_slowpath(lock); + + return __mutex_lock_killable_slowpath(lock); } EXPORT_SYMBOL(mutex_lock_killable); -__visible void __sched -__mutex_lock_slowpath(atomic_t *lock_count) +static noinline void __sched +__mutex_lock_slowpath(struct mutex *lock) { - struct mutex *lock = container_of(lock_count, struct mutex, count); - __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0, NULL, _RET_IP_, NULL, 0); } @@ -856,37 +837,6 @@ __ww_mutex_lock_interruptible_slowpath(struct ww_mutex *lock, 
#endif -/* - * Spinlock based trylock, we take the spinlock and check whether we - * can get the lock: - */ -static inline int __mutex_trylock_slowpath(atomic_t *lock_count) -{ - struct mutex *lock = container_of(lock_count, struct mutex, count); - unsigned long flags; - int prev; - - /* No need to trylock if the mutex is locked. */ - if (mutex_is_locked(lock)) - return 0; - - spin_lock_mutex(&lock->wait_lock, flags); - - prev = atomic_xchg_acquire(&lock->count, -1); - if (likely(prev == 1)) { - mutex_set_owner(lock); - mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_); - } - - /* Set it back to 0 if there are no waiters: */ - if (likely(list_empty(&lock->wait_list))) - atomic_set(&lock->count, 0); - - spin_unlock_mutex(&lock->wait_lock, flags); - - return prev == 1; -} - /** * mutex_trylock - try to acquire the mutex, without waiting * @lock: the mutex to be acquired @@ -903,13 +853,12 @@ static inline int __mutex_trylock_slowpath(atomic_t *lock_count) */ int __sched mutex_trylock(struct mutex *lock) { - int ret; + bool locked = __mutex_trylock(lock); - ret = __mutex_fastpath_trylock(&lock->count, __mutex_trylock_slowpath); - if (ret) - mutex_set_owner(lock); + if (locked) + mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_); - return ret; + return locked; } EXPORT_SYMBOL(mutex_trylock); @@ -917,36 +866,28 @@ EXPORT_SYMBOL(mutex_trylock); int __sched __ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) { - int ret; - might_sleep(); - ret = __mutex_fastpath_lock_retval(&lock->base.count); - - if (likely(!ret)) { + if (__mutex_trylock_fast(&lock->base)) { ww_mutex_set_context_fastpath(lock, ctx); - mutex_set_owner(&lock->base); - } else - ret = __ww_mutex_lock_slowpath(lock, ctx); - return ret; + return 0; + } + + return __ww_mutex_lock_slowpath(lock, ctx); } EXPORT_SYMBOL(__ww_mutex_lock); int __sched __ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) { - int ret; - might_sleep(); - ret = 
__mutex_fastpath_lock_retval(&lock->base.count); - - if (likely(!ret)) { + if (__mutex_trylock_fast(&lock->base)) { ww_mutex_set_context_fastpath(lock, ctx); - mutex_set_owner(&lock->base); - } else - ret = __ww_mutex_lock_interruptible_slowpath(lock, ctx); - return ret; + return 0; + } + + return __ww_mutex_lock_interruptible_slowpath(lock, ctx); } EXPORT_SYMBOL(__ww_mutex_lock_interruptible); diff --git a/kernel/locking/mutex.h b/kernel/locking/mutex.h index 6cd6b8e9efd7..4410a4af42a3 100644 --- a/kernel/locking/mutex.h +++ b/kernel/locking/mutex.h @@ -16,32 +16,6 @@ #define mutex_remove_waiter(lock, waiter, task) \ __list_del((waiter)->list.prev, (waiter)->list.next) -#ifdef CONFIG_MUTEX_SPIN_ON_OWNER -/* - * The mutex owner can get read and written to locklessly. - * We should use WRITE_ONCE when writing the owner value to - * avoid store tearing, otherwise, a thread could potentially - * read a partially written and incomplete owner value. - */ -static inline void mutex_set_owner(struct mutex *lock) -{ - WRITE_ONCE(lock->owner, current); -} - -static inline void mutex_clear_owner(struct mutex *lock) -{ - WRITE_ONCE(lock->owner, NULL); -} -#else -static inline void mutex_set_owner(struct mutex *lock) -{ -} - -static inline void mutex_clear_owner(struct mutex *lock) -{ -} -#endif - #define debug_mutex_wake_waiter(lock, waiter) do { } while (0) #define debug_mutex_free_waiter(waiter) do { } while (0) #define debug_mutex_add_waiter(lock, waiter, ti) do { } while (0) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 94732d1ab00a..8912aafd09e1 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -75,11 +75,11 @@ #include #include #include +#include #include #include #include -#include #ifdef CONFIG_PARAVIRT #include #endif -- cgit v1.2.3 From f54d1867005c3323f5d8ad83eed823e84226c429 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 25 Oct 2016 13:00:45 +0100 Subject: dma-buf: Rename struct fence to dma_fence MIME-Version: 1.0 Content-Type: 
text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I plan to usurp the short name of struct fence for a core kernel struct, and so I need to rename the specialised fence/timeline for DMA operations to make room. A consensus was reached in https://lists.freedesktop.org/archives/dri-devel/2016-July/113083.html that making clear this fence applies to DMA operations was a good thing. Since then the patch has grown a bit as usage increases, so hopefully it remains a good thing! (v2...: rebase, rerun spatch) v3: Compile on msm, spotted a manual fixup that I broke. v4: Try again for msm, sorry Daniel coccinelle script: @@ @@ - struct fence + struct dma_fence @@ @@ - struct fence_ops + struct dma_fence_ops @@ @@ - struct fence_cb + struct dma_fence_cb @@ @@ - struct fence_array + struct dma_fence_array @@ @@ - enum fence_flag_bits + enum dma_fence_flag_bits @@ @@ ( - fence_init + dma_fence_init | - fence_release + dma_fence_release | - fence_free + dma_fence_free | - fence_get + dma_fence_get | - fence_get_rcu + dma_fence_get_rcu | - fence_put + dma_fence_put | - fence_signal + dma_fence_signal | - fence_signal_locked + dma_fence_signal_locked | - fence_default_wait + dma_fence_default_wait | - fence_add_callback + dma_fence_add_callback | - fence_remove_callback + dma_fence_remove_callback | - fence_enable_sw_signaling + dma_fence_enable_sw_signaling | - fence_is_signaled_locked + dma_fence_is_signaled_locked | - fence_is_signaled + dma_fence_is_signaled | - fence_is_later + dma_fence_is_later | - fence_later + dma_fence_later | - fence_wait_timeout + dma_fence_wait_timeout | - fence_wait_any_timeout + dma_fence_wait_any_timeout | - fence_wait + dma_fence_wait | - fence_context_alloc + dma_fence_context_alloc | - fence_array_create + dma_fence_array_create | - to_fence_array + to_dma_fence_array | - fence_is_array + dma_fence_is_array | - trace_fence_emit + trace_dma_fence_emit | - FENCE_TRACE + DMA_FENCE_TRACE | - FENCE_WARN + DMA_FENCE_WARN | - FENCE_ERR + 
DMA_FENCE_ERR ) ( ... ) Signed-off-by: Chris Wilson Reviewed-by: Gustavo Padovan Acked-by: Sumit Semwal Acked-by: Christian König Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20161025120045.28839-1-chris@chris-wilson.co.uk --- Documentation/sync_file.txt | 14 +- drivers/base/Kconfig | 6 +- drivers/dma-buf/Kconfig | 2 +- drivers/dma-buf/Makefile | 2 +- drivers/dma-buf/dma-buf.c | 28 +- drivers/dma-buf/dma-fence-array.c | 146 +++++++ drivers/dma-buf/dma-fence.c | 537 ++++++++++++++++++++++++ drivers/dma-buf/fence-array.c | 145 ------- drivers/dma-buf/fence.c | 534 ----------------------- drivers/dma-buf/reservation.c | 94 +++-- drivers/dma-buf/seqno-fence.c | 18 +- drivers/dma-buf/sw_sync.c | 48 +-- drivers/dma-buf/sync_debug.c | 13 +- drivers/dma-buf/sync_debug.h | 9 +- drivers/dma-buf/sync_file.c | 63 +-- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 54 +-- drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c | 8 +- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 16 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 22 +- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 14 +- drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 16 +- drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 58 +-- drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 6 +- drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 22 +- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 14 +- drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 8 +- drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c | 24 +- drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 48 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_test.c | 12 +- drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 10 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 26 +- drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h | 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 26 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h | 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 79 ++-- drivers/gpu/drm/amd/amdgpu/cik_sdma.c | 6 +- 
drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c | 6 +- drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 6 +- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 12 +- drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 6 +- drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 6 +- drivers/gpu/drm/amd/amdgpu/si_dma.c | 6 +- drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h | 4 +- drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 67 +-- drivers/gpu/drm/amd/scheduler/gpu_scheduler.h | 26 +- drivers/gpu/drm/amd/scheduler/sched_fence.c | 48 ++- drivers/gpu/drm/drm_atomic.c | 2 +- drivers/gpu/drm/drm_atomic_helper.c | 8 +- drivers/gpu/drm/drm_fops.c | 6 +- drivers/gpu/drm/etnaviv/etnaviv_gem.c | 6 +- drivers/gpu/drm/etnaviv/etnaviv_gpu.c | 46 +- drivers/gpu/drm/etnaviv/etnaviv_gpu.h | 4 +- drivers/gpu/drm/i915/i915_gem_request.c | 32 +- drivers/gpu/drm/i915/i915_gem_request.h | 18 +- drivers/gpu/drm/i915/i915_sw_fence.c | 41 +- drivers/gpu/drm/i915/i915_sw_fence.h | 8 +- drivers/gpu/drm/i915/i915_trace.h | 2 +- drivers/gpu/drm/i915/intel_breadcrumbs.c | 4 +- drivers/gpu/drm/i915/intel_engine_cs.c | 2 +- drivers/gpu/drm/msm/msm_drv.h | 2 +- drivers/gpu/drm/msm/msm_fence.c | 28 +- drivers/gpu/drm/msm/msm_fence.h | 2 +- drivers/gpu/drm/msm/msm_gem.c | 14 +- drivers/gpu/drm/msm/msm_gem.h | 2 +- drivers/gpu/drm/msm/msm_gem_submit.c | 8 +- drivers/gpu/drm/msm/msm_gpu.c | 2 +- drivers/gpu/drm/nouveau/nouveau_bo.c | 6 +- drivers/gpu/drm/nouveau/nouveau_fence.c | 80 ++-- drivers/gpu/drm/nouveau/nouveau_fence.h | 6 +- drivers/gpu/drm/nouveau/nouveau_gem.c | 2 +- drivers/gpu/drm/nouveau/nv04_fence.c | 2 +- drivers/gpu/drm/nouveau/nv10_fence.c | 2 +- drivers/gpu/drm/nouveau/nv17_fence.c | 2 +- drivers/gpu/drm/nouveau/nv50_fence.c | 2 +- drivers/gpu/drm/nouveau/nv84_fence.c | 2 +- drivers/gpu/drm/qxl/qxl_drv.h | 4 +- drivers/gpu/drm/qxl/qxl_release.c | 35 +- drivers/gpu/drm/radeon/radeon.h | 10 +- drivers/gpu/drm/radeon/radeon_device.c | 2 +- drivers/gpu/drm/radeon/radeon_display.c | 8 +- drivers/gpu/drm/radeon/radeon_fence.c | 56 +-- 
drivers/gpu/drm/radeon/radeon_sync.c | 6 +- drivers/gpu/drm/radeon/radeon_uvd.c | 2 +- drivers/gpu/drm/ttm/ttm_bo.c | 24 +- drivers/gpu/drm/ttm/ttm_bo_util.c | 22 +- drivers/gpu/drm/ttm/ttm_bo_vm.c | 8 +- drivers/gpu/drm/ttm/ttm_execbuf_util.c | 3 +- drivers/gpu/drm/vgem/vgem_fence.c | 53 +-- drivers/gpu/drm/virtio/virtgpu_drv.h | 2 +- drivers/gpu/drm/virtio/virtgpu_fence.c | 26 +- drivers/gpu/drm/virtio/virtgpu_ioctl.c | 12 +- drivers/gpu/drm/virtio/virtgpu_kms.c | 2 +- drivers/gpu/drm/virtio/virtgpu_plane.c | 2 +- drivers/gpu/drm/vmwgfx/vmwgfx_fence.c | 44 +- drivers/gpu/drm/vmwgfx/vmwgfx_fence.h | 8 +- drivers/gpu/drm/vmwgfx/vmwgfx_resource.c | 2 +- include/drm/drmP.h | 4 +- include/drm/drm_crtc.h | 2 +- include/drm/drm_plane.h | 2 +- include/drm/ttm/ttm_bo_api.h | 2 +- include/drm/ttm/ttm_bo_driver.h | 6 +- include/drm/ttm/ttm_execbuf_util.h | 2 +- include/linux/dma-buf.h | 4 +- include/linux/dma-fence-array.h | 86 ++++ include/linux/dma-fence.h | 437 +++++++++++++++++++ include/linux/fence-array.h | 84 ---- include/linux/fence.h | 424 ------------------- include/linux/reservation.h | 28 +- include/linux/seqno-fence.h | 20 +- include/linux/sync_file.h | 14 +- include/trace/events/dma_fence.h | 128 ++++++ include/trace/events/fence.h | 128 ------ 114 files changed, 2206 insertions(+), 2168 deletions(-) create mode 100644 drivers/dma-buf/dma-fence-array.c create mode 100644 drivers/dma-buf/dma-fence.c delete mode 100644 drivers/dma-buf/fence-array.c delete mode 100644 drivers/dma-buf/fence.c create mode 100644 include/linux/dma-fence-array.h create mode 100644 include/linux/dma-fence.h delete mode 100644 include/linux/fence-array.h delete mode 100644 include/linux/fence.h create mode 100644 include/trace/events/dma_fence.h delete mode 100644 include/trace/events/fence.h (limited to 'include/linux') diff --git a/Documentation/sync_file.txt b/Documentation/sync_file.txt index b63a68531afd..269681a6faec 100644 --- a/Documentation/sync_file.txt +++ 
b/Documentation/sync_file.txt @@ -6,7 +6,7 @@ This document serves as a guide for device drivers writers on what the sync_file API is, and how drivers can support it. Sync file is the carrier of -the fences(struct fence) that are needed to synchronize between drivers or +the fences(struct dma_fence) that are needed to synchronize between drivers or across process boundaries. The sync_file API is meant to be used to send and receive fence information @@ -32,9 +32,9 @@ in-fences and out-fences Sync files can go either to or from userspace. When a sync_file is sent from the driver to userspace we call the fences it contains 'out-fences'. They are related to a buffer that the driver is processing or is going to process, so -the driver creates an out-fence to be able to notify, through fence_signal(), -when it has finished using (or processing) that buffer. Out-fences are fences -that the driver creates. +the driver creates an out-fence to be able to notify, through +dma_fence_signal(), when it has finished using (or processing) that buffer. +Out-fences are fences that the driver creates. On the other hand if the driver receives fence(s) through a sync_file from userspace we call these fence(s) 'in-fences'. Receiveing in-fences means that @@ -47,7 +47,7 @@ Creating Sync Files When a driver needs to send an out-fence userspace it creates a sync_file. Interface: - struct sync_file *sync_file_create(struct fence *fence); + struct sync_file *sync_file_create(struct dma_fence *fence); The caller pass the out-fence and gets back the sync_file. That is just the first step, next it needs to install an fd on sync_file->file. So it gets an @@ -72,11 +72,11 @@ of the Sync File to the kernel. The kernel can then retrieve the fences from it. Interface: - struct fence *sync_file_get_fence(int fd); + struct dma_fence *sync_file_get_fence(int fd); The returned reference is owned by the caller and must be disposed of -afterwards using fence_put(). 
In case of error, a NULL is returned instead. +afterwards using dma_fence_put(). In case of error, a NULL is returned instead. References: [1] struct sync_file in include/linux/sync_file.h diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig index fdf44cac08e6..37bf25c6b4a6 100644 --- a/drivers/base/Kconfig +++ b/drivers/base/Kconfig @@ -248,11 +248,11 @@ config DMA_SHARED_BUFFER APIs extension; the file's descriptor can then be passed on to other driver. -config FENCE_TRACE - bool "Enable verbose FENCE_TRACE messages" +config DMA_FENCE_TRACE + bool "Enable verbose DMA_FENCE_TRACE messages" depends on DMA_SHARED_BUFFER help - Enable the FENCE_TRACE printks. This will add extra + Enable the DMA_FENCE_TRACE printks. This will add extra spam to the console log, but will make it easier to diagnose lockup related problems for dma-buffers shared across multiple devices. diff --git a/drivers/dma-buf/Kconfig b/drivers/dma-buf/Kconfig index 2585821b24ab..ed3b785bae37 100644 --- a/drivers/dma-buf/Kconfig +++ b/drivers/dma-buf/Kconfig @@ -7,7 +7,7 @@ config SYNC_FILE select DMA_SHARED_BUFFER ---help--- The Sync File Framework adds explicit syncronization via - userspace. It enables send/receive 'struct fence' objects to/from + userspace. It enables send/receive 'struct dma_fence' objects to/from userspace via Sync File fds for synchronization between drivers via userspace components. It has been ported from Android. 
diff --git a/drivers/dma-buf/Makefile b/drivers/dma-buf/Makefile index 210a10bfad2b..c33bf8863147 100644 --- a/drivers/dma-buf/Makefile +++ b/drivers/dma-buf/Makefile @@ -1,3 +1,3 @@ -obj-y := dma-buf.o fence.o reservation.o seqno-fence.o fence-array.o +obj-y := dma-buf.o dma-fence.o dma-fence-array.o reservation.o seqno-fence.o obj-$(CONFIG_SYNC_FILE) += sync_file.o obj-$(CONFIG_SW_SYNC) += sw_sync.o sync_debug.o diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index cf04d249a6a4..e72e64484131 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -25,7 +25,7 @@ #include #include #include -#include +#include #include #include #include @@ -124,7 +124,7 @@ static loff_t dma_buf_llseek(struct file *file, loff_t offset, int whence) return base + offset; } -static void dma_buf_poll_cb(struct fence *fence, struct fence_cb *cb) +static void dma_buf_poll_cb(struct dma_fence *fence, struct dma_fence_cb *cb) { struct dma_buf_poll_cb_t *dcb = (struct dma_buf_poll_cb_t *)cb; unsigned long flags; @@ -140,7 +140,7 @@ static unsigned int dma_buf_poll(struct file *file, poll_table *poll) struct dma_buf *dmabuf; struct reservation_object *resv; struct reservation_object_list *fobj; - struct fence *fence_excl; + struct dma_fence *fence_excl; unsigned long events; unsigned shared_count, seq; @@ -187,20 +187,20 @@ retry: spin_unlock_irq(&dmabuf->poll.lock); if (events & pevents) { - if (!fence_get_rcu(fence_excl)) { + if (!dma_fence_get_rcu(fence_excl)) { /* force a recheck */ events &= ~pevents; dma_buf_poll_cb(NULL, &dcb->cb); - } else if (!fence_add_callback(fence_excl, &dcb->cb, - dma_buf_poll_cb)) { + } else if (!dma_fence_add_callback(fence_excl, &dcb->cb, + dma_buf_poll_cb)) { events &= ~pevents; - fence_put(fence_excl); + dma_fence_put(fence_excl); } else { /* * No callback queued, wake up any additional * waiters. 
*/ - fence_put(fence_excl); + dma_fence_put(fence_excl); dma_buf_poll_cb(NULL, &dcb->cb); } } @@ -222,9 +222,9 @@ retry: goto out; for (i = 0; i < shared_count; ++i) { - struct fence *fence = rcu_dereference(fobj->shared[i]); + struct dma_fence *fence = rcu_dereference(fobj->shared[i]); - if (!fence_get_rcu(fence)) { + if (!dma_fence_get_rcu(fence)) { /* * fence refcount dropped to zero, this means * that fobj has been freed @@ -235,13 +235,13 @@ retry: dma_buf_poll_cb(NULL, &dcb->cb); break; } - if (!fence_add_callback(fence, &dcb->cb, - dma_buf_poll_cb)) { - fence_put(fence); + if (!dma_fence_add_callback(fence, &dcb->cb, + dma_buf_poll_cb)) { + dma_fence_put(fence); events &= ~POLLOUT; break; } - fence_put(fence); + dma_fence_put(fence); } /* No callback queued, wake up any additional waiters. */ diff --git a/drivers/dma-buf/dma-fence-array.c b/drivers/dma-buf/dma-fence-array.c new file mode 100644 index 000000000000..67eb7c8fb88c --- /dev/null +++ b/drivers/dma-buf/dma-fence-array.c @@ -0,0 +1,146 @@ +/* + * dma-fence-array: aggregate fences to be waited together + * + * Copyright (C) 2016 Collabora Ltd + * Copyright (C) 2016 Advanced Micro Devices, Inc. + * Authors: + * Gustavo Padovan + * Christian König + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ */ + +#include +#include +#include + +static const char *dma_fence_array_get_driver_name(struct dma_fence *fence) +{ + return "dma_fence_array"; +} + +static const char *dma_fence_array_get_timeline_name(struct dma_fence *fence) +{ + return "unbound"; +} + +static void dma_fence_array_cb_func(struct dma_fence *f, + struct dma_fence_cb *cb) +{ + struct dma_fence_array_cb *array_cb = + container_of(cb, struct dma_fence_array_cb, cb); + struct dma_fence_array *array = array_cb->array; + + if (atomic_dec_and_test(&array->num_pending)) + dma_fence_signal(&array->base); + dma_fence_put(&array->base); +} + +static bool dma_fence_array_enable_signaling(struct dma_fence *fence) +{ + struct dma_fence_array *array = to_dma_fence_array(fence); + struct dma_fence_array_cb *cb = (void *)(&array[1]); + unsigned i; + + for (i = 0; i < array->num_fences; ++i) { + cb[i].array = array; + /* + * As we may report that the fence is signaled before all + * callbacks are complete, we need to take an additional + * reference count on the array so that we do not free it too + * early. The core fence handling will only hold the reference + * until we signal the array as complete (but that is now + * insufficient). 
+ */ + dma_fence_get(&array->base); + if (dma_fence_add_callback(array->fences[i], &cb[i].cb, + dma_fence_array_cb_func)) { + dma_fence_put(&array->base); + if (atomic_dec_and_test(&array->num_pending)) + return false; + } + } + + return true; +} + +static bool dma_fence_array_signaled(struct dma_fence *fence) +{ + struct dma_fence_array *array = to_dma_fence_array(fence); + + return atomic_read(&array->num_pending) <= 0; +} + +static void dma_fence_array_release(struct dma_fence *fence) +{ + struct dma_fence_array *array = to_dma_fence_array(fence); + unsigned i; + + for (i = 0; i < array->num_fences; ++i) + dma_fence_put(array->fences[i]); + + kfree(array->fences); + dma_fence_free(fence); +} + +const struct dma_fence_ops dma_fence_array_ops = { + .get_driver_name = dma_fence_array_get_driver_name, + .get_timeline_name = dma_fence_array_get_timeline_name, + .enable_signaling = dma_fence_array_enable_signaling, + .signaled = dma_fence_array_signaled, + .wait = dma_fence_default_wait, + .release = dma_fence_array_release, +}; +EXPORT_SYMBOL(dma_fence_array_ops); + +/** + * dma_fence_array_create - Create a custom fence array + * @num_fences: [in] number of fences to add in the array + * @fences: [in] array containing the fences + * @context: [in] fence context to use + * @seqno: [in] sequence number to use + * @signal_on_any: [in] signal on any fence in the array + * + * Allocate a dma_fence_array object and initialize the base fence with + * dma_fence_init(). + * In case of error it returns NULL. + * + * The caller should allocate the fences array with num_fences size + * and fill it with the fences it wants to add to the object. Ownership of this + * array is taken and dma_fence_put() is used on each fence on release. + * + * If @signal_on_any is true the fence array signals if any fence in the array + * signals, otherwise it signals when all fences in the array signal. 
+ */ +struct dma_fence_array *dma_fence_array_create(int num_fences, + struct dma_fence **fences, + u64 context, unsigned seqno, + bool signal_on_any) +{ + struct dma_fence_array *array; + size_t size = sizeof(*array); + + /* Allocate the callback structures behind the array. */ + size += num_fences * sizeof(struct dma_fence_array_cb); + array = kzalloc(size, GFP_KERNEL); + if (!array) + return NULL; + + spin_lock_init(&array->lock); + dma_fence_init(&array->base, &dma_fence_array_ops, &array->lock, + context, seqno); + + array->num_fences = num_fences; + atomic_set(&array->num_pending, signal_on_any ? 1 : num_fences); + array->fences = fences; + + return array; +} +EXPORT_SYMBOL(dma_fence_array_create); diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c new file mode 100644 index 000000000000..3a7bf009c21c --- /dev/null +++ b/drivers/dma-buf/dma-fence.c @@ -0,0 +1,537 @@ +/* + * Fence mechanism for dma-buf and to allow for asynchronous dma access + * + * Copyright (C) 2012 Canonical Ltd + * Copyright (C) 2012 Texas Instruments + * + * Authors: + * Rob Clark + * Maarten Lankhorst + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include +#include +#include +#include + +#define CREATE_TRACE_POINTS +#include + +EXPORT_TRACEPOINT_SYMBOL(dma_fence_annotate_wait_on); +EXPORT_TRACEPOINT_SYMBOL(dma_fence_emit); + +/* + * fence context counter: each execution context should have its own + * fence context, this allows checking if fences belong to the same + * context or not. 
One device can have multiple separate contexts, + * and they're used if some engine can run independently of another. + */ +static atomic64_t dma_fence_context_counter = ATOMIC64_INIT(0); + +/** + * dma_fence_context_alloc - allocate an array of fence contexts + * @num: [in] amount of contexts to allocate + * + * This function will return the first index of the number of fences allocated. + * The fence context is used for setting fence->context to a unique number. + */ +u64 dma_fence_context_alloc(unsigned num) +{ + BUG_ON(!num); + return atomic64_add_return(num, &dma_fence_context_counter) - num; +} +EXPORT_SYMBOL(dma_fence_context_alloc); + +/** + * dma_fence_signal_locked - signal completion of a fence + * @fence: the fence to signal + * + * Signal completion for software callbacks on a fence, this will unblock + * dma_fence_wait() calls and run all the callbacks added with + * dma_fence_add_callback(). Can be called multiple times, but since a fence + * can only go from unsignaled to signaled state, it will only be effective + * the first time. + * + * Unlike dma_fence_signal, this function must be called with fence->lock held. 
+ */ +int dma_fence_signal_locked(struct dma_fence *fence) +{ + struct dma_fence_cb *cur, *tmp; + int ret = 0; + + lockdep_assert_held(fence->lock); + + if (WARN_ON(!fence)) + return -EINVAL; + + if (!ktime_to_ns(fence->timestamp)) { + fence->timestamp = ktime_get(); + smp_mb__before_atomic(); + } + + if (test_and_set_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) { + ret = -EINVAL; + + /* + * we might have raced with the unlocked dma_fence_signal, + * still run through all callbacks + */ + } else + trace_dma_fence_signaled(fence); + + list_for_each_entry_safe(cur, tmp, &fence->cb_list, node) { + list_del_init(&cur->node); + cur->func(fence, cur); + } + return ret; +} +EXPORT_SYMBOL(dma_fence_signal_locked); + +/** + * dma_fence_signal - signal completion of a fence + * @fence: the fence to signal + * + * Signal completion for software callbacks on a fence, this will unblock + * dma_fence_wait() calls and run all the callbacks added with + * dma_fence_add_callback(). Can be called multiple times, but since a fence + * can only go from unsignaled to signaled state, it will only be effective + * the first time. 
+ */ +int dma_fence_signal(struct dma_fence *fence) +{ + unsigned long flags; + + if (!fence) + return -EINVAL; + + if (!ktime_to_ns(fence->timestamp)) { + fence->timestamp = ktime_get(); + smp_mb__before_atomic(); + } + + if (test_and_set_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) + return -EINVAL; + + trace_dma_fence_signaled(fence); + + if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &fence->flags)) { + struct dma_fence_cb *cur, *tmp; + + spin_lock_irqsave(fence->lock, flags); + list_for_each_entry_safe(cur, tmp, &fence->cb_list, node) { + list_del_init(&cur->node); + cur->func(fence, cur); + } + spin_unlock_irqrestore(fence->lock, flags); + } + return 0; +} +EXPORT_SYMBOL(dma_fence_signal); + +/** + * dma_fence_wait_timeout - sleep until the fence gets signaled + * or until timeout elapses + * @fence: [in] the fence to wait on + * @intr: [in] if true, do an interruptible wait + * @timeout: [in] timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT + * + * Returns -ERESTARTSYS if interrupted, 0 if the wait timed out, or the + * remaining timeout in jiffies on success. Other error values may be + * returned on custom implementations. + * + * Performs a synchronous wait on this fence. It is assumed the caller + * directly or indirectly (buf-mgr between reservation and committing) + * holds a reference to the fence, otherwise the fence might be + * freed before return, resulting in undefined behavior. 
+ */ +signed long +dma_fence_wait_timeout(struct dma_fence *fence, bool intr, signed long timeout) +{ + signed long ret; + + if (WARN_ON(timeout < 0)) + return -EINVAL; + + if (timeout == 0) + return dma_fence_is_signaled(fence); + + trace_dma_fence_wait_start(fence); + ret = fence->ops->wait(fence, intr, timeout); + trace_dma_fence_wait_end(fence); + return ret; +} +EXPORT_SYMBOL(dma_fence_wait_timeout); + +void dma_fence_release(struct kref *kref) +{ + struct dma_fence *fence = + container_of(kref, struct dma_fence, refcount); + + trace_dma_fence_destroy(fence); + + BUG_ON(!list_empty(&fence->cb_list)); + + if (fence->ops->release) + fence->ops->release(fence); + else + dma_fence_free(fence); +} +EXPORT_SYMBOL(dma_fence_release); + +void dma_fence_free(struct dma_fence *fence) +{ + kfree_rcu(fence, rcu); +} +EXPORT_SYMBOL(dma_fence_free); + +/** + * dma_fence_enable_sw_signaling - enable signaling on fence + * @fence: [in] the fence to enable + * + * this will request for sw signaling to be enabled, to make the fence + * complete as soon as possible + */ +void dma_fence_enable_sw_signaling(struct dma_fence *fence) +{ + unsigned long flags; + + if (!test_and_set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, + &fence->flags) && + !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) { + trace_dma_fence_enable_signal(fence); + + spin_lock_irqsave(fence->lock, flags); + + if (!fence->ops->enable_signaling(fence)) + dma_fence_signal_locked(fence); + + spin_unlock_irqrestore(fence->lock, flags); + } +} +EXPORT_SYMBOL(dma_fence_enable_sw_signaling); + +/** + * dma_fence_add_callback - add a callback to be called when the fence + * is signaled + * @fence: [in] the fence to wait on + * @cb: [in] the callback to register + * @func: [in] the function to call + * + * cb will be initialized by dma_fence_add_callback, no initialization + * by the caller is required. Any number of callbacks can be registered + * to a fence, but a callback can only be registered to one fence at a time. 
+ * + * Note that the callback can be called from an atomic context. If + * fence is already signaled, this function will return -ENOENT (and + * *not* call the callback) + * + * Add a software callback to the fence. Same restrictions apply to + * refcount as it does to dma_fence_wait, however the caller doesn't need to + * keep a refcount to fence afterwards: when software access is enabled, + * the creator of the fence is required to keep the fence alive until + * after it signals with dma_fence_signal. The callback itself can be called + * from irq context. + * + */ +int dma_fence_add_callback(struct dma_fence *fence, struct dma_fence_cb *cb, + dma_fence_func_t func) +{ + unsigned long flags; + int ret = 0; + bool was_set; + + if (WARN_ON(!fence || !func)) + return -EINVAL; + + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) { + INIT_LIST_HEAD(&cb->node); + return -ENOENT; + } + + spin_lock_irqsave(fence->lock, flags); + + was_set = test_and_set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, + &fence->flags); + + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) + ret = -ENOENT; + else if (!was_set) { + trace_dma_fence_enable_signal(fence); + + if (!fence->ops->enable_signaling(fence)) { + dma_fence_signal_locked(fence); + ret = -ENOENT; + } + } + + if (!ret) { + cb->func = func; + list_add_tail(&cb->node, &fence->cb_list); + } else + INIT_LIST_HEAD(&cb->node); + spin_unlock_irqrestore(fence->lock, flags); + + return ret; +} +EXPORT_SYMBOL(dma_fence_add_callback); + +/** + * dma_fence_remove_callback - remove a callback from the signaling list + * @fence: [in] the fence to wait on + * @cb: [in] the callback to remove + * + * Remove a previously queued callback from the fence. This function returns + * true if the callback is successfully removed, or false if the fence has + * already been signaled. 
+ * + * *WARNING*: + * Cancelling a callback should only be done if you really know what you're + * doing, since deadlocks and race conditions could occur all too easily. For + * this reason, it should only ever be done on hardware lockup recovery, + * with a reference held to the fence. + */ +bool +dma_fence_remove_callback(struct dma_fence *fence, struct dma_fence_cb *cb) +{ + unsigned long flags; + bool ret; + + spin_lock_irqsave(fence->lock, flags); + + ret = !list_empty(&cb->node); + if (ret) + list_del_init(&cb->node); + + spin_unlock_irqrestore(fence->lock, flags); + + return ret; +} +EXPORT_SYMBOL(dma_fence_remove_callback); + +struct default_wait_cb { + struct dma_fence_cb base; + struct task_struct *task; +}; + +static void +dma_fence_default_wait_cb(struct dma_fence *fence, struct dma_fence_cb *cb) +{ + struct default_wait_cb *wait = + container_of(cb, struct default_wait_cb, base); + + wake_up_state(wait->task, TASK_NORMAL); +} + +/** + * dma_fence_default_wait - default sleep until the fence gets signaled + * or until timeout elapses + * @fence: [in] the fence to wait on + * @intr: [in] if true, do an interruptible wait + * @timeout: [in] timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT + * + * Returns -ERESTARTSYS if interrupted, 0 if the wait timed out, or the + * remaining timeout in jiffies on success. 
+ */ +signed long +dma_fence_default_wait(struct dma_fence *fence, bool intr, signed long timeout) +{ + struct default_wait_cb cb; + unsigned long flags; + signed long ret = timeout; + bool was_set; + + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) + return timeout; + + spin_lock_irqsave(fence->lock, flags); + + if (intr && signal_pending(current)) { + ret = -ERESTARTSYS; + goto out; + } + + was_set = test_and_set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, + &fence->flags); + + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) + goto out; + + if (!was_set) { + trace_dma_fence_enable_signal(fence); + + if (!fence->ops->enable_signaling(fence)) { + dma_fence_signal_locked(fence); + goto out; + } + } + + cb.base.func = dma_fence_default_wait_cb; + cb.task = current; + list_add(&cb.base.node, &fence->cb_list); + + while (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags) && ret > 0) { + if (intr) + __set_current_state(TASK_INTERRUPTIBLE); + else + __set_current_state(TASK_UNINTERRUPTIBLE); + spin_unlock_irqrestore(fence->lock, flags); + + ret = schedule_timeout(ret); + + spin_lock_irqsave(fence->lock, flags); + if (ret > 0 && intr && signal_pending(current)) + ret = -ERESTARTSYS; + } + + if (!list_empty(&cb.base.node)) + list_del(&cb.base.node); + __set_current_state(TASK_RUNNING); + +out: + spin_unlock_irqrestore(fence->lock, flags); + return ret; +} +EXPORT_SYMBOL(dma_fence_default_wait); + +static bool +dma_fence_test_signaled_any(struct dma_fence **fences, uint32_t count) +{ + int i; + + for (i = 0; i < count; ++i) { + struct dma_fence *fence = fences[i]; + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) + return true; + } + return false; +} + +/** + * dma_fence_wait_any_timeout - sleep until any fence gets signaled + * or until timeout elapses + * @fences: [in] array of fences to wait on + * @count: [in] number of fences to wait on + * @intr: [in] if true, do an interruptible wait + * @timeout: [in] timeout value in jiffies, or 
MAX_SCHEDULE_TIMEOUT + * + * Returns -EINVAL on custom fence wait implementation, -ERESTARTSYS if + * interrupted, 0 if the wait timed out, or the remaining timeout in jiffies + * on success. + * + * Synchronous waits for the first fence in the array to be signaled. The + * caller needs to hold a reference to all fences in the array, otherwise a + * fence might be freed before return, resulting in undefined behavior. + */ +signed long +dma_fence_wait_any_timeout(struct dma_fence **fences, uint32_t count, + bool intr, signed long timeout) +{ + struct default_wait_cb *cb; + signed long ret = timeout; + unsigned i; + + if (WARN_ON(!fences || !count || timeout < 0)) + return -EINVAL; + + if (timeout == 0) { + for (i = 0; i < count; ++i) + if (dma_fence_is_signaled(fences[i])) + return 1; + + return 0; + } + + cb = kcalloc(count, sizeof(struct default_wait_cb), GFP_KERNEL); + if (cb == NULL) { + ret = -ENOMEM; + goto err_free_cb; + } + + for (i = 0; i < count; ++i) { + struct dma_fence *fence = fences[i]; + + if (fence->ops->wait != dma_fence_default_wait) { + ret = -EINVAL; + goto fence_rm_cb; + } + + cb[i].task = current; + if (dma_fence_add_callback(fence, &cb[i].base, + dma_fence_default_wait_cb)) { + /* This fence is already signaled */ + goto fence_rm_cb; + } + } + + while (ret > 0) { + if (intr) + set_current_state(TASK_INTERRUPTIBLE); + else + set_current_state(TASK_UNINTERRUPTIBLE); + + if (dma_fence_test_signaled_any(fences, count)) + break; + + ret = schedule_timeout(ret); + + if (ret > 0 && intr && signal_pending(current)) + ret = -ERESTARTSYS; + } + + __set_current_state(TASK_RUNNING); + +fence_rm_cb: + while (i-- > 0) + dma_fence_remove_callback(fences[i], &cb[i].base); + +err_free_cb: + kfree(cb); + + return ret; +} +EXPORT_SYMBOL(dma_fence_wait_any_timeout); + +/** + * dma_fence_init - Initialize a custom fence. 
+ * @fence: [in] the fence to initialize + * @ops: [in] the dma_fence_ops for operations on this fence + * @lock: [in] the irqsafe spinlock to use for locking this fence + * @context: [in] the execution context this fence is run on + * @seqno: [in] a linear increasing sequence number for this context + * + * Initializes an allocated fence, the caller doesn't have to keep its + * refcount after committing with this fence, but it will need to hold a + * refcount again if dma_fence_ops.enable_signaling gets called. This can + * be used for other implementing other types of fence. + * + * context and seqno are used for easy comparison between fences, allowing + * to check which fence is later by simply using dma_fence_later. + */ +void +dma_fence_init(struct dma_fence *fence, const struct dma_fence_ops *ops, + spinlock_t *lock, u64 context, unsigned seqno) +{ + BUG_ON(!lock); + BUG_ON(!ops || !ops->wait || !ops->enable_signaling || + !ops->get_driver_name || !ops->get_timeline_name); + + kref_init(&fence->refcount); + fence->ops = ops; + INIT_LIST_HEAD(&fence->cb_list); + fence->lock = lock; + fence->context = context; + fence->seqno = seqno; + fence->flags = 0UL; + + trace_dma_fence_init(fence); +} +EXPORT_SYMBOL(dma_fence_init); diff --git a/drivers/dma-buf/fence-array.c b/drivers/dma-buf/fence-array.c deleted file mode 100644 index f1989fcaf354..000000000000 --- a/drivers/dma-buf/fence-array.c +++ /dev/null @@ -1,145 +0,0 @@ -/* - * fence-array: aggregate fences to be waited together - * - * Copyright (C) 2016 Collabora Ltd - * Copyright (C) 2016 Advanced Micro Devices, Inc. - * Authors: - * Gustavo Padovan - * Christian König - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. 
- * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - */ - -#include -#include -#include - -static void fence_array_cb_func(struct fence *f, struct fence_cb *cb); - -static const char *fence_array_get_driver_name(struct fence *fence) -{ - return "fence_array"; -} - -static const char *fence_array_get_timeline_name(struct fence *fence) -{ - return "unbound"; -} - -static void fence_array_cb_func(struct fence *f, struct fence_cb *cb) -{ - struct fence_array_cb *array_cb = - container_of(cb, struct fence_array_cb, cb); - struct fence_array *array = array_cb->array; - - if (atomic_dec_and_test(&array->num_pending)) - fence_signal(&array->base); - fence_put(&array->base); -} - -static bool fence_array_enable_signaling(struct fence *fence) -{ - struct fence_array *array = to_fence_array(fence); - struct fence_array_cb *cb = (void *)(&array[1]); - unsigned i; - - for (i = 0; i < array->num_fences; ++i) { - cb[i].array = array; - /* - * As we may report that the fence is signaled before all - * callbacks are complete, we need to take an additional - * reference count on the array so that we do not free it too - * early. The core fence handling will only hold the reference - * until we signal the array as complete (but that is now - * insufficient). 
- */ - fence_get(&array->base); - if (fence_add_callback(array->fences[i], &cb[i].cb, - fence_array_cb_func)) { - fence_put(&array->base); - if (atomic_dec_and_test(&array->num_pending)) - return false; - } - } - - return true; -} - -static bool fence_array_signaled(struct fence *fence) -{ - struct fence_array *array = to_fence_array(fence); - - return atomic_read(&array->num_pending) <= 0; -} - -static void fence_array_release(struct fence *fence) -{ - struct fence_array *array = to_fence_array(fence); - unsigned i; - - for (i = 0; i < array->num_fences; ++i) - fence_put(array->fences[i]); - - kfree(array->fences); - fence_free(fence); -} - -const struct fence_ops fence_array_ops = { - .get_driver_name = fence_array_get_driver_name, - .get_timeline_name = fence_array_get_timeline_name, - .enable_signaling = fence_array_enable_signaling, - .signaled = fence_array_signaled, - .wait = fence_default_wait, - .release = fence_array_release, -}; -EXPORT_SYMBOL(fence_array_ops); - -/** - * fence_array_create - Create a custom fence array - * @num_fences: [in] number of fences to add in the array - * @fences: [in] array containing the fences - * @context: [in] fence context to use - * @seqno: [in] sequence number to use - * @signal_on_any: [in] signal on any fence in the array - * - * Allocate a fence_array object and initialize the base fence with fence_init(). - * In case of error it returns NULL. - * - * The caller should allocate the fences array with num_fences size - * and fill it with the fences it wants to add to the object. Ownership of this - * array is taken and fence_put() is used on each fence on release. - * - * If @signal_on_any is true the fence array signals if any fence in the array - * signals, otherwise it signals when all fences in the array signal. 
- */ -struct fence_array *fence_array_create(int num_fences, struct fence **fences, - u64 context, unsigned seqno, - bool signal_on_any) -{ - struct fence_array *array; - size_t size = sizeof(*array); - - /* Allocate the callback structures behind the array. */ - size += num_fences * sizeof(struct fence_array_cb); - array = kzalloc(size, GFP_KERNEL); - if (!array) - return NULL; - - spin_lock_init(&array->lock); - fence_init(&array->base, &fence_array_ops, &array->lock, - context, seqno); - - array->num_fences = num_fences; - atomic_set(&array->num_pending, signal_on_any ? 1 : num_fences); - array->fences = fences; - - return array; -} -EXPORT_SYMBOL(fence_array_create); diff --git a/drivers/dma-buf/fence.c b/drivers/dma-buf/fence.c deleted file mode 100644 index cc05dddc77a6..000000000000 --- a/drivers/dma-buf/fence.c +++ /dev/null @@ -1,534 +0,0 @@ -/* - * Fence mechanism for dma-buf and to allow for asynchronous dma access - * - * Copyright (C) 2012 Canonical Ltd - * Copyright (C) 2012 Texas Instruments - * - * Authors: - * Rob Clark - * Maarten Lankhorst - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - */ - -#include -#include -#include -#include - -#define CREATE_TRACE_POINTS -#include - -EXPORT_TRACEPOINT_SYMBOL(fence_annotate_wait_on); -EXPORT_TRACEPOINT_SYMBOL(fence_emit); - -/* - * fence context counter: each execution context should have its own - * fence context, this allows checking if fences belong to the same - * context or not. One device can have multiple separate contexts, - * and they're used if some engine can run independently of another. 
- */ -static atomic64_t fence_context_counter = ATOMIC64_INIT(0); - -/** - * fence_context_alloc - allocate an array of fence contexts - * @num: [in] amount of contexts to allocate - * - * This function will return the first index of the number of fences allocated. - * The fence context is used for setting fence->context to a unique number. - */ -u64 fence_context_alloc(unsigned num) -{ - BUG_ON(!num); - return atomic64_add_return(num, &fence_context_counter) - num; -} -EXPORT_SYMBOL(fence_context_alloc); - -/** - * fence_signal_locked - signal completion of a fence - * @fence: the fence to signal - * - * Signal completion for software callbacks on a fence, this will unblock - * fence_wait() calls and run all the callbacks added with - * fence_add_callback(). Can be called multiple times, but since a fence - * can only go from unsignaled to signaled state, it will only be effective - * the first time. - * - * Unlike fence_signal, this function must be called with fence->lock held. - */ -int fence_signal_locked(struct fence *fence) -{ - struct fence_cb *cur, *tmp; - int ret = 0; - - lockdep_assert_held(fence->lock); - - if (WARN_ON(!fence)) - return -EINVAL; - - if (!ktime_to_ns(fence->timestamp)) { - fence->timestamp = ktime_get(); - smp_mb__before_atomic(); - } - - if (test_and_set_bit(FENCE_FLAG_SIGNALED_BIT, &fence->flags)) { - ret = -EINVAL; - - /* - * we might have raced with the unlocked fence_signal, - * still run through all callbacks - */ - } else - trace_fence_signaled(fence); - - list_for_each_entry_safe(cur, tmp, &fence->cb_list, node) { - list_del_init(&cur->node); - cur->func(fence, cur); - } - return ret; -} -EXPORT_SYMBOL(fence_signal_locked); - -/** - * fence_signal - signal completion of a fence - * @fence: the fence to signal - * - * Signal completion for software callbacks on a fence, this will unblock - * fence_wait() calls and run all the callbacks added with - * fence_add_callback(). 
Can be called multiple times, but since a fence - * can only go from unsignaled to signaled state, it will only be effective - * the first time. - */ -int fence_signal(struct fence *fence) -{ - unsigned long flags; - - if (!fence) - return -EINVAL; - - if (!ktime_to_ns(fence->timestamp)) { - fence->timestamp = ktime_get(); - smp_mb__before_atomic(); - } - - if (test_and_set_bit(FENCE_FLAG_SIGNALED_BIT, &fence->flags)) - return -EINVAL; - - trace_fence_signaled(fence); - - if (test_bit(FENCE_FLAG_ENABLE_SIGNAL_BIT, &fence->flags)) { - struct fence_cb *cur, *tmp; - - spin_lock_irqsave(fence->lock, flags); - list_for_each_entry_safe(cur, tmp, &fence->cb_list, node) { - list_del_init(&cur->node); - cur->func(fence, cur); - } - spin_unlock_irqrestore(fence->lock, flags); - } - return 0; -} -EXPORT_SYMBOL(fence_signal); - -/** - * fence_wait_timeout - sleep until the fence gets signaled - * or until timeout elapses - * @fence: [in] the fence to wait on - * @intr: [in] if true, do an interruptible wait - * @timeout: [in] timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT - * - * Returns -ERESTARTSYS if interrupted, 0 if the wait timed out, or the - * remaining timeout in jiffies on success. Other error values may be - * returned on custom implementations. - * - * Performs a synchronous wait on this fence. It is assumed the caller - * directly or indirectly (buf-mgr between reservation and committing) - * holds a reference to the fence, otherwise the fence might be - * freed before return, resulting in undefined behavior. 
- */ -signed long -fence_wait_timeout(struct fence *fence, bool intr, signed long timeout) -{ - signed long ret; - - if (WARN_ON(timeout < 0)) - return -EINVAL; - - if (timeout == 0) - return fence_is_signaled(fence); - - trace_fence_wait_start(fence); - ret = fence->ops->wait(fence, intr, timeout); - trace_fence_wait_end(fence); - return ret; -} -EXPORT_SYMBOL(fence_wait_timeout); - -void fence_release(struct kref *kref) -{ - struct fence *fence = - container_of(kref, struct fence, refcount); - - trace_fence_destroy(fence); - - BUG_ON(!list_empty(&fence->cb_list)); - - if (fence->ops->release) - fence->ops->release(fence); - else - fence_free(fence); -} -EXPORT_SYMBOL(fence_release); - -void fence_free(struct fence *fence) -{ - kfree_rcu(fence, rcu); -} -EXPORT_SYMBOL(fence_free); - -/** - * fence_enable_sw_signaling - enable signaling on fence - * @fence: [in] the fence to enable - * - * this will request for sw signaling to be enabled, to make the fence - * complete as soon as possible - */ -void fence_enable_sw_signaling(struct fence *fence) -{ - unsigned long flags; - - if (!test_and_set_bit(FENCE_FLAG_ENABLE_SIGNAL_BIT, &fence->flags) && - !test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->flags)) { - trace_fence_enable_signal(fence); - - spin_lock_irqsave(fence->lock, flags); - - if (!fence->ops->enable_signaling(fence)) - fence_signal_locked(fence); - - spin_unlock_irqrestore(fence->lock, flags); - } -} -EXPORT_SYMBOL(fence_enable_sw_signaling); - -/** - * fence_add_callback - add a callback to be called when the fence - * is signaled - * @fence: [in] the fence to wait on - * @cb: [in] the callback to register - * @func: [in] the function to call - * - * cb will be initialized by fence_add_callback, no initialization - * by the caller is required. Any number of callbacks can be registered - * to a fence, but a callback can only be registered to one fence at a time. - * - * Note that the callback can be called from an atomic context. 
If - * fence is already signaled, this function will return -ENOENT (and - * *not* call the callback) - * - * Add a software callback to the fence. Same restrictions apply to - * refcount as it does to fence_wait, however the caller doesn't need to - * keep a refcount to fence afterwards: when software access is enabled, - * the creator of the fence is required to keep the fence alive until - * after it signals with fence_signal. The callback itself can be called - * from irq context. - * - */ -int fence_add_callback(struct fence *fence, struct fence_cb *cb, - fence_func_t func) -{ - unsigned long flags; - int ret = 0; - bool was_set; - - if (WARN_ON(!fence || !func)) - return -EINVAL; - - if (test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->flags)) { - INIT_LIST_HEAD(&cb->node); - return -ENOENT; - } - - spin_lock_irqsave(fence->lock, flags); - - was_set = test_and_set_bit(FENCE_FLAG_ENABLE_SIGNAL_BIT, &fence->flags); - - if (test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->flags)) - ret = -ENOENT; - else if (!was_set) { - trace_fence_enable_signal(fence); - - if (!fence->ops->enable_signaling(fence)) { - fence_signal_locked(fence); - ret = -ENOENT; - } - } - - if (!ret) { - cb->func = func; - list_add_tail(&cb->node, &fence->cb_list); - } else - INIT_LIST_HEAD(&cb->node); - spin_unlock_irqrestore(fence->lock, flags); - - return ret; -} -EXPORT_SYMBOL(fence_add_callback); - -/** - * fence_remove_callback - remove a callback from the signaling list - * @fence: [in] the fence to wait on - * @cb: [in] the callback to remove - * - * Remove a previously queued callback from the fence. This function returns - * true if the callback is successfully removed, or false if the fence has - * already been signaled. - * - * *WARNING*: - * Cancelling a callback should only be done if you really know what you're - * doing, since deadlocks and race conditions could occur all too easily. 
For - * this reason, it should only ever be done on hardware lockup recovery, - * with a reference held to the fence. - */ -bool -fence_remove_callback(struct fence *fence, struct fence_cb *cb) -{ - unsigned long flags; - bool ret; - - spin_lock_irqsave(fence->lock, flags); - - ret = !list_empty(&cb->node); - if (ret) - list_del_init(&cb->node); - - spin_unlock_irqrestore(fence->lock, flags); - - return ret; -} -EXPORT_SYMBOL(fence_remove_callback); - -struct default_wait_cb { - struct fence_cb base; - struct task_struct *task; -}; - -static void -fence_default_wait_cb(struct fence *fence, struct fence_cb *cb) -{ - struct default_wait_cb *wait = - container_of(cb, struct default_wait_cb, base); - - wake_up_state(wait->task, TASK_NORMAL); -} - -/** - * fence_default_wait - default sleep until the fence gets signaled - * or until timeout elapses - * @fence: [in] the fence to wait on - * @intr: [in] if true, do an interruptible wait - * @timeout: [in] timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT - * - * Returns -ERESTARTSYS if interrupted, 0 if the wait timed out, or the - * remaining timeout in jiffies on success. 
- */ -signed long -fence_default_wait(struct fence *fence, bool intr, signed long timeout) -{ - struct default_wait_cb cb; - unsigned long flags; - signed long ret = timeout; - bool was_set; - - if (test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->flags)) - return timeout; - - spin_lock_irqsave(fence->lock, flags); - - if (intr && signal_pending(current)) { - ret = -ERESTARTSYS; - goto out; - } - - was_set = test_and_set_bit(FENCE_FLAG_ENABLE_SIGNAL_BIT, &fence->flags); - - if (test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->flags)) - goto out; - - if (!was_set) { - trace_fence_enable_signal(fence); - - if (!fence->ops->enable_signaling(fence)) { - fence_signal_locked(fence); - goto out; - } - } - - cb.base.func = fence_default_wait_cb; - cb.task = current; - list_add(&cb.base.node, &fence->cb_list); - - while (!test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->flags) && ret > 0) { - if (intr) - __set_current_state(TASK_INTERRUPTIBLE); - else - __set_current_state(TASK_UNINTERRUPTIBLE); - spin_unlock_irqrestore(fence->lock, flags); - - ret = schedule_timeout(ret); - - spin_lock_irqsave(fence->lock, flags); - if (ret > 0 && intr && signal_pending(current)) - ret = -ERESTARTSYS; - } - - if (!list_empty(&cb.base.node)) - list_del(&cb.base.node); - __set_current_state(TASK_RUNNING); - -out: - spin_unlock_irqrestore(fence->lock, flags); - return ret; -} -EXPORT_SYMBOL(fence_default_wait); - -static bool -fence_test_signaled_any(struct fence **fences, uint32_t count) -{ - int i; - - for (i = 0; i < count; ++i) { - struct fence *fence = fences[i]; - if (test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->flags)) - return true; - } - return false; -} - -/** - * fence_wait_any_timeout - sleep until any fence gets signaled - * or until timeout elapses - * @fences: [in] array of fences to wait on - * @count: [in] number of fences to wait on - * @intr: [in] if true, do an interruptible wait - * @timeout: [in] timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT - * - * Returns -EINVAL on custom fence wait 
implementation, -ERESTARTSYS if - * interrupted, 0 if the wait timed out, or the remaining timeout in jiffies - * on success. - * - * Synchronous waits for the first fence in the array to be signaled. The - * caller needs to hold a reference to all fences in the array, otherwise a - * fence might be freed before return, resulting in undefined behavior. - */ -signed long -fence_wait_any_timeout(struct fence **fences, uint32_t count, - bool intr, signed long timeout) -{ - struct default_wait_cb *cb; - signed long ret = timeout; - unsigned i; - - if (WARN_ON(!fences || !count || timeout < 0)) - return -EINVAL; - - if (timeout == 0) { - for (i = 0; i < count; ++i) - if (fence_is_signaled(fences[i])) - return 1; - - return 0; - } - - cb = kcalloc(count, sizeof(struct default_wait_cb), GFP_KERNEL); - if (cb == NULL) { - ret = -ENOMEM; - goto err_free_cb; - } - - for (i = 0; i < count; ++i) { - struct fence *fence = fences[i]; - - if (fence->ops->wait != fence_default_wait) { - ret = -EINVAL; - goto fence_rm_cb; - } - - cb[i].task = current; - if (fence_add_callback(fence, &cb[i].base, - fence_default_wait_cb)) { - /* This fence is already signaled */ - goto fence_rm_cb; - } - } - - while (ret > 0) { - if (intr) - set_current_state(TASK_INTERRUPTIBLE); - else - set_current_state(TASK_UNINTERRUPTIBLE); - - if (fence_test_signaled_any(fences, count)) - break; - - ret = schedule_timeout(ret); - - if (ret > 0 && intr && signal_pending(current)) - ret = -ERESTARTSYS; - } - - __set_current_state(TASK_RUNNING); - -fence_rm_cb: - while (i-- > 0) - fence_remove_callback(fences[i], &cb[i].base); - -err_free_cb: - kfree(cb); - - return ret; -} -EXPORT_SYMBOL(fence_wait_any_timeout); - -/** - * fence_init - Initialize a custom fence. 
- * @fence: [in] the fence to initialize - * @ops: [in] the fence_ops for operations on this fence - * @lock: [in] the irqsafe spinlock to use for locking this fence - * @context: [in] the execution context this fence is run on - * @seqno: [in] a linear increasing sequence number for this context - * - * Initializes an allocated fence, the caller doesn't have to keep its - * refcount after committing with this fence, but it will need to hold a - * refcount again if fence_ops.enable_signaling gets called. This can - * be used for other implementing other types of fence. - * - * context and seqno are used for easy comparison between fences, allowing - * to check which fence is later by simply using fence_later. - */ -void -fence_init(struct fence *fence, const struct fence_ops *ops, - spinlock_t *lock, u64 context, unsigned seqno) -{ - BUG_ON(!lock); - BUG_ON(!ops || !ops->wait || !ops->enable_signaling || - !ops->get_driver_name || !ops->get_timeline_name); - - kref_init(&fence->refcount); - fence->ops = ops; - INIT_LIST_HEAD(&fence->cb_list); - fence->lock = lock; - fence->context = context; - fence->seqno = seqno; - fence->flags = 0UL; - - trace_fence_init(fence); -} -EXPORT_SYMBOL(fence_init); diff --git a/drivers/dma-buf/reservation.c b/drivers/dma-buf/reservation.c index 82de59f7cbbd..7ed56f3edfb7 100644 --- a/drivers/dma-buf/reservation.c +++ b/drivers/dma-buf/reservation.c @@ -102,17 +102,17 @@ EXPORT_SYMBOL(reservation_object_reserve_shared); static void reservation_object_add_shared_inplace(struct reservation_object *obj, struct reservation_object_list *fobj, - struct fence *fence) + struct dma_fence *fence) { u32 i; - fence_get(fence); + dma_fence_get(fence); preempt_disable(); write_seqcount_begin(&obj->seq); for (i = 0; i < fobj->shared_count; ++i) { - struct fence *old_fence; + struct dma_fence *old_fence; old_fence = rcu_dereference_protected(fobj->shared[i], reservation_object_held(obj)); @@ -123,7 +123,7 @@ 
reservation_object_add_shared_inplace(struct reservation_object *obj, write_seqcount_end(&obj->seq); preempt_enable(); - fence_put(old_fence); + dma_fence_put(old_fence); return; } } @@ -143,12 +143,12 @@ static void reservation_object_add_shared_replace(struct reservation_object *obj, struct reservation_object_list *old, struct reservation_object_list *fobj, - struct fence *fence) + struct dma_fence *fence) { unsigned i; - struct fence *old_fence = NULL; + struct dma_fence *old_fence = NULL; - fence_get(fence); + dma_fence_get(fence); if (!old) { RCU_INIT_POINTER(fobj->shared[0], fence); @@ -165,7 +165,7 @@ reservation_object_add_shared_replace(struct reservation_object *obj, fobj->shared_count = old->shared_count; for (i = 0; i < old->shared_count; ++i) { - struct fence *check; + struct dma_fence *check; check = rcu_dereference_protected(old->shared[i], reservation_object_held(obj)); @@ -196,7 +196,7 @@ done: kfree_rcu(old, rcu); if (old_fence) - fence_put(old_fence); + dma_fence_put(old_fence); } /** @@ -208,7 +208,7 @@ done: * reservation_object_reserve_shared() has been called. */ void reservation_object_add_shared_fence(struct reservation_object *obj, - struct fence *fence) + struct dma_fence *fence) { struct reservation_object_list *old, *fobj = obj->staged; @@ -231,9 +231,9 @@ EXPORT_SYMBOL(reservation_object_add_shared_fence); * Add a fence to the exclusive slot. The obj->lock must be held. 
*/ void reservation_object_add_excl_fence(struct reservation_object *obj, - struct fence *fence) + struct dma_fence *fence) { - struct fence *old_fence = reservation_object_get_excl(obj); + struct dma_fence *old_fence = reservation_object_get_excl(obj); struct reservation_object_list *old; u32 i = 0; @@ -242,7 +242,7 @@ void reservation_object_add_excl_fence(struct reservation_object *obj, i = old->shared_count; if (fence) - fence_get(fence); + dma_fence_get(fence); preempt_disable(); write_seqcount_begin(&obj->seq); @@ -255,11 +255,11 @@ void reservation_object_add_excl_fence(struct reservation_object *obj, /* inplace update, no shared fences */ while (i--) - fence_put(rcu_dereference_protected(old->shared[i], + dma_fence_put(rcu_dereference_protected(old->shared[i], reservation_object_held(obj))); if (old_fence) - fence_put(old_fence); + dma_fence_put(old_fence); } EXPORT_SYMBOL(reservation_object_add_excl_fence); @@ -276,12 +276,12 @@ EXPORT_SYMBOL(reservation_object_add_excl_fence); * Zero or -errno */ int reservation_object_get_fences_rcu(struct reservation_object *obj, - struct fence **pfence_excl, + struct dma_fence **pfence_excl, unsigned *pshared_count, - struct fence ***pshared) + struct dma_fence ***pshared) { - struct fence **shared = NULL; - struct fence *fence_excl; + struct dma_fence **shared = NULL; + struct dma_fence *fence_excl; unsigned int shared_count; int ret = 1; @@ -296,12 +296,12 @@ int reservation_object_get_fences_rcu(struct reservation_object *obj, seq = read_seqcount_begin(&obj->seq); fence_excl = rcu_dereference(obj->fence_excl); - if (fence_excl && !fence_get_rcu(fence_excl)) + if (fence_excl && !dma_fence_get_rcu(fence_excl)) goto unlock; fobj = rcu_dereference(obj->fence); if (fobj) { - struct fence **nshared; + struct dma_fence **nshared; size_t sz = sizeof(*shared) * fobj->shared_max; nshared = krealloc(shared, sz, @@ -322,15 +322,15 @@ int reservation_object_get_fences_rcu(struct reservation_object *obj, for (i = 0; i < 
shared_count; ++i) { shared[i] = rcu_dereference(fobj->shared[i]); - if (!fence_get_rcu(shared[i])) + if (!dma_fence_get_rcu(shared[i])) break; } } if (i != shared_count || read_seqcount_retry(&obj->seq, seq)) { while (i--) - fence_put(shared[i]); - fence_put(fence_excl); + dma_fence_put(shared[i]); + dma_fence_put(fence_excl); goto unlock; } @@ -368,7 +368,7 @@ long reservation_object_wait_timeout_rcu(struct reservation_object *obj, bool wait_all, bool intr, unsigned long timeout) { - struct fence *fence; + struct dma_fence *fence; unsigned seq, shared_count, i = 0; long ret = timeout; @@ -389,16 +389,17 @@ retry: shared_count = fobj->shared_count; for (i = 0; i < shared_count; ++i) { - struct fence *lfence = rcu_dereference(fobj->shared[i]); + struct dma_fence *lfence = rcu_dereference(fobj->shared[i]); - if (test_bit(FENCE_FLAG_SIGNALED_BIT, &lfence->flags)) + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, + &lfence->flags)) continue; - if (!fence_get_rcu(lfence)) + if (!dma_fence_get_rcu(lfence)) goto unlock_retry; - if (fence_is_signaled(lfence)) { - fence_put(lfence); + if (dma_fence_is_signaled(lfence)) { + dma_fence_put(lfence); continue; } @@ -408,15 +409,16 @@ retry: } if (!shared_count) { - struct fence *fence_excl = rcu_dereference(obj->fence_excl); + struct dma_fence *fence_excl = rcu_dereference(obj->fence_excl); if (fence_excl && - !test_bit(FENCE_FLAG_SIGNALED_BIT, &fence_excl->flags)) { - if (!fence_get_rcu(fence_excl)) + !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, + &fence_excl->flags)) { + if (!dma_fence_get_rcu(fence_excl)) goto unlock_retry; - if (fence_is_signaled(fence_excl)) - fence_put(fence_excl); + if (dma_fence_is_signaled(fence_excl)) + dma_fence_put(fence_excl); else fence = fence_excl; } @@ -425,12 +427,12 @@ retry: rcu_read_unlock(); if (fence) { if (read_seqcount_retry(&obj->seq, seq)) { - fence_put(fence); + dma_fence_put(fence); goto retry; } - ret = fence_wait_timeout(fence, intr, ret); - fence_put(fence); + ret = 
dma_fence_wait_timeout(fence, intr, ret); + dma_fence_put(fence); if (ret > 0 && wait_all && (i + 1 < shared_count)) goto retry; } @@ -444,18 +446,18 @@ EXPORT_SYMBOL_GPL(reservation_object_wait_timeout_rcu); static inline int -reservation_object_test_signaled_single(struct fence *passed_fence) +reservation_object_test_signaled_single(struct dma_fence *passed_fence) { - struct fence *fence, *lfence = passed_fence; + struct dma_fence *fence, *lfence = passed_fence; int ret = 1; - if (!test_bit(FENCE_FLAG_SIGNALED_BIT, &lfence->flags)) { - fence = fence_get_rcu(lfence); + if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &lfence->flags)) { + fence = dma_fence_get_rcu(lfence); if (!fence) return -1; - ret = !!fence_is_signaled(fence); - fence_put(fence); + ret = !!dma_fence_is_signaled(fence); + dma_fence_put(fence); } return ret; } @@ -492,7 +494,7 @@ retry: shared_count = fobj->shared_count; for (i = 0; i < shared_count; ++i) { - struct fence *fence = rcu_dereference(fobj->shared[i]); + struct dma_fence *fence = rcu_dereference(fobj->shared[i]); ret = reservation_object_test_signaled_single(fence); if (ret < 0) @@ -506,7 +508,7 @@ retry: } if (!shared_count) { - struct fence *fence_excl = rcu_dereference(obj->fence_excl); + struct dma_fence *fence_excl = rcu_dereference(obj->fence_excl); if (fence_excl) { ret = reservation_object_test_signaled_single( diff --git a/drivers/dma-buf/seqno-fence.c b/drivers/dma-buf/seqno-fence.c index 71127f8f1626..f47112a64763 100644 --- a/drivers/dma-buf/seqno-fence.c +++ b/drivers/dma-buf/seqno-fence.c @@ -21,35 +21,35 @@ #include #include -static const char *seqno_fence_get_driver_name(struct fence *fence) +static const char *seqno_fence_get_driver_name(struct dma_fence *fence) { struct seqno_fence *seqno_fence = to_seqno_fence(fence); return seqno_fence->ops->get_driver_name(fence); } -static const char *seqno_fence_get_timeline_name(struct fence *fence) +static const char *seqno_fence_get_timeline_name(struct dma_fence *fence) { struct 
seqno_fence *seqno_fence = to_seqno_fence(fence); return seqno_fence->ops->get_timeline_name(fence); } -static bool seqno_enable_signaling(struct fence *fence) +static bool seqno_enable_signaling(struct dma_fence *fence) { struct seqno_fence *seqno_fence = to_seqno_fence(fence); return seqno_fence->ops->enable_signaling(fence); } -static bool seqno_signaled(struct fence *fence) +static bool seqno_signaled(struct dma_fence *fence) { struct seqno_fence *seqno_fence = to_seqno_fence(fence); return seqno_fence->ops->signaled && seqno_fence->ops->signaled(fence); } -static void seqno_release(struct fence *fence) +static void seqno_release(struct dma_fence *fence) { struct seqno_fence *f = to_seqno_fence(fence); @@ -57,18 +57,18 @@ static void seqno_release(struct fence *fence) if (f->ops->release) f->ops->release(fence); else - fence_free(&f->base); + dma_fence_free(&f->base); } -static signed long seqno_wait(struct fence *fence, bool intr, - signed long timeout) +static signed long seqno_wait(struct dma_fence *fence, bool intr, + signed long timeout) { struct seqno_fence *f = to_seqno_fence(fence); return f->ops->wait(fence, intr, timeout); } -const struct fence_ops seqno_fence_ops = { +const struct dma_fence_ops seqno_fence_ops = { .get_driver_name = seqno_fence_get_driver_name, .get_timeline_name = seqno_fence_get_timeline_name, .enable_signaling = seqno_enable_signaling, diff --git a/drivers/dma-buf/sw_sync.c b/drivers/dma-buf/sw_sync.c index 62e8e6dc7953..82e0ca4dd0c1 100644 --- a/drivers/dma-buf/sw_sync.c +++ b/drivers/dma-buf/sw_sync.c @@ -68,9 +68,9 @@ struct sw_sync_create_fence_data { #define SW_SYNC_IOC_INC _IOW(SW_SYNC_IOC_MAGIC, 1, __u32) -static const struct fence_ops timeline_fence_ops; +static const struct dma_fence_ops timeline_fence_ops; -static inline struct sync_pt *fence_to_sync_pt(struct fence *fence) +static inline struct sync_pt *dma_fence_to_sync_pt(struct dma_fence *fence) { if (fence->ops != &timeline_fence_ops) return NULL; @@ -93,7 +93,7 @@ 
struct sync_timeline *sync_timeline_create(const char *name) return NULL; kref_init(&obj->kref); - obj->context = fence_context_alloc(1); + obj->context = dma_fence_context_alloc(1); strlcpy(obj->name, name, sizeof(obj->name)); INIT_LIST_HEAD(&obj->child_list_head); @@ -146,7 +146,7 @@ static void sync_timeline_signal(struct sync_timeline *obj, unsigned int inc) list_for_each_entry_safe(pt, next, &obj->active_list_head, active_list) { - if (fence_is_signaled_locked(&pt->base)) + if (dma_fence_is_signaled_locked(&pt->base)) list_del_init(&pt->active_list); } @@ -179,30 +179,30 @@ static struct sync_pt *sync_pt_create(struct sync_timeline *obj, int size, spin_lock_irqsave(&obj->child_list_lock, flags); sync_timeline_get(obj); - fence_init(&pt->base, &timeline_fence_ops, &obj->child_list_lock, - obj->context, value); + dma_fence_init(&pt->base, &timeline_fence_ops, &obj->child_list_lock, + obj->context, value); list_add_tail(&pt->child_list, &obj->child_list_head); INIT_LIST_HEAD(&pt->active_list); spin_unlock_irqrestore(&obj->child_list_lock, flags); return pt; } -static const char *timeline_fence_get_driver_name(struct fence *fence) +static const char *timeline_fence_get_driver_name(struct dma_fence *fence) { return "sw_sync"; } -static const char *timeline_fence_get_timeline_name(struct fence *fence) +static const char *timeline_fence_get_timeline_name(struct dma_fence *fence) { - struct sync_timeline *parent = fence_parent(fence); + struct sync_timeline *parent = dma_fence_parent(fence); return parent->name; } -static void timeline_fence_release(struct fence *fence) +static void timeline_fence_release(struct dma_fence *fence) { - struct sync_pt *pt = fence_to_sync_pt(fence); - struct sync_timeline *parent = fence_parent(fence); + struct sync_pt *pt = dma_fence_to_sync_pt(fence); + struct sync_timeline *parent = dma_fence_parent(fence); unsigned long flags; spin_lock_irqsave(fence->lock, flags); @@ -212,20 +212,20 @@ static void timeline_fence_release(struct fence 
*fence) spin_unlock_irqrestore(fence->lock, flags); sync_timeline_put(parent); - fence_free(fence); + dma_fence_free(fence); } -static bool timeline_fence_signaled(struct fence *fence) +static bool timeline_fence_signaled(struct dma_fence *fence) { - struct sync_timeline *parent = fence_parent(fence); + struct sync_timeline *parent = dma_fence_parent(fence); return (fence->seqno > parent->value) ? false : true; } -static bool timeline_fence_enable_signaling(struct fence *fence) +static bool timeline_fence_enable_signaling(struct dma_fence *fence) { - struct sync_pt *pt = fence_to_sync_pt(fence); - struct sync_timeline *parent = fence_parent(fence); + struct sync_pt *pt = dma_fence_to_sync_pt(fence); + struct sync_timeline *parent = dma_fence_parent(fence); if (timeline_fence_signaled(fence)) return false; @@ -234,26 +234,26 @@ static bool timeline_fence_enable_signaling(struct fence *fence) return true; } -static void timeline_fence_value_str(struct fence *fence, +static void timeline_fence_value_str(struct dma_fence *fence, char *str, int size) { snprintf(str, size, "%d", fence->seqno); } -static void timeline_fence_timeline_value_str(struct fence *fence, +static void timeline_fence_timeline_value_str(struct dma_fence *fence, char *str, int size) { - struct sync_timeline *parent = fence_parent(fence); + struct sync_timeline *parent = dma_fence_parent(fence); snprintf(str, size, "%d", parent->value); } -static const struct fence_ops timeline_fence_ops = { +static const struct dma_fence_ops timeline_fence_ops = { .get_driver_name = timeline_fence_get_driver_name, .get_timeline_name = timeline_fence_get_timeline_name, .enable_signaling = timeline_fence_enable_signaling, .signaled = timeline_fence_signaled, - .wait = fence_default_wait, + .wait = dma_fence_default_wait, .release = timeline_fence_release, .fence_value_str = timeline_fence_value_str, .timeline_value_str = timeline_fence_timeline_value_str, @@ -317,7 +317,7 @@ static long 
sw_sync_ioctl_create_fence(struct sync_timeline *obj, sync_file = sync_file_create(&pt->base); if (!sync_file) { - fence_put(&pt->base); + dma_fence_put(&pt->base); err = -ENOMEM; goto err; } diff --git a/drivers/dma-buf/sync_debug.c b/drivers/dma-buf/sync_debug.c index 2dd4c3db6caa..48b20e34fb6d 100644 --- a/drivers/dma-buf/sync_debug.c +++ b/drivers/dma-buf/sync_debug.c @@ -71,12 +71,13 @@ static const char *sync_status_str(int status) return "error"; } -static void sync_print_fence(struct seq_file *s, struct fence *fence, bool show) +static void sync_print_fence(struct seq_file *s, + struct dma_fence *fence, bool show) { int status = 1; - struct sync_timeline *parent = fence_parent(fence); + struct sync_timeline *parent = dma_fence_parent(fence); - if (fence_is_signaled_locked(fence)) + if (dma_fence_is_signaled_locked(fence)) status = fence->status; seq_printf(s, " %s%sfence %s", @@ -135,10 +136,10 @@ static void sync_print_sync_file(struct seq_file *s, int i; seq_printf(s, "[%p] %s: %s\n", sync_file, sync_file->name, - sync_status_str(!fence_is_signaled(sync_file->fence))); + sync_status_str(!dma_fence_is_signaled(sync_file->fence))); - if (fence_is_array(sync_file->fence)) { - struct fence_array *array = to_fence_array(sync_file->fence); + if (dma_fence_is_array(sync_file->fence)) { + struct dma_fence_array *array = to_dma_fence_array(sync_file->fence); for (i = 0; i < array->num_fences; ++i) sync_print_fence(s, array->fences[i], true); diff --git a/drivers/dma-buf/sync_debug.h b/drivers/dma-buf/sync_debug.h index d269aa6783aa..26fe8b9907b3 100644 --- a/drivers/dma-buf/sync_debug.h +++ b/drivers/dma-buf/sync_debug.h @@ -15,7 +15,7 @@ #include #include -#include +#include #include #include @@ -45,10 +45,9 @@ struct sync_timeline { struct list_head sync_timeline_list; }; -static inline struct sync_timeline *fence_parent(struct fence *fence) +static inline struct sync_timeline *dma_fence_parent(struct dma_fence *fence) { - return container_of(fence->lock, struct 
sync_timeline, - child_list_lock); + return container_of(fence->lock, struct sync_timeline, child_list_lock); } /** @@ -58,7 +57,7 @@ static inline struct sync_timeline *fence_parent(struct fence *fence) * @active_list: sync timeline active child's list */ struct sync_pt { - struct fence base; + struct dma_fence base; struct list_head child_list; struct list_head active_list; }; diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c index 235f8ac113cc..69d8ef98d34c 100644 --- a/drivers/dma-buf/sync_file.c +++ b/drivers/dma-buf/sync_file.c @@ -54,7 +54,7 @@ err: return NULL; } -static void fence_check_cb_func(struct fence *f, struct fence_cb *cb) +static void fence_check_cb_func(struct dma_fence *f, struct dma_fence_cb *cb) { struct sync_file *sync_file; @@ -71,7 +71,7 @@ static void fence_check_cb_func(struct fence *f, struct fence_cb *cb) * takes ownership of @fence. The sync_file can be released with * fput(sync_file->file). Returns the sync_file or NULL in case of error. */ -struct sync_file *sync_file_create(struct fence *fence) +struct sync_file *sync_file_create(struct dma_fence *fence) { struct sync_file *sync_file; @@ -79,7 +79,7 @@ struct sync_file *sync_file_create(struct fence *fence) if (!sync_file) return NULL; - sync_file->fence = fence_get(fence); + sync_file->fence = dma_fence_get(fence); snprintf(sync_file->name, sizeof(sync_file->name), "%s-%s%llu-%d", fence->ops->get_driver_name(fence), @@ -121,16 +121,16 @@ err: * Ensures @fd references a valid sync_file and returns a fence that * represents all fence in the sync_file. On error NULL is returned. 
*/ -struct fence *sync_file_get_fence(int fd) +struct dma_fence *sync_file_get_fence(int fd) { struct sync_file *sync_file; - struct fence *fence; + struct dma_fence *fence; sync_file = sync_file_fdget(fd); if (!sync_file) return NULL; - fence = fence_get(sync_file->fence); + fence = dma_fence_get(sync_file->fence); fput(sync_file->file); return fence; @@ -138,22 +138,23 @@ struct fence *sync_file_get_fence(int fd) EXPORT_SYMBOL(sync_file_get_fence); static int sync_file_set_fence(struct sync_file *sync_file, - struct fence **fences, int num_fences) + struct dma_fence **fences, int num_fences) { - struct fence_array *array; + struct dma_fence_array *array; /* * The reference for the fences in the new sync_file and held * in add_fence() during the merge procedure, so for num_fences == 1 * we already own a new reference to the fence. For num_fence > 1 - * we own the reference of the fence_array creation. + * we own the reference of the dma_fence_array creation. */ if (num_fences == 1) { sync_file->fence = fences[0]; kfree(fences); } else { - array = fence_array_create(num_fences, fences, - fence_context_alloc(1), 1, false); + array = dma_fence_array_create(num_fences, fences, + dma_fence_context_alloc(1), + 1, false); if (!array) return -ENOMEM; @@ -163,10 +164,11 @@ static int sync_file_set_fence(struct sync_file *sync_file, return 0; } -static struct fence **get_fences(struct sync_file *sync_file, int *num_fences) +static struct dma_fence **get_fences(struct sync_file *sync_file, + int *num_fences) { - if (fence_is_array(sync_file->fence)) { - struct fence_array *array = to_fence_array(sync_file->fence); + if (dma_fence_is_array(sync_file->fence)) { + struct dma_fence_array *array = to_dma_fence_array(sync_file->fence); *num_fences = array->num_fences; return array->fences; @@ -176,12 +178,13 @@ static struct fence **get_fences(struct sync_file *sync_file, int *num_fences) return &sync_file->fence; } -static void add_fence(struct fence **fences, int *i, struct 
fence *fence) +static void add_fence(struct dma_fence **fences, + int *i, struct dma_fence *fence) { fences[*i] = fence; - if (!fence_is_signaled(fence)) { - fence_get(fence); + if (!dma_fence_is_signaled(fence)) { + dma_fence_get(fence); (*i)++; } } @@ -200,7 +203,7 @@ static struct sync_file *sync_file_merge(const char *name, struct sync_file *a, struct sync_file *b) { struct sync_file *sync_file; - struct fence **fences, **nfences, **a_fences, **b_fences; + struct dma_fence **fences, **nfences, **a_fences, **b_fences; int i, i_a, i_b, num_fences, a_num_fences, b_num_fences; sync_file = sync_file_alloc(); @@ -226,8 +229,8 @@ static struct sync_file *sync_file_merge(const char *name, struct sync_file *a, * and sync_file_create, this is a reasonable assumption. */ for (i = i_a = i_b = 0; i_a < a_num_fences && i_b < b_num_fences; ) { - struct fence *pt_a = a_fences[i_a]; - struct fence *pt_b = b_fences[i_b]; + struct dma_fence *pt_a = a_fences[i_a]; + struct dma_fence *pt_b = b_fences[i_b]; if (pt_a->context < pt_b->context) { add_fence(fences, &i, pt_a); @@ -255,7 +258,7 @@ static struct sync_file *sync_file_merge(const char *name, struct sync_file *a, add_fence(fences, &i, b_fences[i_b]); if (i == 0) - fences[i++] = fence_get(a_fences[0]); + fences[i++] = dma_fence_get(a_fences[0]); if (num_fences > i) { nfences = krealloc(fences, i * sizeof(*fences), @@ -286,8 +289,8 @@ static void sync_file_free(struct kref *kref) kref); if (test_bit(POLL_ENABLED, &sync_file->fence->flags)) - fence_remove_callback(sync_file->fence, &sync_file->cb); - fence_put(sync_file->fence); + dma_fence_remove_callback(sync_file->fence, &sync_file->cb); + dma_fence_put(sync_file->fence); kfree(sync_file); } @@ -307,12 +310,12 @@ static unsigned int sync_file_poll(struct file *file, poll_table *wait) if (!poll_does_not_wait(wait) && !test_and_set_bit(POLL_ENABLED, &sync_file->fence->flags)) { - if (fence_add_callback(sync_file->fence, &sync_file->cb, - fence_check_cb_func) < 0) + if 
(dma_fence_add_callback(sync_file->fence, &sync_file->cb, + fence_check_cb_func) < 0) wake_up_all(&sync_file->wq); } - return fence_is_signaled(sync_file->fence) ? POLLIN : 0; + return dma_fence_is_signaled(sync_file->fence) ? POLLIN : 0; } static long sync_file_ioctl_merge(struct sync_file *sync_file, @@ -370,14 +373,14 @@ err_put_fd: return err; } -static void sync_fill_fence_info(struct fence *fence, +static void sync_fill_fence_info(struct dma_fence *fence, struct sync_fence_info *info) { strlcpy(info->obj_name, fence->ops->get_timeline_name(fence), sizeof(info->obj_name)); strlcpy(info->driver_name, fence->ops->get_driver_name(fence), sizeof(info->driver_name)); - if (fence_is_signaled(fence)) + if (dma_fence_is_signaled(fence)) info->status = fence->status >= 0 ? 1 : fence->status; else info->status = 0; @@ -389,7 +392,7 @@ static long sync_file_ioctl_fence_info(struct sync_file *sync_file, { struct sync_file_info info; struct sync_fence_info *fence_info = NULL; - struct fence **fences; + struct dma_fence **fences; __u32 size; int num_fences, ret, i; @@ -429,7 +432,7 @@ static long sync_file_ioctl_fence_info(struct sync_file *sync_file, no_fences: strlcpy(info.name, sync_file->name, sizeof(info.name)); - info.status = fence_is_signaled(sync_file->fence); + info.status = dma_fence_is_signaled(sync_file->fence); info.num_fences = num_fences; if (copy_to_user((void __user *)arg, &info, sizeof(info))) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 039b57e4644c..283d05927d15 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -34,7 +34,7 @@ #include #include #include -#include +#include #include #include @@ -378,7 +378,7 @@ struct amdgpu_fence_driver { struct timer_list fallback_timer; unsigned num_fences_mask; spinlock_t lock; - struct fence **fences; + struct dma_fence **fences; }; /* some special values for the owner field */ @@ -399,7 +399,7 @@ int 
amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring, unsigned irq_type); void amdgpu_fence_driver_suspend(struct amdgpu_device *adev); void amdgpu_fence_driver_resume(struct amdgpu_device *adev); -int amdgpu_fence_emit(struct amdgpu_ring *ring, struct fence **fence); +int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **fence); void amdgpu_fence_process(struct amdgpu_ring *ring); int amdgpu_fence_wait_empty(struct amdgpu_ring *ring); unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring); @@ -427,7 +427,7 @@ struct amdgpu_bo_va_mapping { struct amdgpu_bo_va { /* protected by bo being reserved */ struct list_head bo_list; - struct fence *last_pt_update; + struct dma_fence *last_pt_update; unsigned ref_count; /* protected by vm mutex and spinlock */ @@ -543,7 +543,7 @@ struct amdgpu_sa_bo { struct amdgpu_sa_manager *manager; unsigned soffset; unsigned eoffset; - struct fence *fence; + struct dma_fence *fence; }; /* @@ -566,19 +566,19 @@ int amdgpu_mode_dumb_mmap(struct drm_file *filp, */ struct amdgpu_sync { DECLARE_HASHTABLE(fences, 4); - struct fence *last_vm_update; + struct dma_fence *last_vm_update; }; void amdgpu_sync_create(struct amdgpu_sync *sync); int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync, - struct fence *f); + struct dma_fence *f); int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync, struct reservation_object *resv, void *owner); -struct fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync, +struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync, struct amdgpu_ring *ring); -struct fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync); +struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync); void amdgpu_sync_free(struct amdgpu_sync *sync); int amdgpu_sync_init(void); void amdgpu_sync_fini(void); @@ -703,10 +703,10 @@ struct amdgpu_flip_work { uint64_t base; struct drm_pending_vblank_event *event; struct amdgpu_bo *old_abo; - struct fence *excl; + struct 
dma_fence *excl; unsigned shared_count; - struct fence **shared; - struct fence_cb cb; + struct dma_fence **shared; + struct dma_fence_cb cb; bool async; }; @@ -742,7 +742,7 @@ void amdgpu_job_free_resources(struct amdgpu_job *job); void amdgpu_job_free(struct amdgpu_job *job); int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring, struct amd_sched_entity *entity, void *owner, - struct fence **f); + struct dma_fence **f); struct amdgpu_ring { struct amdgpu_device *adev; @@ -844,7 +844,7 @@ struct amdgpu_vm { /* contains the page directory */ struct amdgpu_bo *page_directory; unsigned max_pde_used; - struct fence *page_directory_fence; + struct dma_fence *page_directory_fence; uint64_t last_eviction_counter; /* array of page tables, one for each page directory entry */ @@ -865,14 +865,14 @@ struct amdgpu_vm { struct amdgpu_vm_id { struct list_head list; - struct fence *first; + struct dma_fence *first; struct amdgpu_sync active; - struct fence *last_flush; + struct dma_fence *last_flush; atomic64_t owner; uint64_t pd_gpu_addr; /* last flushed PD/PT update */ - struct fence *flushed_updates; + struct dma_fence *flushed_updates; uint32_t current_gpu_reset_count; @@ -921,7 +921,7 @@ void amdgpu_vm_get_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, void amdgpu_vm_move_pt_bos_in_lru(struct amdgpu_device *adev, struct amdgpu_vm *vm); int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, - struct amdgpu_sync *sync, struct fence *fence, + struct amdgpu_sync *sync, struct dma_fence *fence, struct amdgpu_job *job); int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job); void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vm_id); @@ -957,7 +957,7 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, struct amdgpu_ctx_ring { uint64_t sequence; - struct fence **fences; + struct dma_fence **fences; struct amd_sched_entity entity; }; @@ -966,7 +966,7 @@ struct amdgpu_ctx { struct amdgpu_device *adev; unsigned 
reset_counter; spinlock_t ring_lock; - struct fence **fences; + struct dma_fence **fences; struct amdgpu_ctx_ring rings[AMDGPU_MAX_RINGS]; bool preamble_presented; }; @@ -982,8 +982,8 @@ struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id); int amdgpu_ctx_put(struct amdgpu_ctx *ctx); uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, - struct fence *fence); -struct fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, + struct dma_fence *fence); +struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, uint64_t seq); int amdgpu_ctx_ioctl(struct drm_device *dev, void *data, @@ -1181,10 +1181,10 @@ struct amdgpu_gfx { int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm, unsigned size, struct amdgpu_ib *ib); void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib, - struct fence *f); + struct dma_fence *f); int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, - struct amdgpu_ib *ib, struct fence *last_vm_update, - struct amdgpu_job *job, struct fence **f); + struct amdgpu_ib *ib, struct dma_fence *last_vm_update, + struct amdgpu_job *job, struct dma_fence **f); int amdgpu_ib_pool_init(struct amdgpu_device *adev); void amdgpu_ib_pool_fini(struct amdgpu_device *adev); int amdgpu_ib_ring_tests(struct amdgpu_device *adev); @@ -1225,7 +1225,7 @@ struct amdgpu_cs_parser { struct amdgpu_bo_list *bo_list; struct amdgpu_bo_list_entry vm_pd; struct list_head validated; - struct fence *fence; + struct dma_fence *fence; uint64_t bytes_moved_threshold; uint64_t bytes_moved; struct amdgpu_bo_list_entry *evictable; @@ -1245,7 +1245,7 @@ struct amdgpu_job { struct amdgpu_ring *ring; struct amdgpu_sync sync; struct amdgpu_ib *ibs; - struct fence *fence; /* the hw fence */ + struct dma_fence *fence; /* the hw fence */ uint32_t preamble_status; uint32_t num_ibs; void *owner; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c index 345305235349..cc97eee93226 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c @@ -33,7 +33,7 @@ static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size, { unsigned long start_jiffies; unsigned long end_jiffies; - struct fence *fence = NULL; + struct dma_fence *fence = NULL; int i, r; start_jiffies = jiffies; @@ -43,17 +43,17 @@ static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size, false); if (r) goto exit_do_move; - r = fence_wait(fence, false); + r = dma_fence_wait(fence, false); if (r) goto exit_do_move; - fence_put(fence); + dma_fence_put(fence); } end_jiffies = jiffies; r = jiffies_to_msecs(end_jiffies - start_jiffies); exit_do_move: if (fence) - fence_put(fence); + dma_fence_put(fence); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index b0f6e6957536..5d582265e929 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -719,7 +719,7 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bo ttm_eu_backoff_reservation(&parser->ticket, &parser->validated); } - fence_put(parser->fence); + dma_fence_put(parser->fence); if (parser->ctx) amdgpu_ctx_put(parser->ctx); @@ -756,7 +756,7 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p, if (p->bo_list) { for (i = 0; i < p->bo_list->num_entries; i++) { - struct fence *f; + struct dma_fence *f; /* ignore duplicates */ bo = p->bo_list->array[i].robj; @@ -956,7 +956,7 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev, for (j = 0; j < num_deps; ++j) { struct amdgpu_ring *ring; struct amdgpu_ctx *ctx; - struct fence *fence; + struct dma_fence *fence; r = amdgpu_cs_get_ring(adev, deps[j].ip_type, deps[j].ip_instance, @@ -978,7 +978,7 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev, } else if (fence) 
{ r = amdgpu_sync_fence(adev, &p->job->sync, fence); - fence_put(fence); + dma_fence_put(fence); amdgpu_ctx_put(ctx); if (r) return r; @@ -1008,7 +1008,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, job->owner = p->filp; job->fence_ctx = entity->fence_context; - p->fence = fence_get(&job->base.s_fence->finished); + p->fence = dma_fence_get(&job->base.s_fence->finished); cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring, p->fence); job->uf_sequence = cs->out.handle; amdgpu_job_free_resources(job); @@ -1091,7 +1091,7 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data, unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout); struct amdgpu_ring *ring = NULL; struct amdgpu_ctx *ctx; - struct fence *fence; + struct dma_fence *fence; long r; r = amdgpu_cs_get_ring(adev, wait->in.ip_type, wait->in.ip_instance, @@ -1107,8 +1107,8 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data, if (IS_ERR(fence)) r = PTR_ERR(fence); else if (fence) { - r = fence_wait_timeout(fence, true, timeout); - fence_put(fence); + r = dma_fence_wait_timeout(fence, true, timeout); + dma_fence_put(fence); } else r = 1; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index a5e2fcbef0f0..99bbc860322f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -35,7 +35,7 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, struct amdgpu_ctx *ctx) kref_init(&ctx->refcount); spin_lock_init(&ctx->ring_lock); ctx->fences = kcalloc(amdgpu_sched_jobs * AMDGPU_MAX_RINGS, - sizeof(struct fence*), GFP_KERNEL); + sizeof(struct dma_fence*), GFP_KERNEL); if (!ctx->fences) return -ENOMEM; @@ -79,7 +79,7 @@ static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx) for (i = 0; i < AMDGPU_MAX_RINGS; ++i) for (j = 0; j < amdgpu_sched_jobs; ++j) - fence_put(ctx->rings[i].fences[j]); + dma_fence_put(ctx->rings[i].fences[j]); kfree(ctx->fences); ctx->fences = NULL; @@ -241,39 +241,39 @@ int 
amdgpu_ctx_put(struct amdgpu_ctx *ctx) } uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, - struct fence *fence) + struct dma_fence *fence) { struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx]; uint64_t seq = cring->sequence; unsigned idx = 0; - struct fence *other = NULL; + struct dma_fence *other = NULL; idx = seq & (amdgpu_sched_jobs - 1); other = cring->fences[idx]; if (other) { signed long r; - r = fence_wait_timeout(other, false, MAX_SCHEDULE_TIMEOUT); + r = dma_fence_wait_timeout(other, false, MAX_SCHEDULE_TIMEOUT); if (r < 0) DRM_ERROR("Error (%ld) waiting for fence!\n", r); } - fence_get(fence); + dma_fence_get(fence); spin_lock(&ctx->ring_lock); cring->fences[idx] = fence; cring->sequence++; spin_unlock(&ctx->ring_lock); - fence_put(other); + dma_fence_put(other); return seq; } -struct fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, - struct amdgpu_ring *ring, uint64_t seq) +struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, + struct amdgpu_ring *ring, uint64_t seq) { struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx]; - struct fence *fence; + struct dma_fence *fence; spin_lock(&ctx->ring_lock); @@ -288,7 +288,7 @@ struct fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, return NULL; } - fence = fence_get(cring->fences[seq & (amdgpu_sched_jobs - 1)]); + fence = dma_fence_get(cring->fences[seq & (amdgpu_sched_jobs - 1)]); spin_unlock(&ctx->ring_lock); return fence; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index b4f4a9239069..0262b43c8f0e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1599,7 +1599,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, adev->vm_manager.vm_pte_funcs = NULL; adev->vm_manager.vm_pte_num_rings = 0; adev->gart.gart_funcs = NULL; - adev->fence_context = fence_context_alloc(AMDGPU_MAX_RINGS); + adev->fence_context = 
dma_fence_context_alloc(AMDGPU_MAX_RINGS); adev->smc_rreg = &amdgpu_invalid_rreg; adev->smc_wreg = &amdgpu_invalid_wreg; @@ -2193,7 +2193,7 @@ bool amdgpu_need_backup(struct amdgpu_device *adev) static int amdgpu_recover_vram_from_shadow(struct amdgpu_device *adev, struct amdgpu_ring *ring, struct amdgpu_bo *bo, - struct fence **fence) + struct dma_fence **fence) { uint32_t domain; int r; @@ -2312,30 +2312,30 @@ retry: if (need_full_reset && amdgpu_need_backup(adev)) { struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; struct amdgpu_bo *bo, *tmp; - struct fence *fence = NULL, *next = NULL; + struct dma_fence *fence = NULL, *next = NULL; DRM_INFO("recover vram bo from shadow\n"); mutex_lock(&adev->shadow_list_lock); list_for_each_entry_safe(bo, tmp, &adev->shadow_list, shadow_list) { amdgpu_recover_vram_from_shadow(adev, ring, bo, &next); if (fence) { - r = fence_wait(fence, false); + r = dma_fence_wait(fence, false); if (r) { WARN(r, "recovery from shadow isn't comleted\n"); break; } } - fence_put(fence); + dma_fence_put(fence); fence = next; } mutex_unlock(&adev->shadow_list_lock); if (fence) { - r = fence_wait(fence, false); + r = dma_fence_wait(fence, false); if (r) WARN(r, "recovery from shadow isn't comleted\n"); } - fence_put(fence); + dma_fence_put(fence); } for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { struct amdgpu_ring *ring = adev->rings[i]; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index 083e2b429872..075c0d7db205 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -35,29 +35,29 @@ #include #include -static void amdgpu_flip_callback(struct fence *f, struct fence_cb *cb) +static void amdgpu_flip_callback(struct dma_fence *f, struct dma_fence_cb *cb) { struct amdgpu_flip_work *work = container_of(cb, struct amdgpu_flip_work, cb); - fence_put(f); + dma_fence_put(f); schedule_work(&work->flip_work.work); } static bool 
amdgpu_flip_handle_fence(struct amdgpu_flip_work *work, - struct fence **f) + struct dma_fence **f) { - struct fence *fence= *f; + struct dma_fence *fence= *f; if (fence == NULL) return false; *f = NULL; - if (!fence_add_callback(fence, &work->cb, amdgpu_flip_callback)) + if (!dma_fence_add_callback(fence, &work->cb, amdgpu_flip_callback)) return true; - fence_put(fence); + dma_fence_put(fence); return false; } @@ -244,9 +244,9 @@ unreserve: cleanup: amdgpu_bo_unref(&work->old_abo); - fence_put(work->excl); + dma_fence_put(work->excl); for (i = 0; i < work->shared_count; ++i) - fence_put(work->shared[i]); + dma_fence_put(work->shared[i]); kfree(work->shared); kfree(work); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 3a2e42f4b897..57552c79ec58 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -48,7 +48,7 @@ */ struct amdgpu_fence { - struct fence base; + struct dma_fence base; /* RB, DMA, etc. */ struct amdgpu_ring *ring; @@ -73,8 +73,8 @@ void amdgpu_fence_slab_fini(void) /* * Cast helper */ -static const struct fence_ops amdgpu_fence_ops; -static inline struct amdgpu_fence *to_amdgpu_fence(struct fence *f) +static const struct dma_fence_ops amdgpu_fence_ops; +static inline struct amdgpu_fence *to_amdgpu_fence(struct dma_fence *f) { struct amdgpu_fence *__f = container_of(f, struct amdgpu_fence, base); @@ -130,11 +130,11 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring) * Emits a fence command on the requested ring (all asics). * Returns 0 on success, -ENOMEM on failure. 
*/ -int amdgpu_fence_emit(struct amdgpu_ring *ring, struct fence **f) +int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f) { struct amdgpu_device *adev = ring->adev; struct amdgpu_fence *fence; - struct fence *old, **ptr; + struct dma_fence *old, **ptr; uint32_t seq; fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_KERNEL); @@ -143,10 +143,10 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct fence **f) seq = ++ring->fence_drv.sync_seq; fence->ring = ring; - fence_init(&fence->base, &amdgpu_fence_ops, - &ring->fence_drv.lock, - adev->fence_context + ring->idx, - seq); + dma_fence_init(&fence->base, &amdgpu_fence_ops, + &ring->fence_drv.lock, + adev->fence_context + ring->idx, + seq); amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr, seq, AMDGPU_FENCE_FLAG_INT); @@ -155,12 +155,12 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct fence **f) * emitting the fence would mess up the hardware ring buffer. */ old = rcu_dereference_protected(*ptr, 1); - if (old && !fence_is_signaled(old)) { + if (old && !dma_fence_is_signaled(old)) { DRM_INFO("rcu slot is busy\n"); - fence_wait(old, false); + dma_fence_wait(old, false); } - rcu_assign_pointer(*ptr, fence_get(&fence->base)); + rcu_assign_pointer(*ptr, dma_fence_get(&fence->base)); *f = &fence->base; @@ -211,7 +211,7 @@ void amdgpu_fence_process(struct amdgpu_ring *ring) seq &= drv->num_fences_mask; do { - struct fence *fence, **ptr; + struct dma_fence *fence, **ptr; ++last_seq; last_seq &= drv->num_fences_mask; @@ -224,13 +224,13 @@ void amdgpu_fence_process(struct amdgpu_ring *ring) if (!fence) continue; - r = fence_signal(fence); + r = dma_fence_signal(fence); if (!r) - FENCE_TRACE(fence, "signaled from irq context\n"); + DMA_FENCE_TRACE(fence, "signaled from irq context\n"); else BUG(); - fence_put(fence); + dma_fence_put(fence); } while (last_seq != seq); } @@ -260,7 +260,7 @@ static void amdgpu_fence_fallback(unsigned long arg) int amdgpu_fence_wait_empty(struct amdgpu_ring *ring) { 
uint64_t seq = ACCESS_ONCE(ring->fence_drv.sync_seq); - struct fence *fence, **ptr; + struct dma_fence *fence, **ptr; int r; if (!seq) @@ -269,14 +269,14 @@ int amdgpu_fence_wait_empty(struct amdgpu_ring *ring) ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask]; rcu_read_lock(); fence = rcu_dereference(*ptr); - if (!fence || !fence_get_rcu(fence)) { + if (!fence || !dma_fence_get_rcu(fence)) { rcu_read_unlock(); return 0; } rcu_read_unlock(); - r = fence_wait(fence, false); - fence_put(fence); + r = dma_fence_wait(fence, false); + dma_fence_put(fence); return r; } @@ -452,7 +452,7 @@ void amdgpu_fence_driver_fini(struct amdgpu_device *adev) amd_sched_fini(&ring->sched); del_timer_sync(&ring->fence_drv.fallback_timer); for (j = 0; j <= ring->fence_drv.num_fences_mask; ++j) - fence_put(ring->fence_drv.fences[j]); + dma_fence_put(ring->fence_drv.fences[j]); kfree(ring->fence_drv.fences); ring->fence_drv.fences = NULL; ring->fence_drv.initialized = false; @@ -541,12 +541,12 @@ void amdgpu_fence_driver_force_completion(struct amdgpu_device *adev) * Common fence implementation */ -static const char *amdgpu_fence_get_driver_name(struct fence *fence) +static const char *amdgpu_fence_get_driver_name(struct dma_fence *fence) { return "amdgpu"; } -static const char *amdgpu_fence_get_timeline_name(struct fence *f) +static const char *amdgpu_fence_get_timeline_name(struct dma_fence *f) { struct amdgpu_fence *fence = to_amdgpu_fence(f); return (const char *)fence->ring->name; @@ -560,7 +560,7 @@ static const char *amdgpu_fence_get_timeline_name(struct fence *f) * to fence_queue that checks if this fence is signaled, and if so it * signals the fence and removes itself. 
*/ -static bool amdgpu_fence_enable_signaling(struct fence *f) +static bool amdgpu_fence_enable_signaling(struct dma_fence *f) { struct amdgpu_fence *fence = to_amdgpu_fence(f); struct amdgpu_ring *ring = fence->ring; @@ -568,7 +568,7 @@ static bool amdgpu_fence_enable_signaling(struct fence *f) if (!timer_pending(&ring->fence_drv.fallback_timer)) amdgpu_fence_schedule_fallback(ring); - FENCE_TRACE(&fence->base, "armed on ring %i!\n", ring->idx); + DMA_FENCE_TRACE(&fence->base, "armed on ring %i!\n", ring->idx); return true; } @@ -582,7 +582,7 @@ static bool amdgpu_fence_enable_signaling(struct fence *f) */ static void amdgpu_fence_free(struct rcu_head *rcu) { - struct fence *f = container_of(rcu, struct fence, rcu); + struct dma_fence *f = container_of(rcu, struct dma_fence, rcu); struct amdgpu_fence *fence = to_amdgpu_fence(f); kmem_cache_free(amdgpu_fence_slab, fence); } @@ -595,16 +595,16 @@ static void amdgpu_fence_free(struct rcu_head *rcu) * This function is called when the reference count becomes zero. * It just RCU schedules freeing up the fence. */ -static void amdgpu_fence_release(struct fence *f) +static void amdgpu_fence_release(struct dma_fence *f) { call_rcu(&f->rcu, amdgpu_fence_free); } -static const struct fence_ops amdgpu_fence_ops = { +static const struct dma_fence_ops amdgpu_fence_ops = { .get_driver_name = amdgpu_fence_get_driver_name, .get_timeline_name = amdgpu_fence_get_timeline_name, .enable_signaling = amdgpu_fence_enable_signaling, - .wait = fence_default_wait, + .wait = dma_fence_default_wait, .release = amdgpu_fence_release, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 6a6c86c9c169..c3672dfcfd6a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -89,7 +89,7 @@ int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm, * Free an IB (all asics). 
*/ void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib, - struct fence *f) + struct dma_fence *f) { amdgpu_sa_bo_free(adev, &ib->sa_bo, f); } @@ -116,8 +116,8 @@ void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib, * to SI there was just a DE IB. */ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, - struct amdgpu_ib *ibs, struct fence *last_vm_update, - struct amdgpu_job *job, struct fence **f) + struct amdgpu_ib *ibs, struct dma_fence *last_vm_update, + struct amdgpu_job *job, struct dma_fence **f) { struct amdgpu_device *adev = ring->adev; struct amdgpu_ib *ib = &ibs[0]; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 8c5807994073..a0de6286c453 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -81,7 +81,7 @@ int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size, void amdgpu_job_free_resources(struct amdgpu_job *job) { - struct fence *f; + struct dma_fence *f; unsigned i; /* use sched fence if available */ @@ -95,7 +95,7 @@ static void amdgpu_job_free_cb(struct amd_sched_job *s_job) { struct amdgpu_job *job = container_of(s_job, struct amdgpu_job, base); - fence_put(job->fence); + dma_fence_put(job->fence); amdgpu_sync_free(&job->sync); kfree(job); } @@ -104,14 +104,14 @@ void amdgpu_job_free(struct amdgpu_job *job) { amdgpu_job_free_resources(job); - fence_put(job->fence); + dma_fence_put(job->fence); amdgpu_sync_free(&job->sync); kfree(job); } int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring, struct amd_sched_entity *entity, void *owner, - struct fence **f) + struct dma_fence **f) { int r; job->ring = ring; @@ -125,19 +125,19 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring, job->owner = owner; job->fence_ctx = entity->fence_context; - *f = fence_get(&job->base.s_fence->finished); + *f = dma_fence_get(&job->base.s_fence->finished); 
amdgpu_job_free_resources(job); amd_sched_entity_push_job(&job->base); return 0; } -static struct fence *amdgpu_job_dependency(struct amd_sched_job *sched_job) +static struct dma_fence *amdgpu_job_dependency(struct amd_sched_job *sched_job) { struct amdgpu_job *job = to_amdgpu_job(sched_job); struct amdgpu_vm *vm = job->vm; - struct fence *fence = amdgpu_sync_get_fence(&job->sync); + struct dma_fence *fence = amdgpu_sync_get_fence(&job->sync); if (fence == NULL && vm && !job->vm_id) { struct amdgpu_ring *ring = job->ring; @@ -155,9 +155,9 @@ static struct fence *amdgpu_job_dependency(struct amd_sched_job *sched_job) return fence; } -static struct fence *amdgpu_job_run(struct amd_sched_job *sched_job) +static struct dma_fence *amdgpu_job_run(struct amd_sched_job *sched_job) { - struct fence *fence = NULL; + struct dma_fence *fence = NULL; struct amdgpu_job *job; int r; @@ -176,8 +176,8 @@ static struct fence *amdgpu_job_run(struct amd_sched_job *sched_job) DRM_ERROR("Error scheduling IBs (%d)\n", r); /* if gpu reset, hw fence will be replaced here */ - fence_put(job->fence); - job->fence = fence_get(fence); + dma_fence_put(job->fence); + job->fence = dma_fence_get(fence); amdgpu_job_free_resources(job); return fence; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index aa074fac0c7f..55e142a5ff5f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -383,7 +383,7 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev, if (flags & AMDGPU_GEM_CREATE_VRAM_CLEARED && bo->tbo.mem.placement & TTM_PL_FLAG_VRAM) { - struct fence *fence; + struct dma_fence *fence; if (adev->mman.buffer_funcs_ring == NULL || !adev->mman.buffer_funcs_ring->ready) { @@ -403,9 +403,9 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev, amdgpu_fill_buffer(bo, 0, bo->tbo.resv, &fence); amdgpu_bo_fence(bo, fence, false); amdgpu_bo_unreserve(bo); - fence_put(bo->tbo.moving); 
- bo->tbo.moving = fence_get(fence); - fence_put(fence); + dma_fence_put(bo->tbo.moving); + bo->tbo.moving = dma_fence_get(fence); + dma_fence_put(fence); } *bo_ptr = bo; @@ -491,7 +491,7 @@ int amdgpu_bo_backup_to_shadow(struct amdgpu_device *adev, struct amdgpu_ring *ring, struct amdgpu_bo *bo, struct reservation_object *resv, - struct fence **fence, + struct dma_fence **fence, bool direct) { @@ -523,7 +523,7 @@ int amdgpu_bo_restore_from_shadow(struct amdgpu_device *adev, struct amdgpu_ring *ring, struct amdgpu_bo *bo, struct reservation_object *resv, - struct fence **fence, + struct dma_fence **fence, bool direct) { @@ -926,7 +926,7 @@ int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo) * @shared: true if fence should be added shared * */ -void amdgpu_bo_fence(struct amdgpu_bo *bo, struct fence *fence, +void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence, bool shared) { struct reservation_object *resv = bo->tbo.resv; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index 8255034d73eb..3e785ed3cb4b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -156,19 +156,19 @@ int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, void *buffer, void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, struct ttm_mem_reg *new_mem); int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo); -void amdgpu_bo_fence(struct amdgpu_bo *bo, struct fence *fence, +void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence, bool shared); u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo); int amdgpu_bo_backup_to_shadow(struct amdgpu_device *adev, struct amdgpu_ring *ring, struct amdgpu_bo *bo, struct reservation_object *resv, - struct fence **fence, bool direct); + struct dma_fence **fence, bool direct); int amdgpu_bo_restore_from_shadow(struct amdgpu_device *adev, struct amdgpu_ring *ring, struct amdgpu_bo *bo, struct reservation_object *resv, 
- struct fence **fence, + struct dma_fence **fence, bool direct); @@ -200,7 +200,7 @@ int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager, unsigned size, unsigned align); void amdgpu_sa_bo_free(struct amdgpu_device *adev, struct amdgpu_sa_bo **sa_bo, - struct fence *fence); + struct dma_fence *fence); #if defined(CONFIG_DEBUG_FS) void amdgpu_sa_bo_dump_debug_info(struct amdgpu_sa_manager *sa_manager, struct seq_file *m); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c index d8af37a845f4..fd26c4b8d793 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c @@ -147,7 +147,7 @@ static void amdgpu_sa_bo_remove_locked(struct amdgpu_sa_bo *sa_bo) } list_del_init(&sa_bo->olist); list_del_init(&sa_bo->flist); - fence_put(sa_bo->fence); + dma_fence_put(sa_bo->fence); kfree(sa_bo); } @@ -161,7 +161,7 @@ static void amdgpu_sa_bo_try_free(struct amdgpu_sa_manager *sa_manager) sa_bo = list_entry(sa_manager->hole->next, struct amdgpu_sa_bo, olist); list_for_each_entry_safe_from(sa_bo, tmp, &sa_manager->olist, olist) { if (sa_bo->fence == NULL || - !fence_is_signaled(sa_bo->fence)) { + !dma_fence_is_signaled(sa_bo->fence)) { return; } amdgpu_sa_bo_remove_locked(sa_bo); @@ -244,7 +244,7 @@ static bool amdgpu_sa_event(struct amdgpu_sa_manager *sa_manager, } static bool amdgpu_sa_bo_next_hole(struct amdgpu_sa_manager *sa_manager, - struct fence **fences, + struct dma_fence **fences, unsigned *tries) { struct amdgpu_sa_bo *best_bo = NULL; @@ -272,7 +272,7 @@ static bool amdgpu_sa_bo_next_hole(struct amdgpu_sa_manager *sa_manager, sa_bo = list_first_entry(&sa_manager->flist[i], struct amdgpu_sa_bo, flist); - if (!fence_is_signaled(sa_bo->fence)) { + if (!dma_fence_is_signaled(sa_bo->fence)) { fences[i] = sa_bo->fence; continue; } @@ -314,7 +314,7 @@ int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager, struct amdgpu_sa_bo **sa_bo, unsigned size, unsigned align) { - struct fence 
*fences[AMDGPU_SA_NUM_FENCE_LISTS]; + struct dma_fence *fences[AMDGPU_SA_NUM_FENCE_LISTS]; unsigned tries[AMDGPU_SA_NUM_FENCE_LISTS]; unsigned count; int i, r; @@ -356,14 +356,14 @@ int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager, for (i = 0, count = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i) if (fences[i]) - fences[count++] = fence_get(fences[i]); + fences[count++] = dma_fence_get(fences[i]); if (count) { spin_unlock(&sa_manager->wq.lock); - t = fence_wait_any_timeout(fences, count, false, - MAX_SCHEDULE_TIMEOUT); + t = dma_fence_wait_any_timeout(fences, count, false, + MAX_SCHEDULE_TIMEOUT); for (i = 0; i < count; ++i) - fence_put(fences[i]); + dma_fence_put(fences[i]); r = (t > 0) ? 0 : t; spin_lock(&sa_manager->wq.lock); @@ -384,7 +384,7 @@ int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager, } void amdgpu_sa_bo_free(struct amdgpu_device *adev, struct amdgpu_sa_bo **sa_bo, - struct fence *fence) + struct dma_fence *fence) { struct amdgpu_sa_manager *sa_manager; @@ -394,10 +394,10 @@ void amdgpu_sa_bo_free(struct amdgpu_device *adev, struct amdgpu_sa_bo **sa_bo, sa_manager = (*sa_bo)->manager; spin_lock(&sa_manager->wq.lock); - if (fence && !fence_is_signaled(fence)) { + if (fence && !dma_fence_is_signaled(fence)) { uint32_t idx; - (*sa_bo)->fence = fence_get(fence); + (*sa_bo)->fence = dma_fence_get(fence); idx = fence->context % AMDGPU_SA_NUM_FENCE_LISTS; list_add_tail(&(*sa_bo)->flist, &sa_manager->flist[idx]); } else { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index 5c8d3022fb87..ed814e6d0207 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -34,7 +34,7 @@ struct amdgpu_sync_entry { struct hlist_node node; - struct fence *fence; + struct dma_fence *fence; }; static struct kmem_cache *amdgpu_sync_slab; @@ -60,7 +60,8 @@ void amdgpu_sync_create(struct amdgpu_sync *sync) * * Test if the fence was issued by us. 
*/ -static bool amdgpu_sync_same_dev(struct amdgpu_device *adev, struct fence *f) +static bool amdgpu_sync_same_dev(struct amdgpu_device *adev, + struct dma_fence *f) { struct amd_sched_fence *s_fence = to_amd_sched_fence(f); @@ -81,7 +82,7 @@ static bool amdgpu_sync_same_dev(struct amdgpu_device *adev, struct fence *f) * * Extract who originally created the fence. */ -static void *amdgpu_sync_get_owner(struct fence *f) +static void *amdgpu_sync_get_owner(struct dma_fence *f) { struct amd_sched_fence *s_fence = to_amd_sched_fence(f); @@ -99,13 +100,14 @@ static void *amdgpu_sync_get_owner(struct fence *f) * * Either keep the existing fence or the new one, depending which one is later. */ -static void amdgpu_sync_keep_later(struct fence **keep, struct fence *fence) +static void amdgpu_sync_keep_later(struct dma_fence **keep, + struct dma_fence *fence) { - if (*keep && fence_is_later(*keep, fence)) + if (*keep && dma_fence_is_later(*keep, fence)) return; - fence_put(*keep); - *keep = fence_get(fence); + dma_fence_put(*keep); + *keep = dma_fence_get(fence); } /** @@ -117,7 +119,7 @@ static void amdgpu_sync_keep_later(struct fence **keep, struct fence *fence) * Tries to add the fence to an existing hash entry. Returns true when an entry * was found, false otherwise. 
*/ -static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct fence *f) +static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f) { struct amdgpu_sync_entry *e; @@ -139,7 +141,7 @@ static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct fence *f) * */ int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync, - struct fence *f) + struct dma_fence *f) { struct amdgpu_sync_entry *e; @@ -158,7 +160,7 @@ int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync, return -ENOMEM; hash_add(sync->fences, &e->node, f->context); - e->fence = fence_get(f); + e->fence = dma_fence_get(f); return 0; } @@ -177,7 +179,7 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, void *owner) { struct reservation_object_list *flist; - struct fence *f; + struct dma_fence *f; void *fence_owner; unsigned i; int r = 0; @@ -231,15 +233,15 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, * Returns the next fence not signaled yet without removing it from the sync * object. */ -struct fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync, - struct amdgpu_ring *ring) +struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync, + struct amdgpu_ring *ring) { struct amdgpu_sync_entry *e; struct hlist_node *tmp; int i; hash_for_each_safe(sync->fences, i, tmp, e, node) { - struct fence *f = e->fence; + struct dma_fence *f = e->fence; struct amd_sched_fence *s_fence = to_amd_sched_fence(f); if (ring && s_fence) { @@ -247,16 +249,16 @@ struct fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync, * when they are scheduled. 
*/ if (s_fence->sched == &ring->sched) { - if (fence_is_signaled(&s_fence->scheduled)) + if (dma_fence_is_signaled(&s_fence->scheduled)) continue; return &s_fence->scheduled; } } - if (fence_is_signaled(f)) { + if (dma_fence_is_signaled(f)) { hash_del(&e->node); - fence_put(f); + dma_fence_put(f); kmem_cache_free(amdgpu_sync_slab, e); continue; } @@ -274,11 +276,11 @@ struct fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync, * * Get and removes the next fence from the sync object not signaled yet. */ -struct fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync) +struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync) { struct amdgpu_sync_entry *e; struct hlist_node *tmp; - struct fence *f; + struct dma_fence *f; int i; hash_for_each_safe(sync->fences, i, tmp, e, node) { @@ -288,10 +290,10 @@ struct fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync) hash_del(&e->node); kmem_cache_free(amdgpu_sync_slab, e); - if (!fence_is_signaled(f)) + if (!dma_fence_is_signaled(f)) return f; - fence_put(f); + dma_fence_put(f); } return NULL; } @@ -311,11 +313,11 @@ void amdgpu_sync_free(struct amdgpu_sync *sync) hash_for_each_safe(sync->fences, i, tmp, e, node) { hash_del(&e->node); - fence_put(e->fence); + dma_fence_put(e->fence); kmem_cache_free(amdgpu_sync_slab, e); } - fence_put(sync->last_vm_update); + dma_fence_put(sync->last_vm_update); } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c index b827c75e95de..e05a24325eeb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c @@ -78,7 +78,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) void *gtt_map, *vram_map; void **gtt_start, **gtt_end; void **vram_start, **vram_end; - struct fence *fence = NULL; + struct dma_fence *fence = NULL; r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GTT, 0, NULL, @@ -118,13 +118,13 @@ static void amdgpu_do_test_moves(struct amdgpu_device 
*adev) goto out_lclean_unpin; } - r = fence_wait(fence, false); + r = dma_fence_wait(fence, false); if (r) { DRM_ERROR("Failed to wait for GTT->VRAM fence %d\n", i); goto out_lclean_unpin; } - fence_put(fence); + dma_fence_put(fence); r = amdgpu_bo_kmap(vram_obj, &vram_map); if (r) { @@ -163,13 +163,13 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) goto out_lclean_unpin; } - r = fence_wait(fence, false); + r = dma_fence_wait(fence, false); if (r) { DRM_ERROR("Failed to wait for VRAM->GTT fence %d\n", i); goto out_lclean_unpin; } - fence_put(fence); + dma_fence_put(fence); r = amdgpu_bo_kmap(gtt_obj[i], &gtt_map); if (r) { @@ -216,7 +216,7 @@ out_lclean: amdgpu_bo_unref(&gtt_obj[i]); } if (fence) - fence_put(fence); + dma_fence_put(fence); break; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h index 067e5e683bb3..bb964a8ff938 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h @@ -104,7 +104,7 @@ TRACE_EVENT(amdgpu_cs_ioctl, __field(struct amdgpu_device *, adev) __field(struct amd_sched_job *, sched_job) __field(struct amdgpu_ib *, ib) - __field(struct fence *, fence) + __field(struct dma_fence *, fence) __field(char *, ring_name) __field(u32, num_ibs) ), @@ -129,7 +129,7 @@ TRACE_EVENT(amdgpu_sched_run_job, __field(struct amdgpu_device *, adev) __field(struct amd_sched_job *, sched_job) __field(struct amdgpu_ib *, ib) - __field(struct fence *, fence) + __field(struct dma_fence *, fence) __field(char *, ring_name) __field(u32, num_ibs) ), diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index dcaf691f56b5..a743aeabc767 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -268,7 +268,7 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo, struct amdgpu_device *adev; struct amdgpu_ring *ring; uint64_t old_start, new_start; - struct fence *fence; +
struct dma_fence *fence; int r; adev = amdgpu_get_adev(bo->bdev); @@ -316,7 +316,7 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo, return r; r = ttm_bo_pipeline_move(bo, fence, evict, new_mem); - fence_put(fence); + dma_fence_put(fence); return r; } @@ -1247,7 +1247,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t dst_offset, uint32_t byte_count, struct reservation_object *resv, - struct fence **fence, bool direct_submit) + struct dma_fence **fence, bool direct_submit) { struct amdgpu_device *adev = ring->adev; struct amdgpu_job *job; @@ -1294,7 +1294,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, if (direct_submit) { r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs, NULL, NULL, fence); - job->fence = fence_get(*fence); + job->fence = dma_fence_get(*fence); if (r) DRM_ERROR("Error scheduling IBs (%d)\n", r); amdgpu_job_free(job); @@ -1315,7 +1315,7 @@ error_free: int amdgpu_fill_buffer(struct amdgpu_bo *bo, uint32_t src_data, struct reservation_object *resv, - struct fence **fence) + struct dma_fence **fence) { struct amdgpu_device *adev = bo->adev; struct amdgpu_job *job; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index 9812c805326c..3f293e189378 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -77,11 +77,11 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t dst_offset, uint32_t byte_count, struct reservation_object *resv, - struct fence **fence, bool direct_submit); + struct dma_fence **fence, bool direct_submit); int amdgpu_fill_buffer(struct amdgpu_bo *bo, uint32_t src_data, struct reservation_object *resv, - struct fence **fence); + struct dma_fence **fence); int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma); bool amdgpu_ttm_is_bound(struct ttm_tt *ttm); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index e3281cacc586..0f6575e7ef8c 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -333,7 +333,7 @@ void amdgpu_uvd_free_handles(struct amdgpu_device *adev, struct drm_file *filp) for (i = 0; i < adev->uvd.max_handles; ++i) { uint32_t handle = atomic_read(&adev->uvd.handles[i]); if (handle != 0 && adev->uvd.filp[i] == filp) { - struct fence *fence; + struct dma_fence *fence; r = amdgpu_uvd_get_destroy_msg(ring, handle, false, &fence); @@ -342,8 +342,8 @@ void amdgpu_uvd_free_handles(struct amdgpu_device *adev, struct drm_file *filp) continue; } - fence_wait(fence, false); - fence_put(fence); + dma_fence_wait(fence, false); + dma_fence_put(fence); adev->uvd.filp[i] = NULL; atomic_set(&adev->uvd.handles[i], 0); @@ -909,14 +909,14 @@ int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx) } static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo, - bool direct, struct fence **fence) + bool direct, struct dma_fence **fence) { struct ttm_validate_buffer tv; struct ww_acquire_ctx ticket; struct list_head head; struct amdgpu_job *job; struct amdgpu_ib *ib; - struct fence *f = NULL; + struct dma_fence *f = NULL; struct amdgpu_device *adev = ring->adev; uint64_t addr; int i, r; @@ -960,7 +960,7 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo, if (direct) { r = amdgpu_ib_schedule(ring, 1, ib, NULL, NULL, &f); - job->fence = fence_get(f); + job->fence = dma_fence_get(f); if (r) goto err_free; @@ -975,9 +975,9 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo, ttm_eu_fence_buffer_objects(&ticket, &head, f); if (fence) - *fence = fence_get(f); + *fence = dma_fence_get(f); amdgpu_bo_unref(&bo); - fence_put(f); + dma_fence_put(f); return 0; @@ -993,7 +993,7 @@ err: crash the vcpu so just try to emmit a dummy create/destroy msg to avoid this */ int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, - struct fence **fence) + struct dma_fence 
**fence) { struct amdgpu_device *adev = ring->adev; struct amdgpu_bo *bo; @@ -1042,7 +1042,7 @@ int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, } int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, - bool direct, struct fence **fence) + bool direct, struct dma_fence **fence) { struct amdgpu_device *adev = ring->adev; struct amdgpu_bo *bo; @@ -1128,7 +1128,7 @@ void amdgpu_uvd_ring_end_use(struct amdgpu_ring *ring) */ int amdgpu_uvd_ring_test_ib(struct amdgpu_ring *ring, long timeout) { - struct fence *fence; + struct dma_fence *fence; long r; r = amdgpu_uvd_get_create_msg(ring, 1, NULL); @@ -1143,7 +1143,7 @@ int amdgpu_uvd_ring_test_ib(struct amdgpu_ring *ring, long timeout) goto error; } - r = fence_wait_timeout(fence, false, timeout); + r = dma_fence_wait_timeout(fence, false, timeout); if (r == 0) { DRM_ERROR("amdgpu: IB test timed out.\n"); r = -ETIMEDOUT; @@ -1154,7 +1154,7 @@ int amdgpu_uvd_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = 0; } - fence_put(fence); + dma_fence_put(fence); error: return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h index c850009602d1..6249ba1bde2a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h @@ -29,9 +29,9 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev); int amdgpu_uvd_suspend(struct amdgpu_device *adev); int amdgpu_uvd_resume(struct amdgpu_device *adev); int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, - struct fence **fence); + struct dma_fence **fence); int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, - bool direct, struct fence **fence); + bool direct, struct dma_fence **fence); void amdgpu_uvd_free_handles(struct amdgpu_device *adev, struct drm_file *filp); int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index 7fe8fd884f06..f0f8afb85585 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -395,12 +395,12 @@ void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp) * Open up a stream for HW test */ int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, - struct fence **fence) + struct dma_fence **fence) { const unsigned ib_size_dw = 1024; struct amdgpu_job *job; struct amdgpu_ib *ib; - struct fence *f = NULL; + struct dma_fence *f = NULL; uint64_t dummy; int i, r; @@ -450,14 +450,14 @@ int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, ib->ptr[i] = 0x0; r = amdgpu_ib_schedule(ring, 1, ib, NULL, NULL, &f); - job->fence = fence_get(f); + job->fence = dma_fence_get(f); if (r) goto err; amdgpu_job_free(job); if (fence) - *fence = fence_get(f); - fence_put(f); + *fence = dma_fence_get(f); + dma_fence_put(f); return 0; err: @@ -476,12 +476,12 @@ err: * Close up a stream for HW test or if userspace failed to do so */ int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, - bool direct, struct fence **fence) + bool direct, struct dma_fence **fence) { const unsigned ib_size_dw = 1024; struct amdgpu_job *job; struct amdgpu_ib *ib; - struct fence *f = NULL; + struct dma_fence *f = NULL; int i, r; r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job); @@ -513,7 +513,7 @@ int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, if (direct) { r = amdgpu_ib_schedule(ring, 1, ib, NULL, NULL, &f); - job->fence = fence_get(f); + job->fence = dma_fence_get(f); if (r) goto err; @@ -526,8 +526,8 @@ int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, } if (fence) - *fence = fence_get(f); - fence_put(f); + *fence = dma_fence_get(f); + dma_fence_put(f); return 0; err: @@ -883,7 +883,7 @@ int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring) */ int 
amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring, long timeout) { - struct fence *fence = NULL; + struct dma_fence *fence = NULL; long r; /* skip vce ring1/2 ib test for now, since it's not reliable */ @@ -902,7 +902,7 @@ int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring, long timeout) goto error; } - r = fence_wait_timeout(fence, false, timeout); + r = dma_fence_wait_timeout(fence, false, timeout); if (r == 0) { DRM_ERROR("amdgpu: IB test timed out.\n"); r = -ETIMEDOUT; @@ -913,6 +913,6 @@ int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = 0; } error: - fence_put(fence); + dma_fence_put(fence); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h index 12729d2852df..566c29ddeeb6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h @@ -29,9 +29,9 @@ int amdgpu_vce_sw_fini(struct amdgpu_device *adev); int amdgpu_vce_suspend(struct amdgpu_device *adev); int amdgpu_vce_resume(struct amdgpu_device *adev); int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, - struct fence **fence); + struct dma_fence **fence); int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, - bool direct, struct fence **fence); + bool direct, struct dma_fence **fence); void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp); int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx); void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 06f24322e7c3..22cabb5456e0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -25,7 +25,7 @@ * Alex Deucher * Jerome Glisse */ -#include <linux/fence-array.h> +#include <linux/dma-fence-array.h> #include <drm/drmP.h> #include <drm/amdgpu_drm.h> #include "amdgpu.h" @@ -194,14 +194,14 @@ static bool amdgpu_vm_is_gpu_reset(struct amdgpu_device *adev, * Allocate an id for the vm, 
adding fences to the sync obj as necessary. */ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, - struct amdgpu_sync *sync, struct fence *fence, + struct amdgpu_sync *sync, struct dma_fence *fence, struct amdgpu_job *job) { struct amdgpu_device *adev = ring->adev; uint64_t fence_context = adev->fence_context + ring->idx; - struct fence *updates = sync->last_vm_update; + struct dma_fence *updates = sync->last_vm_update; struct amdgpu_vm_id *id, *idle; - struct fence **fences; + struct dma_fence **fences; unsigned i; int r = 0; @@ -225,17 +225,17 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, if (&idle->list == &adev->vm_manager.ids_lru) { u64 fence_context = adev->vm_manager.fence_context + ring->idx; unsigned seqno = ++adev->vm_manager.seqno[ring->idx]; - struct fence_array *array; + struct dma_fence_array *array; unsigned j; for (j = 0; j < i; ++j) - fence_get(fences[j]); + dma_fence_get(fences[j]); - array = fence_array_create(i, fences, fence_context, + array = dma_fence_array_create(i, fences, fence_context, seqno, true); if (!array) { for (j = 0; j < i; ++j) - fence_put(fences[j]); + dma_fence_put(fences[j]); kfree(fences); r = -ENOMEM; goto error; @@ -243,7 +243,7 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, r = amdgpu_sync_fence(ring->adev, sync, &array->base); - fence_put(&array->base); + dma_fence_put(&array->base); if (r) goto error; @@ -257,7 +257,7 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, /* Check if we can use a VMID already assigned to this VM */ i = ring->idx; do { - struct fence *flushed; + struct dma_fence *flushed; id = vm->ids[i++]; if (i == AMDGPU_MAX_RINGS) @@ -279,12 +279,12 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, continue; if (id->last_flush->context != fence_context && - !fence_is_signaled(id->last_flush)) + !dma_fence_is_signaled(id->last_flush)) continue; flushed = id->flushed_updates; if (updates && - 
(!flushed || fence_is_later(updates, flushed))) + (!flushed || dma_fence_is_later(updates, flushed))) continue; /* Good we can use this VMID. Remember this submission as @@ -315,14 +315,14 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, if (r) goto error; - fence_put(id->first); - id->first = fence_get(fence); + dma_fence_put(id->first); + id->first = dma_fence_get(fence); - fence_put(id->last_flush); + dma_fence_put(id->last_flush); id->last_flush = NULL; - fence_put(id->flushed_updates); - id->flushed_updates = fence_get(updates); + dma_fence_put(id->flushed_updates); + id->flushed_updates = dma_fence_get(updates); id->pd_gpu_addr = job->vm_pd_addr; id->current_gpu_reset_count = atomic_read(&adev->gpu_reset_counter); @@ -393,7 +393,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job) if (ring->funcs->emit_vm_flush && (job->vm_needs_flush || amdgpu_vm_is_gpu_reset(adev, id))) { - struct fence *fence; + struct dma_fence *fence; trace_amdgpu_vm_flush(job->vm_pd_addr, ring->idx, job->vm_id); amdgpu_ring_emit_vm_flush(ring, job->vm_id, job->vm_pd_addr); @@ -403,7 +403,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job) return r; mutex_lock(&adev->vm_manager.lock); - fence_put(id->last_flush); + dma_fence_put(id->last_flush); id->last_flush = fence; mutex_unlock(&adev->vm_manager.lock); } @@ -537,7 +537,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, struct amdgpu_bo *bo) { struct amdgpu_ring *ring; - struct fence *fence = NULL; + struct dma_fence *fence = NULL; struct amdgpu_job *job; struct amdgpu_pte_update_params params; unsigned entries; @@ -578,7 +578,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, goto error_free; amdgpu_bo_fence(bo, fence, true); - fence_put(fence); + dma_fence_put(fence); return 0; error_free: @@ -625,7 +625,7 @@ static int amdgpu_vm_update_pd_or_shadow(struct amdgpu_device *adev, unsigned count = 0, pt_idx, ndw; struct amdgpu_job *job; struct 
amdgpu_pte_update_params params; - struct fence *fence = NULL; + struct dma_fence *fence = NULL; int r; @@ -714,9 +714,9 @@ static int amdgpu_vm_update_pd_or_shadow(struct amdgpu_device *adev, goto error_free; amdgpu_bo_fence(pd, fence, true); - fence_put(vm->page_directory_fence); - vm->page_directory_fence = fence_get(fence); - fence_put(fence); + dma_fence_put(vm->page_directory_fence); + vm->page_directory_fence = dma_fence_get(fence); + dma_fence_put(fence); } else { amdgpu_job_free(job); @@ -929,20 +929,20 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params, * Returns 0 for success, -EINVAL for failure. */ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, - struct fence *exclusive, + struct dma_fence *exclusive, uint64_t src, dma_addr_t *pages_addr, struct amdgpu_vm *vm, uint64_t start, uint64_t last, uint32_t flags, uint64_t addr, - struct fence **fence) + struct dma_fence **fence) { struct amdgpu_ring *ring; void *owner = AMDGPU_FENCE_OWNER_VM; unsigned nptes, ncmds, ndw; struct amdgpu_job *job; struct amdgpu_pte_update_params params; - struct fence *f = NULL; + struct dma_fence *f = NULL; int r; memset(&params, 0, sizeof(params)); @@ -1045,10 +1045,10 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, amdgpu_bo_fence(vm->page_directory, f, true); if (fence) { - fence_put(*fence); - *fence = fence_get(f); + dma_fence_put(*fence); + *fence = dma_fence_get(f); } - fence_put(f); + dma_fence_put(f); return 0; error_free: @@ -1074,13 +1074,13 @@ error_free: * Returns 0 for success, -EINVAL for failure. 
*/ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, - struct fence *exclusive, + struct dma_fence *exclusive, uint32_t gtt_flags, dma_addr_t *pages_addr, struct amdgpu_vm *vm, struct amdgpu_bo_va_mapping *mapping, uint32_t flags, uint64_t addr, - struct fence **fence) + struct dma_fence **fence) { const uint64_t max_size = 64ULL * 1024ULL * 1024ULL / AMDGPU_GPU_PAGE_SIZE; @@ -1147,7 +1147,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, dma_addr_t *pages_addr = NULL; uint32_t gtt_flags, flags; struct ttm_mem_reg *mem; - struct fence *exclusive; + struct dma_fence *exclusive; uint64_t addr; int r; @@ -1547,7 +1547,7 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, kfree(mapping); } - fence_put(bo_va->last_pt_update); + dma_fence_put(bo_va->last_pt_update); kfree(bo_va); } @@ -1709,7 +1709,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) amdgpu_bo_unref(&vm->page_directory->shadow); amdgpu_bo_unref(&vm->page_directory); - fence_put(vm->page_directory_fence); + dma_fence_put(vm->page_directory_fence); } /** @@ -1733,7 +1733,8 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev) &adev->vm_manager.ids_lru); } - adev->vm_manager.fence_context = fence_context_alloc(AMDGPU_MAX_RINGS); + adev->vm_manager.fence_context = + dma_fence_context_alloc(AMDGPU_MAX_RINGS); for (i = 0; i < AMDGPU_MAX_RINGS; ++i) adev->vm_manager.seqno[i] = 0; @@ -1755,8 +1756,8 @@ void amdgpu_vm_manager_fini(struct amdgpu_device *adev) for (i = 0; i < AMDGPU_NUM_VM; ++i) { struct amdgpu_vm_id *id = &adev->vm_manager.ids[i]; - fence_put(adev->vm_manager.ids[i].first); + dma_fence_put(adev->vm_manager.ids[i].first); amdgpu_sync_free(&adev->vm_manager.ids[i].active); - fence_put(id->flushed_updates); + dma_fence_put(id->flushed_updates); } } diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index cb952acc7133..321b9d5a4e6e 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ 
b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -622,7 +622,7 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring, long timeout) { struct amdgpu_device *adev = ring->adev; struct amdgpu_ib ib; - struct fence *f = NULL; + struct dma_fence *f = NULL; unsigned index; u32 tmp = 0; u64 gpu_addr; @@ -655,7 +655,7 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring, long timeout) if (r) goto err1; - r = fence_wait_timeout(f, false, timeout); + r = dma_fence_wait_timeout(f, false, timeout); if (r == 0) { DRM_ERROR("amdgpu: IB test timed out\n"); r = -ETIMEDOUT; @@ -675,7 +675,7 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring, long timeout) err1: amdgpu_ib_free(adev, &ib, NULL); - fence_put(f); + dma_fence_put(f); err0: amdgpu_wb_free(adev, index); return r; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c index 40abb6b81c09..7dc11a19e49d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c @@ -1522,7 +1522,7 @@ static int gfx_v6_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) { struct amdgpu_device *adev = ring->adev; struct amdgpu_ib ib; - struct fence *f = NULL; + struct dma_fence *f = NULL; uint32_t scratch; uint32_t tmp = 0; long r; @@ -1548,7 +1548,7 @@ static int gfx_v6_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) if (r) goto err2; - r = fence_wait_timeout(f, false, timeout); + r = dma_fence_wait_timeout(f, false, timeout); if (r == 0) { DRM_ERROR("amdgpu: IB test timed out\n"); r = -ETIMEDOUT; @@ -1569,7 +1569,7 @@ static int gfx_v6_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) err2: amdgpu_ib_free(adev, &ib, NULL); - fence_put(f); + dma_fence_put(f); err1: amdgpu_gfx_scratch_free(adev, scratch); return r; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 71116da9e782..3865ffe7de55 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -2286,7 
+2286,7 @@ static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) { struct amdgpu_device *adev = ring->adev; struct amdgpu_ib ib; - struct fence *f = NULL; + struct dma_fence *f = NULL; uint32_t scratch; uint32_t tmp = 0; long r; @@ -2312,7 +2312,7 @@ static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) if (r) goto err2; - r = fence_wait_timeout(f, false, timeout); + r = dma_fence_wait_timeout(f, false, timeout); if (r == 0) { DRM_ERROR("amdgpu: IB test timed out\n"); r = -ETIMEDOUT; @@ -2333,7 +2333,7 @@ static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) err2: amdgpu_ib_free(adev, &ib, NULL); - fence_put(f); + dma_fence_put(f); err1: amdgpu_gfx_scratch_free(adev, scratch); return r; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index ee6a48a09214..a9dd18847c40 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -798,7 +798,7 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) { struct amdgpu_device *adev = ring->adev; struct amdgpu_ib ib; - struct fence *f = NULL; + struct dma_fence *f = NULL; uint32_t scratch; uint32_t tmp = 0; long r; @@ -824,7 +824,7 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) if (r) goto err2; - r = fence_wait_timeout(f, false, timeout); + r = dma_fence_wait_timeout(f, false, timeout); if (r == 0) { DRM_ERROR("amdgpu: IB test timed out.\n"); r = -ETIMEDOUT; @@ -844,7 +844,7 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) } err2: amdgpu_ib_free(adev, &ib, NULL); - fence_put(f); + dma_fence_put(f); err1: amdgpu_gfx_scratch_free(adev, scratch); return r; @@ -1575,7 +1575,7 @@ static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) { struct amdgpu_ring *ring = &adev->gfx.compute_ring[0]; struct amdgpu_ib ib; - struct fence *f = NULL; + struct dma_fence *f = NULL; int r, i; u32 tmp; unsigned total_size, 
vgpr_offset, sgpr_offset; @@ -1708,7 +1708,7 @@ static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) } /* wait for the GPU to finish processing the IB */ - r = fence_wait(f, false); + r = dma_fence_wait(f, false); if (r) { DRM_ERROR("amdgpu: fence wait failed (%d).\n", r); goto fail; @@ -1729,7 +1729,7 @@ static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) fail: amdgpu_ib_free(adev, &ib, NULL); - fence_put(f); + dma_fence_put(f); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index 565dab3c7218..7edf6e8c63dc 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -668,7 +668,7 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring, long timeout) { struct amdgpu_device *adev = ring->adev; struct amdgpu_ib ib; - struct fence *f = NULL; + struct dma_fence *f = NULL; unsigned index; u32 tmp = 0; u64 gpu_addr; @@ -705,7 +705,7 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring, long timeout) if (r) goto err1; - r = fence_wait_timeout(f, false, timeout); + r = dma_fence_wait_timeout(f, false, timeout); if (r == 0) { DRM_ERROR("amdgpu: IB test timed out\n"); r = -ETIMEDOUT; @@ -725,7 +725,7 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring, long timeout) err1: amdgpu_ib_free(adev, &ib, NULL); - fence_put(f); + dma_fence_put(f); err0: amdgpu_wb_free(adev, index); return r; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index a9d10941fb53..1932a67c62ef 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -871,7 +871,7 @@ static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) { struct amdgpu_device *adev = ring->adev; struct amdgpu_ib ib; - struct fence *f = NULL; + struct dma_fence *f = NULL; unsigned index; u32 tmp = 0; u64 gpu_addr; @@ -908,7 +908,7 @@ static int sdma_v3_0_ring_test_ib(struct 
amdgpu_ring *ring, long timeout) if (r) goto err1; - r = fence_wait_timeout(f, false, timeout); + r = dma_fence_wait_timeout(f, false, timeout); if (r == 0) { DRM_ERROR("amdgpu: IB test timed out\n"); r = -ETIMEDOUT; @@ -927,7 +927,7 @@ static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) } err1: amdgpu_ib_free(adev, &ib, NULL); - fence_put(f); + dma_fence_put(f); err0: amdgpu_wb_free(adev, index); return r; diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c index de358193a8f9..b4cf4e25bf91 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dma.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c @@ -274,7 +274,7 @@ static int si_dma_ring_test_ib(struct amdgpu_ring *ring, long timeout) { struct amdgpu_device *adev = ring->adev; struct amdgpu_ib ib; - struct fence *f = NULL; + struct dma_fence *f = NULL; unsigned index; u32 tmp = 0; u64 gpu_addr; @@ -305,7 +305,7 @@ static int si_dma_ring_test_ib(struct amdgpu_ring *ring, long timeout) if (r) goto err1; - r = fence_wait_timeout(f, false, timeout); + r = dma_fence_wait_timeout(f, false, timeout); if (r == 0) { DRM_ERROR("amdgpu: IB test timed out\n"); r = -ETIMEDOUT; @@ -325,7 +325,7 @@ static int si_dma_ring_test_ib(struct amdgpu_ring *ring, long timeout) err1: amdgpu_ib_free(adev, &ib, NULL); - fence_put(f); + dma_fence_put(f); err0: amdgpu_wb_free(adev, index); return r; diff --git a/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h b/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h index b961a1c6caf3..dbd4fd3a810b 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h +++ b/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h @@ -17,7 +17,7 @@ TRACE_EVENT(amd_sched_job, TP_STRUCT__entry( __field(struct amd_sched_entity *, entity) __field(struct amd_sched_job *, sched_job) - __field(struct fence *, fence) + __field(struct dma_fence *, fence) __field(const char *, name) __field(u32, job_count) __field(int, hw_job_count) @@ -42,7 +42,7 @@ TRACE_EVENT(amd_sched_process_job, 
TP_PROTO(struct amd_sched_fence *fence), TP_ARGS(fence), TP_STRUCT__entry( - __field(struct fence *, fence) + __field(struct dma_fence *, fence) ), TP_fast_assign( diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c index 963a24d46a93..5364e6a7ec8f 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c @@ -32,7 +32,7 @@ static bool amd_sched_entity_is_ready(struct amd_sched_entity *entity); static void amd_sched_wakeup(struct amd_gpu_scheduler *sched); -static void amd_sched_process_job(struct fence *f, struct fence_cb *cb); +static void amd_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb); struct kmem_cache *sched_fence_slab; atomic_t sched_fence_slab_ref = ATOMIC_INIT(0); @@ -141,7 +141,7 @@ int amd_sched_entity_init(struct amd_gpu_scheduler *sched, return r; atomic_set(&entity->fence_seq, 0); - entity->fence_context = fence_context_alloc(2); + entity->fence_context = dma_fence_context_alloc(2); return 0; } @@ -221,32 +221,32 @@ void amd_sched_entity_fini(struct amd_gpu_scheduler *sched, kfifo_free(&entity->job_queue); } -static void amd_sched_entity_wakeup(struct fence *f, struct fence_cb *cb) +static void amd_sched_entity_wakeup(struct dma_fence *f, struct dma_fence_cb *cb) { struct amd_sched_entity *entity = container_of(cb, struct amd_sched_entity, cb); entity->dependency = NULL; - fence_put(f); + dma_fence_put(f); amd_sched_wakeup(entity->sched); } -static void amd_sched_entity_clear_dep(struct fence *f, struct fence_cb *cb) +static void amd_sched_entity_clear_dep(struct dma_fence *f, struct dma_fence_cb *cb) { struct amd_sched_entity *entity = container_of(cb, struct amd_sched_entity, cb); entity->dependency = NULL; - fence_put(f); + dma_fence_put(f); } static bool amd_sched_entity_add_dependency_cb(struct amd_sched_entity *entity) { struct amd_gpu_scheduler *sched = entity->sched; - struct fence * fence = entity->dependency; + 
struct dma_fence * fence = entity->dependency; struct amd_sched_fence *s_fence; if (fence->context == entity->fence_context) { /* We can ignore fences from ourself */ - fence_put(entity->dependency); + dma_fence_put(entity->dependency); return false; } @@ -257,23 +257,23 @@ static bool amd_sched_entity_add_dependency_cb(struct amd_sched_entity *entity) * Fence is from the same scheduler, only need to wait for * it to be scheduled */ - fence = fence_get(&s_fence->scheduled); - fence_put(entity->dependency); + fence = dma_fence_get(&s_fence->scheduled); + dma_fence_put(entity->dependency); entity->dependency = fence; - if (!fence_add_callback(fence, &entity->cb, - amd_sched_entity_clear_dep)) + if (!dma_fence_add_callback(fence, &entity->cb, + amd_sched_entity_clear_dep)) return true; /* Ignore it when it is already scheduled */ - fence_put(fence); + dma_fence_put(fence); return false; } - if (!fence_add_callback(entity->dependency, &entity->cb, - amd_sched_entity_wakeup)) + if (!dma_fence_add_callback(entity->dependency, &entity->cb, + amd_sched_entity_wakeup)) return true; - fence_put(entity->dependency); + dma_fence_put(entity->dependency); return false; } @@ -354,7 +354,8 @@ static void amd_sched_job_finish(struct work_struct *work) sched->ops->free_job(s_job); } -static void amd_sched_job_finish_cb(struct fence *f, struct fence_cb *cb) +static void amd_sched_job_finish_cb(struct dma_fence *f, + struct dma_fence_cb *cb) { struct amd_sched_job *job = container_of(cb, struct amd_sched_job, finish_cb); @@ -388,8 +389,8 @@ void amd_sched_hw_job_reset(struct amd_gpu_scheduler *sched) spin_lock(&sched->job_list_lock); list_for_each_entry_reverse(s_job, &sched->ring_mirror_list, node) { - if (fence_remove_callback(s_job->s_fence->parent, &s_job->s_fence->cb)) { - fence_put(s_job->s_fence->parent); + if (dma_fence_remove_callback(s_job->s_fence->parent, &s_job->s_fence->cb)) { + dma_fence_put(s_job->s_fence->parent); s_job->s_fence->parent = NULL; } } @@ -410,21 +411,21 
@@ void amd_sched_job_recovery(struct amd_gpu_scheduler *sched) list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) { struct amd_sched_fence *s_fence = s_job->s_fence; - struct fence *fence; + struct dma_fence *fence; spin_unlock(&sched->job_list_lock); fence = sched->ops->run_job(s_job); atomic_inc(&sched->hw_rq_count); if (fence) { - s_fence->parent = fence_get(fence); - r = fence_add_callback(fence, &s_fence->cb, - amd_sched_process_job); + s_fence->parent = dma_fence_get(fence); + r = dma_fence_add_callback(fence, &s_fence->cb, + amd_sched_process_job); if (r == -ENOENT) amd_sched_process_job(fence, &s_fence->cb); else if (r) DRM_ERROR("fence add callback failed (%d)\n", r); - fence_put(fence); + dma_fence_put(fence); } else { DRM_ERROR("Failed to run job!\n"); amd_sched_process_job(NULL, &s_fence->cb); @@ -446,8 +447,8 @@ void amd_sched_entity_push_job(struct amd_sched_job *sched_job) struct amd_sched_entity *entity = sched_job->s_entity; trace_amd_sched_job(sched_job); - fence_add_callback(&sched_job->s_fence->finished, &sched_job->finish_cb, - amd_sched_job_finish_cb); + dma_fence_add_callback(&sched_job->s_fence->finished, &sched_job->finish_cb, + amd_sched_job_finish_cb); wait_event(entity->sched->job_scheduled, amd_sched_entity_in(sched_job)); } @@ -511,7 +512,7 @@ amd_sched_select_entity(struct amd_gpu_scheduler *sched) return entity; } -static void amd_sched_process_job(struct fence *f, struct fence_cb *cb) +static void amd_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb) { struct amd_sched_fence *s_fence = container_of(cb, struct amd_sched_fence, cb); @@ -521,7 +522,7 @@ static void amd_sched_process_job(struct fence *f, struct fence_cb *cb) amd_sched_fence_finished(s_fence); trace_amd_sched_process_job(s_fence); - fence_put(&s_fence->finished); + dma_fence_put(&s_fence->finished); wake_up_interruptible(&sched->wake_up_worker); } @@ -547,7 +548,7 @@ static int amd_sched_main(void *param) struct amd_sched_entity *entity = 
NULL; struct amd_sched_fence *s_fence; struct amd_sched_job *sched_job; - struct fence *fence; + struct dma_fence *fence; wait_event_interruptible(sched->wake_up_worker, (!amd_sched_blocked(sched) && @@ -569,15 +570,15 @@ static int amd_sched_main(void *param) fence = sched->ops->run_job(sched_job); amd_sched_fence_scheduled(s_fence); if (fence) { - s_fence->parent = fence_get(fence); - r = fence_add_callback(fence, &s_fence->cb, - amd_sched_process_job); + s_fence->parent = dma_fence_get(fence); + r = dma_fence_add_callback(fence, &s_fence->cb, + amd_sched_process_job); if (r == -ENOENT) amd_sched_process_job(fence, &s_fence->cb); else if (r) DRM_ERROR("fence add callback failed (%d)\n", r); - fence_put(fence); + dma_fence_put(fence); } else { DRM_ERROR("Failed to run job!\n"); amd_sched_process_job(NULL, &s_fence->cb); diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h index 7cbbbfb502ef..876aa43b57df 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h @@ -25,7 +25,7 @@ #define _GPU_SCHEDULER_H_ #include <linux/kfifo.h> -#include <linux/fence.h> +#include <linux/dma-fence.h> struct amd_gpu_scheduler; struct amd_sched_rq; @@ -50,8 +50,8 @@ struct amd_sched_entity { atomic_t fence_seq; uint64_t fence_context; - struct fence *dependency; - struct fence_cb cb; + struct dma_fence *dependency; + struct dma_fence_cb cb; }; /** @@ -66,10 +66,10 @@ struct amd_sched_rq { }; struct amd_sched_fence { - struct fence scheduled; - struct fence finished; - struct fence_cb cb; - struct fence *parent; + struct dma_fence scheduled; + struct dma_fence finished; + struct dma_fence_cb cb; + struct dma_fence *parent; struct amd_gpu_scheduler *sched; spinlock_t lock; void *owner; @@ -79,15 +79,15 @@ struct amd_sched_job { struct amd_gpu_scheduler *sched; struct amd_sched_entity *s_entity; struct amd_sched_fence *s_fence; - struct fence_cb finish_cb; + struct dma_fence_cb finish_cb; struct work_struct finish_work; struct 
list_head node; struct delayed_work work_tdr; }; -extern const struct fence_ops amd_sched_fence_ops_scheduled; -extern const struct fence_ops amd_sched_fence_ops_finished; -static inline struct amd_sched_fence *to_amd_sched_fence(struct fence *f) +extern const struct dma_fence_ops amd_sched_fence_ops_scheduled; +extern const struct dma_fence_ops amd_sched_fence_ops_finished; +static inline struct amd_sched_fence *to_amd_sched_fence(struct dma_fence *f) { if (f->ops == &amd_sched_fence_ops_scheduled) return container_of(f, struct amd_sched_fence, scheduled); @@ -103,8 +103,8 @@ static inline struct amd_sched_fence *to_amd_sched_fence(struct fence *f) * these functions should be implemented in driver side */ struct amd_sched_backend_ops { - struct fence *(*dependency)(struct amd_sched_job *sched_job); - struct fence *(*run_job)(struct amd_sched_job *sched_job); + struct dma_fence *(*dependency)(struct amd_sched_job *sched_job); + struct dma_fence *(*run_job)(struct amd_sched_job *sched_job); void (*timedout_job)(struct amd_sched_job *sched_job); void (*free_job)(struct amd_sched_job *sched_job); }; diff --git a/drivers/gpu/drm/amd/scheduler/sched_fence.c b/drivers/gpu/drm/amd/scheduler/sched_fence.c index 6b63beaf7574..c26fa298fe9e 100644 --- a/drivers/gpu/drm/amd/scheduler/sched_fence.c +++ b/drivers/gpu/drm/amd/scheduler/sched_fence.c @@ -42,46 +42,50 @@ struct amd_sched_fence *amd_sched_fence_create(struct amd_sched_entity *entity, spin_lock_init(&fence->lock); seq = atomic_inc_return(&entity->fence_seq); - fence_init(&fence->scheduled, &amd_sched_fence_ops_scheduled, - &fence->lock, entity->fence_context, seq); - fence_init(&fence->finished, &amd_sched_fence_ops_finished, - &fence->lock, entity->fence_context + 1, seq); + dma_fence_init(&fence->scheduled, &amd_sched_fence_ops_scheduled, + &fence->lock, entity->fence_context, seq); + dma_fence_init(&fence->finished, &amd_sched_fence_ops_finished, + &fence->lock, entity->fence_context + 1, seq); return fence; } 
void amd_sched_fence_scheduled(struct amd_sched_fence *fence) { - int ret = fence_signal(&fence->scheduled); + int ret = dma_fence_signal(&fence->scheduled); if (!ret) - FENCE_TRACE(&fence->scheduled, "signaled from irq context\n"); + DMA_FENCE_TRACE(&fence->scheduled, + "signaled from irq context\n"); else - FENCE_TRACE(&fence->scheduled, "was already signaled\n"); + DMA_FENCE_TRACE(&fence->scheduled, + "was already signaled\n"); } void amd_sched_fence_finished(struct amd_sched_fence *fence) { - int ret = fence_signal(&fence->finished); + int ret = dma_fence_signal(&fence->finished); if (!ret) - FENCE_TRACE(&fence->finished, "signaled from irq context\n"); + DMA_FENCE_TRACE(&fence->finished, + "signaled from irq context\n"); else - FENCE_TRACE(&fence->finished, "was already signaled\n"); + DMA_FENCE_TRACE(&fence->finished, + "was already signaled\n"); } -static const char *amd_sched_fence_get_driver_name(struct fence *fence) +static const char *amd_sched_fence_get_driver_name(struct dma_fence *fence) { return "amd_sched"; } -static const char *amd_sched_fence_get_timeline_name(struct fence *f) +static const char *amd_sched_fence_get_timeline_name(struct dma_fence *f) { struct amd_sched_fence *fence = to_amd_sched_fence(f); return (const char *)fence->sched->name; } -static bool amd_sched_fence_enable_signaling(struct fence *f) +static bool amd_sched_fence_enable_signaling(struct dma_fence *f) { return true; } @@ -95,10 +99,10 @@ static bool amd_sched_fence_enable_signaling(struct fence *f) */ static void amd_sched_fence_free(struct rcu_head *rcu) { - struct fence *f = container_of(rcu, struct fence, rcu); + struct dma_fence *f = container_of(rcu, struct dma_fence, rcu); struct amd_sched_fence *fence = to_amd_sched_fence(f); - fence_put(fence->parent); + dma_fence_put(fence->parent); kmem_cache_free(sched_fence_slab, fence); } @@ -110,7 +114,7 @@ static void amd_sched_fence_free(struct rcu_head *rcu) * This function is called when the reference count becomes zero. 
* It just RCU schedules freeing up the fence. */ -static void amd_sched_fence_release_scheduled(struct fence *f) +static void amd_sched_fence_release_scheduled(struct dma_fence *f) { struct amd_sched_fence *fence = to_amd_sched_fence(f); @@ -124,27 +128,27 @@ static void amd_sched_fence_release_scheduled(struct fence *f) * * Drop the extra reference from the scheduled fence to the base fence. */ -static void amd_sched_fence_release_finished(struct fence *f) +static void amd_sched_fence_release_finished(struct dma_fence *f) { struct amd_sched_fence *fence = to_amd_sched_fence(f); - fence_put(&fence->scheduled); + dma_fence_put(&fence->scheduled); } -const struct fence_ops amd_sched_fence_ops_scheduled = { +const struct dma_fence_ops amd_sched_fence_ops_scheduled = { .get_driver_name = amd_sched_fence_get_driver_name, .get_timeline_name = amd_sched_fence_get_timeline_name, .enable_signaling = amd_sched_fence_enable_signaling, .signaled = NULL, - .wait = fence_default_wait, + .wait = dma_fence_default_wait, .release = amd_sched_fence_release_scheduled, }; -const struct fence_ops amd_sched_fence_ops_finished = { +const struct dma_fence_ops amd_sched_fence_ops_finished = { .get_driver_name = amd_sched_fence_get_driver_name, .get_timeline_name = amd_sched_fence_get_timeline_name, .enable_signaling = amd_sched_fence_enable_signaling, .signaled = NULL, - .wait = fence_default_wait, + .wait = dma_fence_default_wait, .release = amd_sched_fence_release_finished, }; diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index 1b5a32df9a9a..c32fb3c1d6f0 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -1463,7 +1463,7 @@ EXPORT_SYMBOL(drm_atomic_nonblocking_commit); static struct drm_pending_vblank_event *create_vblank_event( struct drm_device *dev, struct drm_file *file_priv, - struct fence *fence, uint64_t user_data) + struct dma_fence *fence, uint64_t user_data) { struct drm_pending_vblank_event *e = NULL; int ret; diff --git 
a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index f9362760bfb2..75ad01d595fd 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -30,7 +30,7 @@ #include #include #include -#include +#include #include "drm_crtc_internal.h" @@ -1017,7 +1017,7 @@ EXPORT_SYMBOL(drm_atomic_helper_commit_modeset_enables); * drm_atomic_helper_swap_state() so it uses the current plane state (and * just uses the atomic state to find the changed planes) * - * Returns zero if success or < 0 if fence_wait() fails. + * Returns zero if success or < 0 if dma_fence_wait() fails. */ int drm_atomic_helper_wait_for_fences(struct drm_device *dev, struct drm_atomic_state *state, @@ -1041,11 +1041,11 @@ int drm_atomic_helper_wait_for_fences(struct drm_device *dev, * still interrupt the operation. Instead of blocking until the * timer expires, make the wait interruptible. */ - ret = fence_wait(plane_state->fence, pre_swap); + ret = dma_fence_wait(plane_state->fence, pre_swap); if (ret) return ret; - fence_put(plane_state->fence); + dma_fence_put(plane_state->fence); plane_state->fence = NULL; } diff --git a/drivers/gpu/drm/drm_fops.c b/drivers/gpu/drm/drm_fops.c index 8bed5f459182..cf993dbf602e 100644 --- a/drivers/gpu/drm/drm_fops.c +++ b/drivers/gpu/drm/drm_fops.c @@ -665,7 +665,7 @@ void drm_event_cancel_free(struct drm_device *dev, spin_unlock_irqrestore(&dev->event_lock, flags); if (p->fence) - fence_put(p->fence); + dma_fence_put(p->fence); kfree(p); } @@ -696,8 +696,8 @@ void drm_send_event_locked(struct drm_device *dev, struct drm_pending_event *e) } if (e->fence) { - fence_signal(e->fence); - fence_put(e->fence); + dma_fence_signal(e->fence); + dma_fence_put(e->fence); } if (!e->file_priv) { diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.c b/drivers/gpu/drm/etnaviv/etnaviv_gem.c index 3755ef935af4..7d066a91d778 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c @@ -466,10 
+466,10 @@ int etnaviv_gem_wait_bo(struct etnaviv_gpu *gpu, struct drm_gem_object *obj, } #ifdef CONFIG_DEBUG_FS -static void etnaviv_gem_describe_fence(struct fence *fence, +static void etnaviv_gem_describe_fence(struct dma_fence *fence, const char *type, struct seq_file *m) { - if (!test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->flags)) + if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) seq_printf(m, "\t%9s: %s %s seq %u\n", type, fence->ops->get_driver_name(fence), @@ -482,7 +482,7 @@ static void etnaviv_gem_describe(struct drm_gem_object *obj, struct seq_file *m) struct etnaviv_gem_object *etnaviv_obj = to_etnaviv_bo(obj); struct reservation_object *robj = etnaviv_obj->resv; struct reservation_object_list *fobj; - struct fence *fence; + struct dma_fence *fence; unsigned long off = drm_vma_node_start(&obj->vma_node); seq_printf(m, "%08x: %c %2d (%2d) %08lx %p %zd\n", diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c index b1254f885fed..d2211825e5c8 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c @@ -15,7 +15,7 @@ */ #include -#include +#include #include #include #include "etnaviv_dump.h" @@ -882,7 +882,7 @@ static void recover_worker(struct work_struct *work) for (i = 0; i < ARRAY_SIZE(gpu->event); i++) { if (!gpu->event[i].used) continue; - fence_signal(gpu->event[i].fence); + dma_fence_signal(gpu->event[i].fence); gpu->event[i].fence = NULL; gpu->event[i].used = false; complete(&gpu->event_free); @@ -952,55 +952,55 @@ static void hangcheck_disable(struct etnaviv_gpu *gpu) /* fence object management */ struct etnaviv_fence { struct etnaviv_gpu *gpu; - struct fence base; + struct dma_fence base; }; -static inline struct etnaviv_fence *to_etnaviv_fence(struct fence *fence) +static inline struct etnaviv_fence *to_etnaviv_fence(struct dma_fence *fence) { return container_of(fence, struct etnaviv_fence, base); } -static const char *etnaviv_fence_get_driver_name(struct fence 
*fence) +static const char *etnaviv_fence_get_driver_name(struct dma_fence *fence) { return "etnaviv"; } -static const char *etnaviv_fence_get_timeline_name(struct fence *fence) +static const char *etnaviv_fence_get_timeline_name(struct dma_fence *fence) { struct etnaviv_fence *f = to_etnaviv_fence(fence); return dev_name(f->gpu->dev); } -static bool etnaviv_fence_enable_signaling(struct fence *fence) +static bool etnaviv_fence_enable_signaling(struct dma_fence *fence) { return true; } -static bool etnaviv_fence_signaled(struct fence *fence) +static bool etnaviv_fence_signaled(struct dma_fence *fence) { struct etnaviv_fence *f = to_etnaviv_fence(fence); return fence_completed(f->gpu, f->base.seqno); } -static void etnaviv_fence_release(struct fence *fence) +static void etnaviv_fence_release(struct dma_fence *fence) { struct etnaviv_fence *f = to_etnaviv_fence(fence); kfree_rcu(f, base.rcu); } -static const struct fence_ops etnaviv_fence_ops = { +static const struct dma_fence_ops etnaviv_fence_ops = { .get_driver_name = etnaviv_fence_get_driver_name, .get_timeline_name = etnaviv_fence_get_timeline_name, .enable_signaling = etnaviv_fence_enable_signaling, .signaled = etnaviv_fence_signaled, - .wait = fence_default_wait, + .wait = dma_fence_default_wait, .release = etnaviv_fence_release, }; -static struct fence *etnaviv_gpu_fence_alloc(struct etnaviv_gpu *gpu) +static struct dma_fence *etnaviv_gpu_fence_alloc(struct etnaviv_gpu *gpu) { struct etnaviv_fence *f; @@ -1010,8 +1010,8 @@ static struct fence *etnaviv_gpu_fence_alloc(struct etnaviv_gpu *gpu) f->gpu = gpu; - fence_init(&f->base, &etnaviv_fence_ops, &gpu->fence_spinlock, - gpu->fence_context, ++gpu->next_fence); + dma_fence_init(&f->base, &etnaviv_fence_ops, &gpu->fence_spinlock, + gpu->fence_context, ++gpu->next_fence); return &f->base; } @@ -1021,7 +1021,7 @@ int etnaviv_gpu_fence_sync_obj(struct etnaviv_gem_object *etnaviv_obj, { struct reservation_object *robj = etnaviv_obj->resv; struct 
reservation_object_list *fobj; - struct fence *fence; + struct dma_fence *fence; int i, ret; if (!exclusive) { @@ -1039,7 +1039,7 @@ int etnaviv_gpu_fence_sync_obj(struct etnaviv_gem_object *etnaviv_obj, /* Wait on any existing exclusive fence which isn't our own */ fence = reservation_object_get_excl(robj); if (fence && fence->context != context) { - ret = fence_wait(fence, true); + ret = dma_fence_wait(fence, true); if (ret) return ret; } @@ -1052,7 +1052,7 @@ int etnaviv_gpu_fence_sync_obj(struct etnaviv_gem_object *etnaviv_obj, fence = rcu_dereference_protected(fobj->shared[i], reservation_object_held(robj)); if (fence->context != context) { - ret = fence_wait(fence, true); + ret = dma_fence_wait(fence, true); if (ret) return ret; } @@ -1158,11 +1158,11 @@ static void retire_worker(struct work_struct *work) mutex_lock(&gpu->lock); list_for_each_entry_safe(cmdbuf, tmp, &gpu->active_cmd_list, node) { - if (!fence_is_signaled(cmdbuf->fence)) + if (!dma_fence_is_signaled(cmdbuf->fence)) break; list_del(&cmdbuf->node); - fence_put(cmdbuf->fence); + dma_fence_put(cmdbuf->fence); for (i = 0; i < cmdbuf->nr_bos; i++) { struct etnaviv_vram_mapping *mapping = cmdbuf->bo_map[i]; @@ -1275,7 +1275,7 @@ void etnaviv_gpu_pm_put(struct etnaviv_gpu *gpu) int etnaviv_gpu_submit(struct etnaviv_gpu *gpu, struct etnaviv_gem_submit *submit, struct etnaviv_cmdbuf *cmdbuf) { - struct fence *fence; + struct dma_fence *fence; unsigned int event, i; int ret; @@ -1391,7 +1391,7 @@ static irqreturn_t irq_handler(int irq, void *data) } while ((event = ffs(intr)) != 0) { - struct fence *fence; + struct dma_fence *fence; event -= 1; @@ -1401,7 +1401,7 @@ static irqreturn_t irq_handler(int irq, void *data) fence = gpu->event[event].fence; gpu->event[event].fence = NULL; - fence_signal(fence); + dma_fence_signal(fence); /* * Events can be processed out of order. 
Eg, @@ -1553,7 +1553,7 @@ static int etnaviv_gpu_bind(struct device *dev, struct device *master, return ret; gpu->drm = drm; - gpu->fence_context = fence_context_alloc(1); + gpu->fence_context = dma_fence_context_alloc(1); spin_lock_init(&gpu->fence_spinlock); INIT_LIST_HEAD(&gpu->active_cmd_list); diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h index 73c278dc3706..8c6b824e9d0a 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h @@ -89,7 +89,7 @@ struct etnaviv_chip_identity { struct etnaviv_event { bool used; - struct fence *fence; + struct dma_fence *fence; }; struct etnaviv_cmdbuf; @@ -163,7 +163,7 @@ struct etnaviv_cmdbuf { /* vram node used if the cmdbuf is mapped through the MMUv2 */ struct drm_mm_node vram_node; /* fence after which this buffer is to be disposed */ - struct fence *fence; + struct dma_fence *fence; /* target exec state */ u32 exec_state; /* per GPU in-flight list */ diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 74ede1f53372..f9af2a00625e 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -26,12 +26,12 @@ #include "i915_drv.h" -static const char *i915_fence_get_driver_name(struct fence *fence) +static const char *i915_fence_get_driver_name(struct dma_fence *fence) { return "i915"; } -static const char *i915_fence_get_timeline_name(struct fence *fence) +static const char *i915_fence_get_timeline_name(struct dma_fence *fence) { /* Timelines are bound by eviction to a VM. 
However, since * we only have a global seqno at the moment, we only have @@ -42,12 +42,12 @@ static const char *i915_fence_get_timeline_name(struct fence *fence) return "global"; } -static bool i915_fence_signaled(struct fence *fence) +static bool i915_fence_signaled(struct dma_fence *fence) { return i915_gem_request_completed(to_request(fence)); } -static bool i915_fence_enable_signaling(struct fence *fence) +static bool i915_fence_enable_signaling(struct dma_fence *fence) { if (i915_fence_signaled(fence)) return false; @@ -56,7 +56,7 @@ static bool i915_fence_enable_signaling(struct fence *fence) return true; } -static signed long i915_fence_wait(struct fence *fence, +static signed long i915_fence_wait(struct dma_fence *fence, bool interruptible, signed long timeout_jiffies) { @@ -85,26 +85,26 @@ static signed long i915_fence_wait(struct fence *fence, return timeout_jiffies; } -static void i915_fence_value_str(struct fence *fence, char *str, int size) +static void i915_fence_value_str(struct dma_fence *fence, char *str, int size) { snprintf(str, size, "%u", fence->seqno); } -static void i915_fence_timeline_value_str(struct fence *fence, char *str, +static void i915_fence_timeline_value_str(struct dma_fence *fence, char *str, int size) { snprintf(str, size, "%u", intel_engine_get_seqno(to_request(fence)->engine)); } -static void i915_fence_release(struct fence *fence) +static void i915_fence_release(struct dma_fence *fence) { struct drm_i915_gem_request *req = to_request(fence); kmem_cache_free(req->i915->requests, req); } -const struct fence_ops i915_fence_ops = { +const struct dma_fence_ops i915_fence_ops = { .get_driver_name = i915_fence_get_driver_name, .get_timeline_name = i915_fence_get_timeline_name, .enable_signaling = i915_fence_enable_signaling, @@ -388,8 +388,8 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, * The reference count is incremented atomically. If it is zero, * the lookup knows the request is unallocated and complete. 
Otherwise, * it is either still in use, or has been reallocated and reset - * with fence_init(). This increment is safe for release as we check - * that the request we have a reference to and matches the active + * with dma_fence_init(). This increment is safe for release as we + * check that the request we have a reference to and matches the active * request. * * Before we increment the refcount, we chase the request->engine @@ -412,11 +412,11 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, goto err; spin_lock_init(&req->lock); - fence_init(&req->fence, - &i915_fence_ops, - &req->lock, - engine->fence_context, - seqno); + dma_fence_init(&req->fence, + &i915_fence_ops, + &req->lock, + engine->fence_context, + seqno); i915_sw_fence_init(&req->submit, submit_notify); diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h index 974bd7bcc801..bceeaa3a5193 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.h +++ b/drivers/gpu/drm/i915/i915_gem_request.h @@ -25,7 +25,7 @@ #ifndef I915_GEM_REQUEST_H #define I915_GEM_REQUEST_H -#include +#include #include "i915_gem.h" #include "i915_sw_fence.h" @@ -62,7 +62,7 @@ struct intel_signal_node { * The requests are reference counted. 
*/ struct drm_i915_gem_request { - struct fence fence; + struct dma_fence fence; spinlock_t lock; /** On Which ring this request was generated */ @@ -145,9 +145,9 @@ struct drm_i915_gem_request { struct list_head execlist_link; }; -extern const struct fence_ops i915_fence_ops; +extern const struct dma_fence_ops i915_fence_ops; -static inline bool fence_is_i915(struct fence *fence) +static inline bool fence_is_i915(struct dma_fence *fence) { return fence->ops == &i915_fence_ops; } @@ -172,7 +172,7 @@ i915_gem_request_get_engine(struct drm_i915_gem_request *req) } static inline struct drm_i915_gem_request * -to_request(struct fence *fence) +to_request(struct dma_fence *fence) { /* We assume that NULL fence/request are interoperable */ BUILD_BUG_ON(offsetof(struct drm_i915_gem_request, fence) != 0); @@ -183,19 +183,19 @@ to_request(struct fence *fence) static inline struct drm_i915_gem_request * i915_gem_request_get(struct drm_i915_gem_request *req) { - return to_request(fence_get(&req->fence)); + return to_request(dma_fence_get(&req->fence)); } static inline struct drm_i915_gem_request * i915_gem_request_get_rcu(struct drm_i915_gem_request *req) { - return to_request(fence_get_rcu(&req->fence)); + return to_request(dma_fence_get_rcu(&req->fence)); } static inline void i915_gem_request_put(struct drm_i915_gem_request *req) { - fence_put(&req->fence); + dma_fence_put(&req->fence); } static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst, @@ -497,7 +497,7 @@ __i915_gem_active_get_rcu(const struct i915_gem_active *active) * compiler. * * The atomic operation at the heart of - * i915_gem_request_get_rcu(), see fence_get_rcu(), is + * i915_gem_request_get_rcu(), see dma_fence_get_rcu(), is * atomic_inc_not_zero() which is only a full memory barrier * when successful. 
That is, if i915_gem_request_get_rcu() * returns the request (and so with the reference counted diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c index 1e5cbc585ca2..8185002d7ec8 100644 --- a/drivers/gpu/drm/i915/i915_sw_fence.c +++ b/drivers/gpu/drm/i915/i915_sw_fence.c @@ -8,7 +8,7 @@ */ #include -#include +#include #include #include "i915_sw_fence.h" @@ -226,49 +226,50 @@ int i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence, return pending; } -struct dma_fence_cb { - struct fence_cb base; +struct i915_sw_dma_fence_cb { + struct dma_fence_cb base; struct i915_sw_fence *fence; - struct fence *dma; + struct dma_fence *dma; struct timer_list timer; }; static void timer_i915_sw_fence_wake(unsigned long data) { - struct dma_fence_cb *cb = (struct dma_fence_cb *)data; + struct i915_sw_dma_fence_cb *cb = (struct i915_sw_dma_fence_cb *)data; printk(KERN_WARNING "asynchronous wait on fence %s:%s:%x timed out\n", cb->dma->ops->get_driver_name(cb->dma), cb->dma->ops->get_timeline_name(cb->dma), cb->dma->seqno); - fence_put(cb->dma); + dma_fence_put(cb->dma); cb->dma = NULL; i915_sw_fence_commit(cb->fence); cb->timer.function = NULL; } -static void dma_i915_sw_fence_wake(struct fence *dma, struct fence_cb *data) +static void dma_i915_sw_fence_wake(struct dma_fence *dma, + struct dma_fence_cb *data) { - struct dma_fence_cb *cb = container_of(data, typeof(*cb), base); + struct i915_sw_dma_fence_cb *cb = container_of(data, typeof(*cb), base); del_timer_sync(&cb->timer); if (cb->timer.function) i915_sw_fence_commit(cb->fence); - fence_put(cb->dma); + dma_fence_put(cb->dma); kfree(cb); } int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence, - struct fence *dma, + struct dma_fence *dma, unsigned long timeout, gfp_t gfp) { - struct dma_fence_cb *cb; + struct i915_sw_dma_fence_cb *cb; int ret; - if (fence_is_signaled(dma)) + if (dma_fence_is_signaled(dma)) return 0; cb = kmalloc(sizeof(*cb), gfp); @@ -276,7 +277,7 @@ int 
i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence, if (!gfpflags_allow_blocking(gfp)) return -ENOMEM; - return fence_wait(dma, false); + return dma_fence_wait(dma, false); } cb->fence = i915_sw_fence_get(fence); @@ -287,11 +288,11 @@ int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence, timer_i915_sw_fence_wake, (unsigned long)cb, TIMER_IRQSAFE); if (timeout) { - cb->dma = fence_get(dma); + cb->dma = dma_fence_get(dma); mod_timer(&cb->timer, round_jiffies_up(jiffies + timeout)); } - ret = fence_add_callback(dma, &cb->base, dma_i915_sw_fence_wake); + ret = dma_fence_add_callback(dma, &cb->base, dma_i915_sw_fence_wake); if (ret == 0) { ret = 1; } else { @@ -305,16 +306,16 @@ int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence, int i915_sw_fence_await_reservation(struct i915_sw_fence *fence, struct reservation_object *resv, - const struct fence_ops *exclude, + const struct dma_fence_ops *exclude, bool write, unsigned long timeout, gfp_t gfp) { - struct fence *excl; + struct dma_fence *excl; int ret = 0, pending; if (write) { - struct fence **shared; + struct dma_fence **shared; unsigned int count, i; ret = reservation_object_get_fences_rcu(resv, @@ -339,7 +340,7 @@ int i915_sw_fence_await_reservation(struct i915_sw_fence *fence, } for (i = 0; i < count; i++) - fence_put(shared[i]); + dma_fence_put(shared[i]); kfree(shared); } else { excl = reservation_object_get_excl_rcu(resv); @@ -356,7 +357,7 @@ int i915_sw_fence_await_reservation(struct i915_sw_fence *fence, ret |= pending; } - fence_put(excl); + dma_fence_put(excl); return ret; } diff --git a/drivers/gpu/drm/i915/i915_sw_fence.h b/drivers/gpu/drm/i915/i915_sw_fence.h index 373141602ca4..cd239e92f67f 100644 --- a/drivers/gpu/drm/i915/i915_sw_fence.h +++ b/drivers/gpu/drm/i915/i915_sw_fence.h @@ -16,8 +16,8 @@ #include struct completion; -struct fence; -struct fence_ops; +struct dma_fence; +struct dma_fence_ops; struct reservation_object; struct i915_sw_fence { @@ -47,12 +47,12 @@ int 
i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence, struct i915_sw_fence *after, wait_queue_t *wq); int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence, - struct fence *dma, + struct dma_fence *dma, unsigned long timeout, gfp_t gfp); int i915_sw_fence_await_reservation(struct i915_sw_fence *fence, struct reservation_object *resv, - const struct fence_ops *exclude, + const struct dma_fence_ops *exclude, bool write, unsigned long timeout, gfp_t gfp); diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 178798002a73..5c912c25f7d3 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -491,7 +491,7 @@ TRACE_EVENT(i915_gem_ring_dispatch, __entry->ring = req->engine->id; __entry->seqno = req->fence.seqno; __entry->flags = flags; - fence_enable_sw_signaling(&req->fence); + dma_fence_enable_sw_signaling(&req->fence); ), TP_printk("dev=%u, ring=%u, seqno=%u, flags=%x", diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index 23fc1042fed4..56efcc507ea2 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -464,7 +464,7 @@ static int intel_breadcrumbs_signaler(void *arg) &request->signaling.wait); local_bh_disable(); - fence_signal(&request->fence); + dma_fence_signal(&request->fence); local_bh_enable(); /* kick start the tasklets */ /* Find the next oldest signal. 
Note that as we have @@ -502,7 +502,7 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request) struct rb_node *parent, **p; bool first, wakeup; - /* locked by fence_enable_sw_signaling() */ + /* locked by dma_fence_enable_sw_signaling() */ assert_spin_locked(&request->lock); request->signaling.wait.tsk = b->signaler; diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 2dc94812bea5..8cceb345aa0f 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -245,7 +245,7 @@ void intel_engine_setup_common(struct intel_engine_cs *engine) INIT_LIST_HEAD(&engine->execlist_queue); spin_lock_init(&engine->execlist_lock); - engine->fence_context = fence_context_alloc(1); + engine->fence_context = dma_fence_context_alloc(1); intel_engine_init_requests(engine); intel_engine_init_hangcheck(engine); diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h index d0da52f2a806..940bf4992fe2 100644 --- a/drivers/gpu/drm/msm/msm_drv.h +++ b/drivers/gpu/drm/msm/msm_drv.h @@ -217,7 +217,7 @@ void msm_gem_vunmap(struct drm_gem_object *obj); int msm_gem_sync_object(struct drm_gem_object *obj, struct msm_fence_context *fctx, bool exclusive); void msm_gem_move_to_active(struct drm_gem_object *obj, - struct msm_gpu *gpu, bool exclusive, struct fence *fence); + struct msm_gpu *gpu, bool exclusive, struct dma_fence *fence); void msm_gem_move_to_inactive(struct drm_gem_object *obj); int msm_gem_cpu_prep(struct drm_gem_object *obj, uint32_t op, ktime_t *timeout); int msm_gem_cpu_fini(struct drm_gem_object *obj); diff --git a/drivers/gpu/drm/msm/msm_fence.c b/drivers/gpu/drm/msm/msm_fence.c index a9b9b1c95a2e..3f299c537b77 100644 --- a/drivers/gpu/drm/msm/msm_fence.c +++ b/drivers/gpu/drm/msm/msm_fence.c @@ -15,7 +15,7 @@ * this program. If not, see . 
*/ -#include +#include #include "msm_drv.h" #include "msm_fence.h" @@ -32,7 +32,7 @@ msm_fence_context_alloc(struct drm_device *dev, const char *name) fctx->dev = dev; fctx->name = name; - fctx->context = fence_context_alloc(1); + fctx->context = dma_fence_context_alloc(1); init_waitqueue_head(&fctx->event); spin_lock_init(&fctx->spinlock); @@ -100,52 +100,52 @@ void msm_update_fence(struct msm_fence_context *fctx, uint32_t fence) struct msm_fence { struct msm_fence_context *fctx; - struct fence base; + struct dma_fence base; }; -static inline struct msm_fence *to_msm_fence(struct fence *fence) +static inline struct msm_fence *to_msm_fence(struct dma_fence *fence) { return container_of(fence, struct msm_fence, base); } -static const char *msm_fence_get_driver_name(struct fence *fence) +static const char *msm_fence_get_driver_name(struct dma_fence *fence) { return "msm"; } -static const char *msm_fence_get_timeline_name(struct fence *fence) +static const char *msm_fence_get_timeline_name(struct dma_fence *fence) { struct msm_fence *f = to_msm_fence(fence); return f->fctx->name; } -static bool msm_fence_enable_signaling(struct fence *fence) +static bool msm_fence_enable_signaling(struct dma_fence *fence) { return true; } -static bool msm_fence_signaled(struct fence *fence) +static bool msm_fence_signaled(struct dma_fence *fence) { struct msm_fence *f = to_msm_fence(fence); return fence_completed(f->fctx, f->base.seqno); } -static void msm_fence_release(struct fence *fence) +static void msm_fence_release(struct dma_fence *fence) { struct msm_fence *f = to_msm_fence(fence); kfree_rcu(f, base.rcu); } -static const struct fence_ops msm_fence_ops = { +static const struct dma_fence_ops msm_fence_ops = { .get_driver_name = msm_fence_get_driver_name, .get_timeline_name = msm_fence_get_timeline_name, .enable_signaling = msm_fence_enable_signaling, .signaled = msm_fence_signaled, - .wait = fence_default_wait, + .wait = dma_fence_default_wait, .release = msm_fence_release, }; 
-struct fence * +struct dma_fence * msm_fence_alloc(struct msm_fence_context *fctx) { struct msm_fence *f; @@ -156,8 +156,8 @@ msm_fence_alloc(struct msm_fence_context *fctx) f->fctx = fctx; - fence_init(&f->base, &msm_fence_ops, &fctx->spinlock, - fctx->context, ++fctx->last_fence); + dma_fence_init(&f->base, &msm_fence_ops, &fctx->spinlock, + fctx->context, ++fctx->last_fence); return &f->base; } diff --git a/drivers/gpu/drm/msm/msm_fence.h b/drivers/gpu/drm/msm/msm_fence.h index ceb5b3d314b4..56061aa1959d 100644 --- a/drivers/gpu/drm/msm/msm_fence.h +++ b/drivers/gpu/drm/msm/msm_fence.h @@ -41,6 +41,6 @@ int msm_queue_fence_cb(struct msm_fence_context *fctx, struct msm_fence_cb *cb, uint32_t fence); void msm_update_fence(struct msm_fence_context *fctx, uint32_t fence); -struct fence * msm_fence_alloc(struct msm_fence_context *fctx); +struct dma_fence * msm_fence_alloc(struct msm_fence_context *fctx); #endif diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index b6ac27e31929..57db7dbbb618 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -521,7 +521,7 @@ int msm_gem_sync_object(struct drm_gem_object *obj, { struct msm_gem_object *msm_obj = to_msm_bo(obj); struct reservation_object_list *fobj; - struct fence *fence; + struct dma_fence *fence; int i, ret; if (!exclusive) { @@ -540,7 +540,7 @@ int msm_gem_sync_object(struct drm_gem_object *obj, fence = reservation_object_get_excl(msm_obj->resv); /* don't need to wait on our own fences, since ring is fifo */ if (fence && (fence->context != fctx->context)) { - ret = fence_wait(fence, true); + ret = dma_fence_wait(fence, true); if (ret) return ret; } @@ -553,7 +553,7 @@ int msm_gem_sync_object(struct drm_gem_object *obj, fence = rcu_dereference_protected(fobj->shared[i], reservation_object_held(msm_obj->resv)); if (fence->context != fctx->context) { - ret = fence_wait(fence, true); + ret = dma_fence_wait(fence, true); if (ret) return ret; } @@ -563,7 +563,7 @@ 
int msm_gem_sync_object(struct drm_gem_object *obj, } void msm_gem_move_to_active(struct drm_gem_object *obj, - struct msm_gpu *gpu, bool exclusive, struct fence *fence) + struct msm_gpu *gpu, bool exclusive, struct dma_fence *fence) { struct msm_gem_object *msm_obj = to_msm_bo(obj); WARN_ON(msm_obj->madv != MSM_MADV_WILLNEED); @@ -616,10 +616,10 @@ int msm_gem_cpu_fini(struct drm_gem_object *obj) } #ifdef CONFIG_DEBUG_FS -static void describe_fence(struct fence *fence, const char *type, +static void describe_fence(struct dma_fence *fence, const char *type, struct seq_file *m) { - if (!fence_is_signaled(fence)) + if (!dma_fence_is_signaled(fence)) seq_printf(m, "\t%9s: %s %s seq %u\n", type, fence->ops->get_driver_name(fence), fence->ops->get_timeline_name(fence), @@ -631,7 +631,7 @@ void msm_gem_describe(struct drm_gem_object *obj, struct seq_file *m) struct msm_gem_object *msm_obj = to_msm_bo(obj); struct reservation_object *robj = msm_obj->resv; struct reservation_object_list *fobj; - struct fence *fence; + struct dma_fence *fence; uint64_t off = drm_vma_node_start(&obj->vma_node); const char *madv; diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h index b2f13cfe945e..2cb8551fda70 100644 --- a/drivers/gpu/drm/msm/msm_gem.h +++ b/drivers/gpu/drm/msm/msm_gem.h @@ -104,7 +104,7 @@ struct msm_gem_submit { struct list_head node; /* node in gpu submit_list */ struct list_head bo_list; struct ww_acquire_ctx ticket; - struct fence *fence; + struct dma_fence *fence; struct pid *pid; /* submitting process */ bool valid; /* true if no cmdstream patching needed */ unsigned int nr_cmds; diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index b6a0f37a65f3..25e8786fa4ca 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -60,7 +60,7 @@ static struct msm_gem_submit *submit_create(struct drm_device *dev, void msm_gem_submit_free(struct msm_gem_submit *submit) { - 
fence_put(submit->fence); + dma_fence_put(submit->fence); list_del(&submit->node); put_pid(submit->pid); kfree(submit); @@ -380,7 +380,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, struct msm_file_private *ctx = file->driver_priv; struct msm_gem_submit *submit; struct msm_gpu *gpu = priv->gpu; - struct fence *in_fence = NULL; + struct dma_fence *in_fence = NULL; struct sync_file *sync_file = NULL; int out_fence_fd = -1; unsigned i; @@ -439,7 +439,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, */ if (in_fence->context != gpu->fctx->context) { - ret = fence_wait(in_fence, true); + ret = dma_fence_wait(in_fence, true); if (ret) goto out; } @@ -542,7 +542,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, out: if (in_fence) - fence_put(in_fence); + dma_fence_put(in_fence); submit_cleanup(submit); if (ret) msm_gem_submit_free(submit); diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index 5bb09838b5ae..3249707e6834 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -476,7 +476,7 @@ static void retire_submits(struct msm_gpu *gpu) submit = list_first_entry(&gpu->submit_list, struct msm_gem_submit, node); - if (fence_is_signaled(submit->fence)) { + if (dma_fence_is_signaled(submit->fence)) { retire_submit(gpu, submit); } else { break; diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 343b8659472c..ec8ac756aab4 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -83,13 +83,13 @@ nv10_bo_get_tile_region(struct drm_device *dev, int i) static void nv10_bo_put_tile_region(struct drm_device *dev, struct nouveau_drm_tile *tile, - struct fence *fence) + struct dma_fence *fence) { struct nouveau_drm *drm = nouveau_drm(dev); if (tile) { spin_lock(&drm->tile.lock); - tile->fence = (struct nouveau_fence *)fence_get(fence); + tile->fence = (struct nouveau_fence *)dma_fence_get(fence); 
tile->used = false; spin_unlock(&drm->tile.lock); } @@ -1243,7 +1243,7 @@ nouveau_bo_vm_cleanup(struct ttm_buffer_object *bo, { struct nouveau_drm *drm = nouveau_bdev(bo->bdev); struct drm_device *dev = drm->dev; - struct fence *fence = reservation_object_get_excl(bo->resv); + struct dma_fence *fence = reservation_object_get_excl(bo->resv); nv10_bo_put_tile_region(dev, *old_tile, fence); *old_tile = new_tile; diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c index 4bb9ab892ae1..e9529ee6bc23 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.c +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c @@ -28,7 +28,7 @@ #include #include -#include +#include #include #include @@ -38,11 +38,11 @@ #include "nouveau_dma.h" #include "nouveau_fence.h" -static const struct fence_ops nouveau_fence_ops_uevent; -static const struct fence_ops nouveau_fence_ops_legacy; +static const struct dma_fence_ops nouveau_fence_ops_uevent; +static const struct dma_fence_ops nouveau_fence_ops_legacy; static inline struct nouveau_fence * -from_fence(struct fence *fence) +from_fence(struct dma_fence *fence) { return container_of(fence, struct nouveau_fence, base); } @@ -58,23 +58,23 @@ nouveau_fence_signal(struct nouveau_fence *fence) { int drop = 0; - fence_signal_locked(&fence->base); + dma_fence_signal_locked(&fence->base); list_del(&fence->head); rcu_assign_pointer(fence->channel, NULL); - if (test_bit(FENCE_FLAG_USER_BITS, &fence->base.flags)) { + if (test_bit(DMA_FENCE_FLAG_USER_BITS, &fence->base.flags)) { struct nouveau_fence_chan *fctx = nouveau_fctx(fence); if (!--fctx->notify_ref) drop = 1; } - fence_put(&fence->base); + dma_fence_put(&fence->base); return drop; } static struct nouveau_fence * -nouveau_local_fence(struct fence *fence, struct nouveau_drm *drm) { +nouveau_local_fence(struct dma_fence *fence, struct nouveau_drm *drm) { struct nouveau_fence_priv *priv = (void*)drm->fence; if (fence->ops != &nouveau_fence_ops_legacy && @@ -201,7 +201,7 @@ 
nouveau_fence_context_new(struct nouveau_channel *chan, struct nouveau_fence_cha struct nouveau_fence_work { struct work_struct work; - struct fence_cb cb; + struct dma_fence_cb cb; void (*func)(void *); void *data; }; @@ -214,7 +214,7 @@ nouveau_fence_work_handler(struct work_struct *kwork) kfree(work); } -static void nouveau_fence_work_cb(struct fence *fence, struct fence_cb *cb) +static void nouveau_fence_work_cb(struct dma_fence *fence, struct dma_fence_cb *cb) { struct nouveau_fence_work *work = container_of(cb, typeof(*work), cb); @@ -222,12 +222,12 @@ static void nouveau_fence_work_cb(struct fence *fence, struct fence_cb *cb) } void -nouveau_fence_work(struct fence *fence, +nouveau_fence_work(struct dma_fence *fence, void (*func)(void *), void *data) { struct nouveau_fence_work *work; - if (fence_is_signaled(fence)) + if (dma_fence_is_signaled(fence)) goto err; work = kmalloc(sizeof(*work), GFP_KERNEL); @@ -245,7 +245,7 @@ nouveau_fence_work(struct fence *fence, work->func = func; work->data = data; - if (fence_add_callback(fence, &work->cb, nouveau_fence_work_cb) < 0) + if (dma_fence_add_callback(fence, &work->cb, nouveau_fence_work_cb) < 0) goto err_free; return; @@ -266,17 +266,17 @@ nouveau_fence_emit(struct nouveau_fence *fence, struct nouveau_channel *chan) fence->timeout = jiffies + (15 * HZ); if (priv->uevent) - fence_init(&fence->base, &nouveau_fence_ops_uevent, - &fctx->lock, fctx->context, ++fctx->sequence); + dma_fence_init(&fence->base, &nouveau_fence_ops_uevent, + &fctx->lock, fctx->context, ++fctx->sequence); else - fence_init(&fence->base, &nouveau_fence_ops_legacy, - &fctx->lock, fctx->context, ++fctx->sequence); + dma_fence_init(&fence->base, &nouveau_fence_ops_legacy, + &fctx->lock, fctx->context, ++fctx->sequence); kref_get(&fctx->fence_ref); - trace_fence_emit(&fence->base); + trace_dma_fence_emit(&fence->base); ret = fctx->emit(fence); if (!ret) { - fence_get(&fence->base); + dma_fence_get(&fence->base); spin_lock_irq(&fctx->lock); if 
(nouveau_fence_update(chan, fctx)) @@ -298,7 +298,7 @@ nouveau_fence_done(struct nouveau_fence *fence) struct nouveau_channel *chan; unsigned long flags; - if (test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->base.flags)) + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->base.flags)) return true; spin_lock_irqsave(&fctx->lock, flags); @@ -307,11 +307,11 @@ nouveau_fence_done(struct nouveau_fence *fence) nvif_notify_put(&fctx->notify); spin_unlock_irqrestore(&fctx->lock, flags); } - return fence_is_signaled(&fence->base); + return dma_fence_is_signaled(&fence->base); } static long -nouveau_fence_wait_legacy(struct fence *f, bool intr, long wait) +nouveau_fence_wait_legacy(struct dma_fence *f, bool intr, long wait) { struct nouveau_fence *fence = from_fence(f); unsigned long sleep_time = NSEC_PER_MSEC / 1000; @@ -378,7 +378,7 @@ nouveau_fence_wait(struct nouveau_fence *fence, bool lazy, bool intr) if (!lazy) return nouveau_fence_wait_busy(fence, intr); - ret = fence_wait_timeout(&fence->base, intr, 15 * HZ); + ret = dma_fence_wait_timeout(&fence->base, intr, 15 * HZ); if (ret < 0) return ret; else if (!ret) @@ -391,7 +391,7 @@ int nouveau_fence_sync(struct nouveau_bo *nvbo, struct nouveau_channel *chan, bool exclusive, bool intr) { struct nouveau_fence_chan *fctx = chan->fence; - struct fence *fence; + struct dma_fence *fence; struct reservation_object *resv = nvbo->bo.resv; struct reservation_object_list *fobj; struct nouveau_fence *f; @@ -421,7 +421,7 @@ nouveau_fence_sync(struct nouveau_bo *nvbo, struct nouveau_channel *chan, bool e } if (must_wait) - ret = fence_wait(fence, intr); + ret = dma_fence_wait(fence, intr); return ret; } @@ -446,7 +446,7 @@ nouveau_fence_sync(struct nouveau_bo *nvbo, struct nouveau_channel *chan, bool e } if (must_wait) - ret = fence_wait(fence, intr); + ret = dma_fence_wait(fence, intr); } return ret; @@ -456,7 +456,7 @@ void nouveau_fence_unref(struct nouveau_fence **pfence) { if (*pfence) - fence_put(&(*pfence)->base); + 
dma_fence_put(&(*pfence)->base); *pfence = NULL; } @@ -484,12 +484,12 @@ nouveau_fence_new(struct nouveau_channel *chan, bool sysmem, return ret; } -static const char *nouveau_fence_get_get_driver_name(struct fence *fence) +static const char *nouveau_fence_get_get_driver_name(struct dma_fence *fence) { return "nouveau"; } -static const char *nouveau_fence_get_timeline_name(struct fence *f) +static const char *nouveau_fence_get_timeline_name(struct dma_fence *f) { struct nouveau_fence *fence = from_fence(f); struct nouveau_fence_chan *fctx = nouveau_fctx(fence); @@ -503,7 +503,7 @@ static const char *nouveau_fence_get_timeline_name(struct fence *f) * result. The drm node should still be there, so we can derive the index from * the fence context. */ -static bool nouveau_fence_is_signaled(struct fence *f) +static bool nouveau_fence_is_signaled(struct dma_fence *f) { struct nouveau_fence *fence = from_fence(f); struct nouveau_fence_chan *fctx = nouveau_fctx(fence); @@ -519,7 +519,7 @@ static bool nouveau_fence_is_signaled(struct fence *f) return ret; } -static bool nouveau_fence_no_signaling(struct fence *f) +static bool nouveau_fence_no_signaling(struct dma_fence *f) { struct nouveau_fence *fence = from_fence(f); @@ -530,30 +530,30 @@ static bool nouveau_fence_no_signaling(struct fence *f) WARN_ON(atomic_read(&fence->base.refcount.refcount) <= 1); /* - * This needs uevents to work correctly, but fence_add_callback relies on + * This needs uevents to work correctly, but dma_fence_add_callback relies on * being able to enable signaling. It will still get signaled eventually, * just not right away. 
*/ if (nouveau_fence_is_signaled(f)) { list_del(&fence->head); - fence_put(&fence->base); + dma_fence_put(&fence->base); return false; } return true; } -static void nouveau_fence_release(struct fence *f) +static void nouveau_fence_release(struct dma_fence *f) { struct nouveau_fence *fence = from_fence(f); struct nouveau_fence_chan *fctx = nouveau_fctx(fence); kref_put(&fctx->fence_ref, nouveau_fence_context_put); - fence_free(&fence->base); + dma_fence_free(&fence->base); } -static const struct fence_ops nouveau_fence_ops_legacy = { +static const struct dma_fence_ops nouveau_fence_ops_legacy = { .get_driver_name = nouveau_fence_get_get_driver_name, .get_timeline_name = nouveau_fence_get_timeline_name, .enable_signaling = nouveau_fence_no_signaling, @@ -562,7 +562,7 @@ static const struct fence_ops nouveau_fence_ops_legacy = { .release = nouveau_fence_release }; -static bool nouveau_fence_enable_signaling(struct fence *f) +static bool nouveau_fence_enable_signaling(struct dma_fence *f) { struct nouveau_fence *fence = from_fence(f); struct nouveau_fence_chan *fctx = nouveau_fctx(fence); @@ -573,18 +573,18 @@ static bool nouveau_fence_enable_signaling(struct fence *f) ret = nouveau_fence_no_signaling(f); if (ret) - set_bit(FENCE_FLAG_USER_BITS, &fence->base.flags); + set_bit(DMA_FENCE_FLAG_USER_BITS, &fence->base.flags); else if (!--fctx->notify_ref) nvif_notify_put(&fctx->notify); return ret; } -static const struct fence_ops nouveau_fence_ops_uevent = { +static const struct dma_fence_ops nouveau_fence_ops_uevent = { .get_driver_name = nouveau_fence_get_get_driver_name, .get_timeline_name = nouveau_fence_get_timeline_name, .enable_signaling = nouveau_fence_enable_signaling, .signaled = nouveau_fence_is_signaled, - .wait = fence_default_wait, + .wait = dma_fence_default_wait, .release = NULL }; diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.h b/drivers/gpu/drm/nouveau/nouveau_fence.h index 64c4ce7115ad..41f3c019e534 100644 --- 
a/drivers/gpu/drm/nouveau/nouveau_fence.h +++ b/drivers/gpu/drm/nouveau/nouveau_fence.h @@ -1,14 +1,14 @@ #ifndef __NOUVEAU_FENCE_H__ #define __NOUVEAU_FENCE_H__ -#include +#include #include struct nouveau_drm; struct nouveau_bo; struct nouveau_fence { - struct fence base; + struct dma_fence base; struct list_head head; @@ -24,7 +24,7 @@ void nouveau_fence_unref(struct nouveau_fence **); int nouveau_fence_emit(struct nouveau_fence *, struct nouveau_channel *); bool nouveau_fence_done(struct nouveau_fence *); -void nouveau_fence_work(struct fence *, void (*)(void *), void *); +void nouveau_fence_work(struct dma_fence *, void (*)(void *), void *); int nouveau_fence_wait(struct nouveau_fence *, bool lazy, bool intr); int nouveau_fence_sync(struct nouveau_bo *, struct nouveau_channel *, bool exclusive, bool intr); diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c index 0bd7164bc817..7f083c95f422 100644 --- a/drivers/gpu/drm/nouveau/nouveau_gem.c +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c @@ -119,7 +119,7 @@ nouveau_gem_object_unmap(struct nouveau_bo *nvbo, struct nvkm_vma *vma) const bool mapped = nvbo->bo.mem.mem_type != TTM_PL_SYSTEM; struct reservation_object *resv = nvbo->bo.resv; struct reservation_object_list *fobj; - struct fence *fence = NULL; + struct dma_fence *fence = NULL; fobj = reservation_object_get_list(resv); diff --git a/drivers/gpu/drm/nouveau/nv04_fence.c b/drivers/gpu/drm/nouveau/nv04_fence.c index 1915b7b82a59..fa8f2375c398 100644 --- a/drivers/gpu/drm/nouveau/nv04_fence.c +++ b/drivers/gpu/drm/nouveau/nv04_fence.c @@ -110,6 +110,6 @@ nv04_fence_create(struct nouveau_drm *drm) priv->base.context_new = nv04_fence_context_new; priv->base.context_del = nv04_fence_context_del; priv->base.contexts = 15; - priv->base.context_base = fence_context_alloc(priv->base.contexts); + priv->base.context_base = dma_fence_context_alloc(priv->base.contexts); return 0; } diff --git a/drivers/gpu/drm/nouveau/nv10_fence.c 
b/drivers/gpu/drm/nouveau/nv10_fence.c index 4e3de34ff6f4..f99fcf56928a 100644 --- a/drivers/gpu/drm/nouveau/nv10_fence.c +++ b/drivers/gpu/drm/nouveau/nv10_fence.c @@ -107,7 +107,7 @@ nv10_fence_create(struct nouveau_drm *drm) priv->base.context_new = nv10_fence_context_new; priv->base.context_del = nv10_fence_context_del; priv->base.contexts = 31; - priv->base.context_base = fence_context_alloc(priv->base.contexts); + priv->base.context_base = dma_fence_context_alloc(priv->base.contexts); spin_lock_init(&priv->lock); return 0; } diff --git a/drivers/gpu/drm/nouveau/nv17_fence.c b/drivers/gpu/drm/nouveau/nv17_fence.c index 7d5e562a55c5..79bc01111351 100644 --- a/drivers/gpu/drm/nouveau/nv17_fence.c +++ b/drivers/gpu/drm/nouveau/nv17_fence.c @@ -126,7 +126,7 @@ nv17_fence_create(struct nouveau_drm *drm) priv->base.context_new = nv17_fence_context_new; priv->base.context_del = nv10_fence_context_del; priv->base.contexts = 31; - priv->base.context_base = fence_context_alloc(priv->base.contexts); + priv->base.context_base = dma_fence_context_alloc(priv->base.contexts); spin_lock_init(&priv->lock); ret = nouveau_bo_new(drm->dev, 4096, 0x1000, TTM_PL_FLAG_VRAM, diff --git a/drivers/gpu/drm/nouveau/nv50_fence.c b/drivers/gpu/drm/nouveau/nv50_fence.c index 4d6f202b7770..8c5295414578 100644 --- a/drivers/gpu/drm/nouveau/nv50_fence.c +++ b/drivers/gpu/drm/nouveau/nv50_fence.c @@ -97,7 +97,7 @@ nv50_fence_create(struct nouveau_drm *drm) priv->base.context_new = nv50_fence_context_new; priv->base.context_del = nv10_fence_context_del; priv->base.contexts = 127; - priv->base.context_base = fence_context_alloc(priv->base.contexts); + priv->base.context_base = dma_fence_context_alloc(priv->base.contexts); spin_lock_init(&priv->lock); ret = nouveau_bo_new(drm->dev, 4096, 0x1000, TTM_PL_FLAG_VRAM, diff --git a/drivers/gpu/drm/nouveau/nv84_fence.c b/drivers/gpu/drm/nouveau/nv84_fence.c index 18bde9d8e6d6..23ef04b4e0b2 100644 --- a/drivers/gpu/drm/nouveau/nv84_fence.c +++ 
b/drivers/gpu/drm/nouveau/nv84_fence.c @@ -229,7 +229,7 @@ nv84_fence_create(struct nouveau_drm *drm) priv->base.context_del = nv84_fence_context_del; priv->base.contexts = fifo->nr; - priv->base.context_base = fence_context_alloc(priv->base.contexts); + priv->base.context_base = dma_fence_context_alloc(priv->base.contexts); priv->base.uevent = true; /* Use VRAM if there is any ; otherwise fallback to system memory */ diff --git a/drivers/gpu/drm/qxl/qxl_drv.h b/drivers/gpu/drm/qxl/qxl_drv.h index 5f3e5ad99de7..84995ebc6ffc 100644 --- a/drivers/gpu/drm/qxl/qxl_drv.h +++ b/drivers/gpu/drm/qxl/qxl_drv.h @@ -31,7 +31,7 @@ * Definitions taken from spice-protocol, plus kernel driver specific bits. */ -#include +#include #include #include #include @@ -190,7 +190,7 @@ enum { * spice-protocol/qxl_dev.h */ #define QXL_MAX_RES 96 struct qxl_release { - struct fence base; + struct dma_fence base; int id; int type; diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c index cd83f050cf3e..50b4e522f05f 100644 --- a/drivers/gpu/drm/qxl/qxl_release.c +++ b/drivers/gpu/drm/qxl/qxl_release.c @@ -21,7 +21,7 @@ */ #include "qxl_drv.h" #include "qxl_object.h" -#include +#include /* * drawable cmd cache - allocate a bunch of VRAM pages, suballocate @@ -40,23 +40,24 @@ static const int release_size_per_bo[] = { RELEASE_SIZE, SURFACE_RELEASE_SIZE, RELEASE_SIZE }; static const int releases_per_bo[] = { RELEASES_PER_BO, SURFACE_RELEASES_PER_BO, RELEASES_PER_BO }; -static const char *qxl_get_driver_name(struct fence *fence) +static const char *qxl_get_driver_name(struct dma_fence *fence) { return "qxl"; } -static const char *qxl_get_timeline_name(struct fence *fence) +static const char *qxl_get_timeline_name(struct dma_fence *fence) { return "release"; } -static bool qxl_nop_signaling(struct fence *fence) +static bool qxl_nop_signaling(struct dma_fence *fence) { /* fences are always automatically signaled, so just pretend we did this.. 
*/ return true; } -static long qxl_fence_wait(struct fence *fence, bool intr, signed long timeout) +static long qxl_fence_wait(struct dma_fence *fence, bool intr, + signed long timeout) { struct qxl_device *qdev; struct qxl_release *release; @@ -71,7 +72,7 @@ static long qxl_fence_wait(struct fence *fence, bool intr, signed long timeout) retry: sc++; - if (fence_is_signaled(fence)) + if (dma_fence_is_signaled(fence)) goto signaled; qxl_io_notify_oom(qdev); @@ -80,11 +81,11 @@ retry: if (!qxl_queue_garbage_collect(qdev, true)) break; - if (fence_is_signaled(fence)) + if (dma_fence_is_signaled(fence)) goto signaled; } - if (fence_is_signaled(fence)) + if (dma_fence_is_signaled(fence)) goto signaled; if (have_drawable_releases || sc < 4) { @@ -96,9 +97,9 @@ retry: return 0; if (have_drawable_releases && sc > 300) { - FENCE_WARN(fence, "failed to wait on release %llu " - "after spincount %d\n", - fence->context & ~0xf0000000, sc); + DMA_FENCE_WARN(fence, "failed to wait on release %llu " + "after spincount %d\n", + fence->context & ~0xf0000000, sc); goto signaled; } goto retry; @@ -115,7 +116,7 @@ signaled: return end - cur; } -static const struct fence_ops qxl_fence_ops = { +static const struct dma_fence_ops qxl_fence_ops = { .get_driver_name = qxl_get_driver_name, .get_timeline_name = qxl_get_timeline_name, .enable_signaling = qxl_nop_signaling, @@ -192,8 +193,8 @@ qxl_release_free(struct qxl_device *qdev, WARN_ON(list_empty(&release->bos)); qxl_release_free_list(release); - fence_signal(&release->base); - fence_put(&release->base); + dma_fence_signal(&release->base); + dma_fence_put(&release->base); } else { qxl_release_free_list(release); kfree(release); @@ -453,9 +454,9 @@ void qxl_release_fence_buffer_objects(struct qxl_release *release) * Since we never really allocated a context and we don't want to conflict, * set the highest bits. This will break if we really allow exporting of dma-bufs. 
*/ - fence_init(&release->base, &qxl_fence_ops, &qdev->release_lock, - release->id | 0xf0000000, release->base.seqno); - trace_fence_emit(&release->base); + dma_fence_init(&release->base, &qxl_fence_ops, &qdev->release_lock, + release->id | 0xf0000000, release->base.seqno); + trace_dma_fence_emit(&release->base); driver = bdev->driver; glob = bo->glob; diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 1b0dcad916b0..44e0c5ed6418 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -66,7 +66,7 @@ #include #include #include -#include +#include #include #include @@ -367,7 +367,7 @@ struct radeon_fence_driver { }; struct radeon_fence { - struct fence base; + struct dma_fence base; struct radeon_device *rdev; uint64_t seq; @@ -746,7 +746,7 @@ struct radeon_flip_work { uint64_t base; struct drm_pending_vblank_event *event; struct radeon_bo *old_rbo; - struct fence *fence; + struct dma_fence *fence; bool async; }; @@ -2514,9 +2514,9 @@ void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v); /* * Cast helper */ -extern const struct fence_ops radeon_fence_ops; +extern const struct dma_fence_ops radeon_fence_ops; -static inline struct radeon_fence *to_radeon_fence(struct fence *f) +static inline struct radeon_fence *to_radeon_fence(struct dma_fence *f) { struct radeon_fence *__f = container_of(f, struct radeon_fence, base); diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index eb92aef46e3c..36b7ac7e57e5 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -1320,7 +1320,7 @@ int radeon_device_init(struct radeon_device *rdev, for (i = 0; i < RADEON_NUM_RINGS; i++) { rdev->ring[i].idx = i; } - rdev->fence_context = fence_context_alloc(RADEON_NUM_RINGS); + rdev->fence_context = dma_fence_context_alloc(RADEON_NUM_RINGS); DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n", 
radeon_family_name[rdev->family], pdev->vendor, pdev->device, diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index cdb8cb568c15..e7409e8a9f87 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -437,7 +437,7 @@ static void radeon_flip_work_func(struct work_struct *__work) down_read(&rdev->exclusive_lock); } } else - r = fence_wait(work->fence, false); + r = dma_fence_wait(work->fence, false); if (r) DRM_ERROR("failed to wait on page flip fence (%d)!\n", r); @@ -447,7 +447,7 @@ static void radeon_flip_work_func(struct work_struct *__work) * confused about which BO the CRTC is scanning out */ - fence_put(work->fence); + dma_fence_put(work->fence); work->fence = NULL; } @@ -542,7 +542,7 @@ static int radeon_crtc_page_flip_target(struct drm_crtc *crtc, DRM_ERROR("failed to pin new rbo buffer before flip\n"); goto cleanup; } - work->fence = fence_get(reservation_object_get_excl(new_rbo->tbo.resv)); + work->fence = dma_fence_get(reservation_object_get_excl(new_rbo->tbo.resv)); radeon_bo_get_tiling_flags(new_rbo, &tiling_flags, NULL); radeon_bo_unreserve(new_rbo); @@ -617,7 +617,7 @@ pflip_cleanup: cleanup: drm_gem_object_unreference_unlocked(&work->old_rbo->gem_base); - fence_put(work->fence); + dma_fence_put(work->fence); kfree(work); return r; } diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c index 7ef075acde9c..ef09f0a63754 100644 --- a/drivers/gpu/drm/radeon/radeon_fence.c +++ b/drivers/gpu/drm/radeon/radeon_fence.c @@ -141,8 +141,10 @@ int radeon_fence_emit(struct radeon_device *rdev, (*fence)->seq = seq = ++rdev->fence_drv[ring].sync_seq[ring]; (*fence)->ring = ring; (*fence)->is_vm_update = false; - fence_init(&(*fence)->base, &radeon_fence_ops, - &rdev->fence_queue.lock, rdev->fence_context + ring, seq); + dma_fence_init(&(*fence)->base, &radeon_fence_ops, + &rdev->fence_queue.lock, + rdev->fence_context + ring, + seq); 
radeon_fence_ring_emit(rdev, ring, *fence); trace_radeon_fence_emit(rdev->ddev, ring, (*fence)->seq); radeon_fence_schedule_check(rdev, ring); @@ -169,18 +171,18 @@ static int radeon_fence_check_signaled(wait_queue_t *wait, unsigned mode, int fl */ seq = atomic64_read(&fence->rdev->fence_drv[fence->ring].last_seq); if (seq >= fence->seq) { - int ret = fence_signal_locked(&fence->base); + int ret = dma_fence_signal_locked(&fence->base); if (!ret) - FENCE_TRACE(&fence->base, "signaled from irq context\n"); + DMA_FENCE_TRACE(&fence->base, "signaled from irq context\n"); else - FENCE_TRACE(&fence->base, "was already signaled\n"); + DMA_FENCE_TRACE(&fence->base, "was already signaled\n"); radeon_irq_kms_sw_irq_put(fence->rdev, fence->ring); __remove_wait_queue(&fence->rdev->fence_queue, &fence->fence_wake); - fence_put(&fence->base); + dma_fence_put(&fence->base); } else - FENCE_TRACE(&fence->base, "pending\n"); + DMA_FENCE_TRACE(&fence->base, "pending\n"); return 0; } @@ -351,7 +353,7 @@ static bool radeon_fence_seq_signaled(struct radeon_device *rdev, return false; } -static bool radeon_fence_is_signaled(struct fence *f) +static bool radeon_fence_is_signaled(struct dma_fence *f) { struct radeon_fence *fence = to_radeon_fence(f); struct radeon_device *rdev = fence->rdev; @@ -381,7 +383,7 @@ static bool radeon_fence_is_signaled(struct fence *f) * to fence_queue that checks if this fence is signaled, and if so it * signals the fence and removes itself. 
*/ -static bool radeon_fence_enable_signaling(struct fence *f) +static bool radeon_fence_enable_signaling(struct dma_fence *f) { struct radeon_fence *fence = to_radeon_fence(f); struct radeon_device *rdev = fence->rdev; @@ -414,9 +416,9 @@ static bool radeon_fence_enable_signaling(struct fence *f) fence->fence_wake.private = NULL; fence->fence_wake.func = radeon_fence_check_signaled; __add_wait_queue(&rdev->fence_queue, &fence->fence_wake); - fence_get(f); + dma_fence_get(f); - FENCE_TRACE(&fence->base, "armed on ring %i!\n", fence->ring); + DMA_FENCE_TRACE(&fence->base, "armed on ring %i!\n", fence->ring); return true; } @@ -436,9 +438,9 @@ bool radeon_fence_signaled(struct radeon_fence *fence) if (radeon_fence_seq_signaled(fence->rdev, fence->seq, fence->ring)) { int ret; - ret = fence_signal(&fence->base); + ret = dma_fence_signal(&fence->base); if (!ret) - FENCE_TRACE(&fence->base, "signaled from radeon_fence_signaled\n"); + DMA_FENCE_TRACE(&fence->base, "signaled from radeon_fence_signaled\n"); return true; } return false; @@ -552,7 +554,7 @@ long radeon_fence_wait_timeout(struct radeon_fence *fence, bool intr, long timeo * exclusive_lock is not held in that case. 
*/ if (WARN_ON_ONCE(!to_radeon_fence(&fence->base))) - return fence_wait(&fence->base, intr); + return dma_fence_wait(&fence->base, intr); seq[fence->ring] = fence->seq; r = radeon_fence_wait_seq_timeout(fence->rdev, seq, intr, timeout); @@ -560,9 +562,9 @@ long radeon_fence_wait_timeout(struct radeon_fence *fence, bool intr, long timeo return r; } - r_sig = fence_signal(&fence->base); + r_sig = dma_fence_signal(&fence->base); if (!r_sig) - FENCE_TRACE(&fence->base, "signaled from fence_wait\n"); + DMA_FENCE_TRACE(&fence->base, "signaled from fence_wait\n"); return r; } @@ -697,7 +699,7 @@ int radeon_fence_wait_empty(struct radeon_device *rdev, int ring) */ struct radeon_fence *radeon_fence_ref(struct radeon_fence *fence) { - fence_get(&fence->base); + dma_fence_get(&fence->base); return fence; } @@ -714,7 +716,7 @@ void radeon_fence_unref(struct radeon_fence **fence) *fence = NULL; if (tmp) { - fence_put(&tmp->base); + dma_fence_put(&tmp->base); } } @@ -1028,12 +1030,12 @@ int radeon_debugfs_fence_init(struct radeon_device *rdev) #endif } -static const char *radeon_fence_get_driver_name(struct fence *fence) +static const char *radeon_fence_get_driver_name(struct dma_fence *fence) { return "radeon"; } -static const char *radeon_fence_get_timeline_name(struct fence *f) +static const char *radeon_fence_get_timeline_name(struct dma_fence *f) { struct radeon_fence *fence = to_radeon_fence(f); switch (fence->ring) { @@ -1051,16 +1053,16 @@ static const char *radeon_fence_get_timeline_name(struct fence *f) static inline bool radeon_test_signaled(struct radeon_fence *fence) { - return test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->base.flags); + return test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->base.flags); } struct radeon_wait_cb { - struct fence_cb base; + struct dma_fence_cb base; struct task_struct *task; }; static void -radeon_fence_wait_cb(struct fence *fence, struct fence_cb *cb) +radeon_fence_wait_cb(struct dma_fence *fence, struct dma_fence_cb *cb) { struct 
radeon_wait_cb *wait = container_of(cb, struct radeon_wait_cb, base); @@ -1068,7 +1070,7 @@ radeon_fence_wait_cb(struct fence *fence, struct fence_cb *cb) wake_up_process(wait->task); } -static signed long radeon_fence_default_wait(struct fence *f, bool intr, +static signed long radeon_fence_default_wait(struct dma_fence *f, bool intr, signed long t) { struct radeon_fence *fence = to_radeon_fence(f); @@ -1077,7 +1079,7 @@ static signed long radeon_fence_default_wait(struct fence *f, bool intr, cb.task = current; - if (fence_add_callback(f, &cb.base, radeon_fence_wait_cb)) + if (dma_fence_add_callback(f, &cb.base, radeon_fence_wait_cb)) return t; while (t > 0) { @@ -1105,12 +1107,12 @@ static signed long radeon_fence_default_wait(struct fence *f, bool intr, } __set_current_state(TASK_RUNNING); - fence_remove_callback(f, &cb.base); + dma_fence_remove_callback(f, &cb.base); return t; } -const struct fence_ops radeon_fence_ops = { +const struct dma_fence_ops radeon_fence_ops = { .get_driver_name = radeon_fence_get_driver_name, .get_timeline_name = radeon_fence_get_timeline_name, .enable_signaling = radeon_fence_enable_signaling, diff --git a/drivers/gpu/drm/radeon/radeon_sync.c b/drivers/gpu/drm/radeon/radeon_sync.c index 02ac8a1de4ff..be5d7a38d3aa 100644 --- a/drivers/gpu/drm/radeon/radeon_sync.c +++ b/drivers/gpu/drm/radeon/radeon_sync.c @@ -92,7 +92,7 @@ int radeon_sync_resv(struct radeon_device *rdev, bool shared) { struct reservation_object_list *flist; - struct fence *f; + struct dma_fence *f; struct radeon_fence *fence; unsigned i; int r = 0; @@ -103,7 +103,7 @@ int radeon_sync_resv(struct radeon_device *rdev, if (fence && fence->rdev == rdev) radeon_sync_fence(sync, fence); else if (f) - r = fence_wait(f, true); + r = dma_fence_wait(f, true); flist = reservation_object_get_list(resv); if (shared || !flist || r) @@ -116,7 +116,7 @@ int radeon_sync_resv(struct radeon_device *rdev, if (fence && fence->rdev == rdev) radeon_sync_fence(sync, fence); else - r = 
fence_wait(f, true); + r = dma_fence_wait(f, true); if (r) break; diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon/radeon_uvd.c index 0cd0e7bdee55..d34d1cf33895 100644 --- a/drivers/gpu/drm/radeon/radeon_uvd.c +++ b/drivers/gpu/drm/radeon/radeon_uvd.c @@ -467,7 +467,7 @@ static int radeon_uvd_cs_msg(struct radeon_cs_parser *p, struct radeon_bo *bo, { int32_t *msg, msg_type, handle; unsigned img_size = 0; - struct fence *f; + struct dma_fence *f; void *ptr; int i, r; diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index fc6217dfe401..915e0d1c316a 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -148,7 +148,7 @@ static void ttm_bo_release_list(struct kref *list_kref) BUG_ON(!list_empty(&bo->ddestroy)); ttm_tt_destroy(bo->ttm); atomic_dec(&bo->glob->bo_count); - fence_put(bo->moving); + dma_fence_put(bo->moving); if (bo->resv == &bo->ttm_resv) reservation_object_fini(&bo->ttm_resv); mutex_destroy(&bo->wu_mutex); @@ -426,20 +426,20 @@ static void ttm_bo_cleanup_memtype_use(struct ttm_buffer_object *bo) static void ttm_bo_flush_all_fences(struct ttm_buffer_object *bo) { struct reservation_object_list *fobj; - struct fence *fence; + struct dma_fence *fence; int i; fobj = reservation_object_get_list(bo->resv); fence = reservation_object_get_excl(bo->resv); if (fence && !fence->ops->signaled) - fence_enable_sw_signaling(fence); + dma_fence_enable_sw_signaling(fence); for (i = 0; fobj && i < fobj->shared_count; ++i) { fence = rcu_dereference_protected(fobj->shared[i], reservation_object_held(bo->resv)); if (!fence->ops->signaled) - fence_enable_sw_signaling(fence); + dma_fence_enable_sw_signaling(fence); } } @@ -792,11 +792,11 @@ static int ttm_bo_add_move_fence(struct ttm_buffer_object *bo, struct ttm_mem_type_manager *man, struct ttm_mem_reg *mem) { - struct fence *fence; + struct dma_fence *fence; int ret; spin_lock(&man->move_lock); - fence = fence_get(man->move); + fence = 
dma_fence_get(man->move); spin_unlock(&man->move_lock); if (fence) { @@ -806,7 +806,7 @@ static int ttm_bo_add_move_fence(struct ttm_buffer_object *bo, if (unlikely(ret)) return ret; - fence_put(bo->moving); + dma_fence_put(bo->moving); bo->moving = fence; } @@ -1286,7 +1286,7 @@ static int ttm_bo_force_list_clean(struct ttm_bo_device *bdev, { struct ttm_mem_type_manager *man = &bdev->man[mem_type]; struct ttm_bo_global *glob = bdev->glob; - struct fence *fence; + struct dma_fence *fence; int ret; /* @@ -1309,12 +1309,12 @@ static int ttm_bo_force_list_clean(struct ttm_bo_device *bdev, spin_unlock(&glob->lru_lock); spin_lock(&man->move_lock); - fence = fence_get(man->move); + fence = dma_fence_get(man->move); spin_unlock(&man->move_lock); if (fence) { - ret = fence_wait(fence, false); - fence_put(fence); + ret = dma_fence_wait(fence, false); + dma_fence_put(fence); if (ret) { if (allow_errors) { return ret; @@ -1343,7 +1343,7 @@ int ttm_bo_clean_mm(struct ttm_bo_device *bdev, unsigned mem_type) mem_type); return ret; } - fence_put(man->move); + dma_fence_put(man->move); man->use_type = false; man->has_type = false; diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index bf6e21655c57..d0459b392e5e 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -644,7 +644,7 @@ void ttm_bo_kunmap(struct ttm_bo_kmap_obj *map) EXPORT_SYMBOL(ttm_bo_kunmap); int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo, - struct fence *fence, + struct dma_fence *fence, bool evict, struct ttm_mem_reg *new_mem) { @@ -674,8 +674,8 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo, * operation has completed. 
*/ - fence_put(bo->moving); - bo->moving = fence_get(fence); + dma_fence_put(bo->moving); + bo->moving = dma_fence_get(fence); ret = ttm_buffer_object_transfer(bo, &ghost_obj); if (ret) @@ -706,7 +706,7 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo, EXPORT_SYMBOL(ttm_bo_move_accel_cleanup); int ttm_bo_pipeline_move(struct ttm_buffer_object *bo, - struct fence *fence, bool evict, + struct dma_fence *fence, bool evict, struct ttm_mem_reg *new_mem) { struct ttm_bo_device *bdev = bo->bdev; @@ -730,8 +730,8 @@ int ttm_bo_pipeline_move(struct ttm_buffer_object *bo, * operation has completed. */ - fence_put(bo->moving); - bo->moving = fence_get(fence); + dma_fence_put(bo->moving); + bo->moving = dma_fence_get(fence); ret = ttm_buffer_object_transfer(bo, &ghost_obj); if (ret) @@ -761,16 +761,16 @@ int ttm_bo_pipeline_move(struct ttm_buffer_object *bo, */ spin_lock(&from->move_lock); - if (!from->move || fence_is_later(fence, from->move)) { - fence_put(from->move); - from->move = fence_get(fence); + if (!from->move || dma_fence_is_later(fence, from->move)) { + dma_fence_put(from->move); + from->move = dma_fence_get(fence); } spin_unlock(&from->move_lock); ttm_bo_free_old_node(bo); - fence_put(bo->moving); - bo->moving = fence_get(fence); + dma_fence_put(bo->moving); + bo->moving = dma_fence_get(fence); } else { /** diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c index a6ed9d5e5167..4748aedc933a 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c @@ -54,7 +54,7 @@ static int ttm_bo_vm_fault_idle(struct ttm_buffer_object *bo, /* * Quick non-stalling check for idle. 
*/ - if (fence_is_signaled(bo->moving)) + if (dma_fence_is_signaled(bo->moving)) goto out_clear; /* @@ -67,14 +67,14 @@ static int ttm_bo_vm_fault_idle(struct ttm_buffer_object *bo, goto out_unlock; up_read(&vma->vm_mm->mmap_sem); - (void) fence_wait(bo->moving, true); + (void) dma_fence_wait(bo->moving, true); goto out_unlock; } /* * Ordinary wait. */ - ret = fence_wait(bo->moving, true); + ret = dma_fence_wait(bo->moving, true); if (unlikely(ret != 0)) { ret = (ret != -ERESTARTSYS) ? VM_FAULT_SIGBUS : VM_FAULT_NOPAGE; @@ -82,7 +82,7 @@ static int ttm_bo_vm_fault_idle(struct ttm_buffer_object *bo, } out_clear: - fence_put(bo->moving); + dma_fence_put(bo->moving); bo->moving = NULL; out_unlock: diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c index a80717b35dc6..d35bc491e8de 100644 --- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c +++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c @@ -179,7 +179,8 @@ int ttm_eu_reserve_buffers(struct ww_acquire_ctx *ticket, EXPORT_SYMBOL(ttm_eu_reserve_buffers); void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket, - struct list_head *list, struct fence *fence) + struct list_head *list, + struct dma_fence *fence) { struct ttm_validate_buffer *entry; struct ttm_buffer_object *bo; diff --git a/drivers/gpu/drm/vgem/vgem_fence.c b/drivers/gpu/drm/vgem/vgem_fence.c index 5c57c1ffa1f9..488909a21ed8 100644 --- a/drivers/gpu/drm/vgem/vgem_fence.c +++ b/drivers/gpu/drm/vgem/vgem_fence.c @@ -28,56 +28,57 @@ #define VGEM_FENCE_TIMEOUT (10*HZ) struct vgem_fence { - struct fence base; + struct dma_fence base; struct spinlock lock; struct timer_list timer; }; -static const char *vgem_fence_get_driver_name(struct fence *fence) +static const char *vgem_fence_get_driver_name(struct dma_fence *fence) { return "vgem"; } -static const char *vgem_fence_get_timeline_name(struct fence *fence) +static const char *vgem_fence_get_timeline_name(struct dma_fence *fence) { return "unbound"; } -static bool 
vgem_fence_signaled(struct fence *fence) +static bool vgem_fence_signaled(struct dma_fence *fence) { return false; } -static bool vgem_fence_enable_signaling(struct fence *fence) +static bool vgem_fence_enable_signaling(struct dma_fence *fence) { return true; } -static void vgem_fence_release(struct fence *base) +static void vgem_fence_release(struct dma_fence *base) { struct vgem_fence *fence = container_of(base, typeof(*fence), base); del_timer_sync(&fence->timer); - fence_free(&fence->base); + dma_fence_free(&fence->base); } -static void vgem_fence_value_str(struct fence *fence, char *str, int size) +static void vgem_fence_value_str(struct dma_fence *fence, char *str, int size) { snprintf(str, size, "%u", fence->seqno); } -static void vgem_fence_timeline_value_str(struct fence *fence, char *str, +static void vgem_fence_timeline_value_str(struct dma_fence *fence, char *str, int size) { - snprintf(str, size, "%u", fence_is_signaled(fence) ? fence->seqno : 0); + snprintf(str, size, "%u", + dma_fence_is_signaled(fence) ? 
fence->seqno : 0); } -static const struct fence_ops vgem_fence_ops = { +static const struct dma_fence_ops vgem_fence_ops = { .get_driver_name = vgem_fence_get_driver_name, .get_timeline_name = vgem_fence_get_timeline_name, .enable_signaling = vgem_fence_enable_signaling, .signaled = vgem_fence_signaled, - .wait = fence_default_wait, + .wait = dma_fence_default_wait, .release = vgem_fence_release, .fence_value_str = vgem_fence_value_str, @@ -88,11 +89,11 @@ static void vgem_fence_timeout(unsigned long data) { struct vgem_fence *fence = (struct vgem_fence *)data; - fence_signal(&fence->base); + dma_fence_signal(&fence->base); } -static struct fence *vgem_fence_create(struct vgem_file *vfile, - unsigned int flags) +static struct dma_fence *vgem_fence_create(struct vgem_file *vfile, + unsigned int flags) { struct vgem_fence *fence; @@ -101,8 +102,8 @@ static struct fence *vgem_fence_create(struct vgem_file *vfile, return NULL; spin_lock_init(&fence->lock); - fence_init(&fence->base, &vgem_fence_ops, &fence->lock, - fence_context_alloc(1), 1); + dma_fence_init(&fence->base, &vgem_fence_ops, &fence->lock, + dma_fence_context_alloc(1), 1); setup_timer(&fence->timer, vgem_fence_timeout, (unsigned long)fence); @@ -157,7 +158,7 @@ int vgem_fence_attach_ioctl(struct drm_device *dev, struct vgem_file *vfile = file->driver_priv; struct reservation_object *resv; struct drm_gem_object *obj; - struct fence *fence; + struct dma_fence *fence; int ret; if (arg->flags & ~VGEM_FENCE_WRITE) @@ -209,8 +210,8 @@ int vgem_fence_attach_ioctl(struct drm_device *dev, } err_fence: if (ret) { - fence_signal(fence); - fence_put(fence); + dma_fence_signal(fence); + dma_fence_put(fence); } err: drm_gem_object_unreference_unlocked(obj); @@ -239,7 +240,7 @@ int vgem_fence_signal_ioctl(struct drm_device *dev, { struct vgem_file *vfile = file->driver_priv; struct drm_vgem_fence_signal *arg = data; - struct fence *fence; + struct dma_fence *fence; int ret = 0; if (arg->flags) @@ -253,11 +254,11 @@ int 
vgem_fence_signal_ioctl(struct drm_device *dev, if (IS_ERR(fence)) return PTR_ERR(fence); - if (fence_is_signaled(fence)) + if (dma_fence_is_signaled(fence)) ret = -ETIMEDOUT; - fence_signal(fence); - fence_put(fence); + dma_fence_signal(fence); + dma_fence_put(fence); return ret; } @@ -271,8 +272,8 @@ int vgem_fence_open(struct vgem_file *vfile) static int __vgem_fence_idr_fini(int id, void *p, void *data) { - fence_signal(p); - fence_put(p); + dma_fence_signal(p); + dma_fence_put(p); return 0; } diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.h b/drivers/gpu/drm/virtio/virtgpu_drv.h index ae59080d63d1..ec1ebdcfe80b 100644 --- a/drivers/gpu/drm/virtio/virtgpu_drv.h +++ b/drivers/gpu/drm/virtio/virtgpu_drv.h @@ -82,7 +82,7 @@ struct virtio_gpu_fence_driver { }; struct virtio_gpu_fence { - struct fence f; + struct dma_fence f; struct virtio_gpu_fence_driver *drv; struct list_head node; uint64_t seq; diff --git a/drivers/gpu/drm/virtio/virtgpu_fence.c b/drivers/gpu/drm/virtio/virtgpu_fence.c index f3f70fa8a4c7..23353521f903 100644 --- a/drivers/gpu/drm/virtio/virtgpu_fence.c +++ b/drivers/gpu/drm/virtio/virtgpu_fence.c @@ -26,22 +26,22 @@ #include #include "virtgpu_drv.h" -static const char *virtio_get_driver_name(struct fence *f) +static const char *virtio_get_driver_name(struct dma_fence *f) { return "virtio_gpu"; } -static const char *virtio_get_timeline_name(struct fence *f) +static const char *virtio_get_timeline_name(struct dma_fence *f) { return "controlq"; } -static bool virtio_enable_signaling(struct fence *f) +static bool virtio_enable_signaling(struct dma_fence *f) { return true; } -static bool virtio_signaled(struct fence *f) +static bool virtio_signaled(struct dma_fence *f) { struct virtio_gpu_fence *fence = to_virtio_fence(f); @@ -50,26 +50,26 @@ static bool virtio_signaled(struct fence *f) return false; } -static void virtio_fence_value_str(struct fence *f, char *str, int size) +static void virtio_fence_value_str(struct dma_fence *f, char *str, int 
size) { struct virtio_gpu_fence *fence = to_virtio_fence(f); snprintf(str, size, "%llu", fence->seq); } -static void virtio_timeline_value_str(struct fence *f, char *str, int size) +static void virtio_timeline_value_str(struct dma_fence *f, char *str, int size) { struct virtio_gpu_fence *fence = to_virtio_fence(f); snprintf(str, size, "%llu", (u64)atomic64_read(&fence->drv->last_seq)); } -static const struct fence_ops virtio_fence_ops = { +static const struct dma_fence_ops virtio_fence_ops = { .get_driver_name = virtio_get_driver_name, .get_timeline_name = virtio_get_timeline_name, .enable_signaling = virtio_enable_signaling, .signaled = virtio_signaled, - .wait = fence_default_wait, + .wait = dma_fence_default_wait, .fence_value_str = virtio_fence_value_str, .timeline_value_str = virtio_timeline_value_str, }; @@ -88,9 +88,9 @@ int virtio_gpu_fence_emit(struct virtio_gpu_device *vgdev, spin_lock_irqsave(&drv->lock, irq_flags); (*fence)->drv = drv; (*fence)->seq = ++drv->sync_seq; - fence_init(&(*fence)->f, &virtio_fence_ops, &drv->lock, - drv->context, (*fence)->seq); - fence_get(&(*fence)->f); + dma_fence_init(&(*fence)->f, &virtio_fence_ops, &drv->lock, + drv->context, (*fence)->seq); + dma_fence_get(&(*fence)->f); list_add_tail(&(*fence)->node, &drv->fences); spin_unlock_irqrestore(&drv->lock, irq_flags); @@ -111,9 +111,9 @@ void virtio_gpu_fence_event_process(struct virtio_gpu_device *vgdev, list_for_each_entry_safe(fence, tmp, &drv->fences, node) { if (last_seq < fence->seq) continue; - fence_signal_locked(&fence->f); + dma_fence_signal_locked(&fence->f); list_del(&fence->node); - fence_put(&fence->f); + dma_fence_put(&fence->f); } spin_unlock_irqrestore(&drv->lock, irq_flags); } diff --git a/drivers/gpu/drm/virtio/virtgpu_ioctl.c b/drivers/gpu/drm/virtio/virtgpu_ioctl.c index 818478b4c4f0..61f3a963af95 100644 --- a/drivers/gpu/drm/virtio/virtgpu_ioctl.c +++ b/drivers/gpu/drm/virtio/virtgpu_ioctl.c @@ -172,7 +172,7 @@ static int 
virtio_gpu_execbuffer_ioctl(struct drm_device *dev, void *data, /* fence the command bo */ virtio_gpu_unref_list(&validate_list); drm_free_large(buflist); - fence_put(&fence->f); + dma_fence_put(&fence->f); return 0; out_unresv: @@ -298,7 +298,7 @@ static int virtio_gpu_resource_create_ioctl(struct drm_device *dev, void *data, drm_gem_object_release(obj); if (vgdev->has_virgl_3d) { virtio_gpu_unref_list(&validate_list); - fence_put(&fence->f); + dma_fence_put(&fence->f); } return ret; } @@ -309,13 +309,13 @@ static int virtio_gpu_resource_create_ioctl(struct drm_device *dev, void *data, if (vgdev->has_virgl_3d) { virtio_gpu_unref_list(&validate_list); - fence_put(&fence->f); + dma_fence_put(&fence->f); } return 0; fail_unref: if (vgdev->has_virgl_3d) { virtio_gpu_unref_list(&validate_list); - fence_put(&fence->f); + dma_fence_put(&fence->f); } //fail_obj: // drm_gem_object_handle_unreference_unlocked(obj); @@ -383,7 +383,7 @@ static int virtio_gpu_transfer_from_host_ioctl(struct drm_device *dev, reservation_object_add_excl_fence(qobj->tbo.resv, &fence->f); - fence_put(&fence->f); + dma_fence_put(&fence->f); out_unres: virtio_gpu_object_unreserve(qobj); out: @@ -431,7 +431,7 @@ static int virtio_gpu_transfer_to_host_ioctl(struct drm_device *dev, void *data, args->level, &box, &fence); reservation_object_add_excl_fence(qobj->tbo.resv, &fence->f); - fence_put(&fence->f); + dma_fence_put(&fence->f); } out_unres: diff --git a/drivers/gpu/drm/virtio/virtgpu_kms.c b/drivers/gpu/drm/virtio/virtgpu_kms.c index 036b0fbae0fb..1235519853f4 100644 --- a/drivers/gpu/drm/virtio/virtgpu_kms.c +++ b/drivers/gpu/drm/virtio/virtgpu_kms.c @@ -159,7 +159,7 @@ int virtio_gpu_driver_load(struct drm_device *dev, unsigned long flags) virtio_gpu_init_vq(&vgdev->ctrlq, virtio_gpu_dequeue_ctrl_func); virtio_gpu_init_vq(&vgdev->cursorq, virtio_gpu_dequeue_cursor_func); - vgdev->fence_drv.context = fence_context_alloc(1); + vgdev->fence_drv.context = dma_fence_context_alloc(1); 
spin_lock_init(&vgdev->fence_drv.lock); INIT_LIST_HEAD(&vgdev->fence_drv.fences); INIT_LIST_HEAD(&vgdev->cap_cache); diff --git a/drivers/gpu/drm/virtio/virtgpu_plane.c b/drivers/gpu/drm/virtio/virtgpu_plane.c index ba28c0f6f28a..cb75f0663ba0 100644 --- a/drivers/gpu/drm/virtio/virtgpu_plane.c +++ b/drivers/gpu/drm/virtio/virtgpu_plane.c @@ -152,7 +152,7 @@ static void virtio_gpu_cursor_plane_update(struct drm_plane *plane, if (!ret) { reservation_object_add_excl_fence(bo->tbo.resv, &fence->f); - fence_put(&fence->f); + dma_fence_put(&fence->f); fence = NULL; virtio_gpu_object_unreserve(bo); virtio_gpu_object_wait(bo, false); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c index 26ac8e80a478..6541dd8b82dc 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c @@ -108,7 +108,7 @@ fman_from_fence(struct vmw_fence_obj *fence) * objects with actions attached to them. */ -static void vmw_fence_obj_destroy(struct fence *f) +static void vmw_fence_obj_destroy(struct dma_fence *f) { struct vmw_fence_obj *fence = container_of(f, struct vmw_fence_obj, base); @@ -123,17 +123,17 @@ static void vmw_fence_obj_destroy(struct fence *f) fence->destroy(fence); } -static const char *vmw_fence_get_driver_name(struct fence *f) +static const char *vmw_fence_get_driver_name(struct dma_fence *f) { return "vmwgfx"; } -static const char *vmw_fence_get_timeline_name(struct fence *f) +static const char *vmw_fence_get_timeline_name(struct dma_fence *f) { return "svga"; } -static bool vmw_fence_enable_signaling(struct fence *f) +static bool vmw_fence_enable_signaling(struct dma_fence *f) { struct vmw_fence_obj *fence = container_of(f, struct vmw_fence_obj, base); @@ -152,12 +152,12 @@ static bool vmw_fence_enable_signaling(struct fence *f) } struct vmwgfx_wait_cb { - struct fence_cb base; + struct dma_fence_cb base; struct task_struct *task; }; static void -vmwgfx_wait_cb(struct fence *fence, struct fence_cb *cb) 
+vmwgfx_wait_cb(struct dma_fence *fence, struct dma_fence_cb *cb) { struct vmwgfx_wait_cb *wait = container_of(cb, struct vmwgfx_wait_cb, base); @@ -167,7 +167,7 @@ vmwgfx_wait_cb(struct fence *fence, struct fence_cb *cb) static void __vmw_fences_update(struct vmw_fence_manager *fman); -static long vmw_fence_wait(struct fence *f, bool intr, signed long timeout) +static long vmw_fence_wait(struct dma_fence *f, bool intr, signed long timeout) { struct vmw_fence_obj *fence = container_of(f, struct vmw_fence_obj, base); @@ -197,7 +197,7 @@ static long vmw_fence_wait(struct fence *f, bool intr, signed long timeout) while (ret > 0) { __vmw_fences_update(fman); - if (test_bit(FENCE_FLAG_SIGNALED_BIT, &f->flags)) + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &f->flags)) break; if (intr) @@ -225,7 +225,7 @@ out: return ret; } -static struct fence_ops vmw_fence_ops = { +static struct dma_fence_ops vmw_fence_ops = { .get_driver_name = vmw_fence_get_driver_name, .get_timeline_name = vmw_fence_get_timeline_name, .enable_signaling = vmw_fence_enable_signaling, @@ -298,7 +298,7 @@ struct vmw_fence_manager *vmw_fence_manager_init(struct vmw_private *dev_priv) fman->event_fence_action_size = ttm_round_pot(sizeof(struct vmw_event_fence_action)); mutex_init(&fman->goal_irq_mutex); - fman->ctx = fence_context_alloc(1); + fman->ctx = dma_fence_context_alloc(1); return fman; } @@ -326,8 +326,8 @@ static int vmw_fence_obj_init(struct vmw_fence_manager *fman, unsigned long irq_flags; int ret = 0; - fence_init(&fence->base, &vmw_fence_ops, &fman->lock, - fman->ctx, seqno); + dma_fence_init(&fence->base, &vmw_fence_ops, &fman->lock, + fman->ctx, seqno); INIT_LIST_HEAD(&fence->seq_passed_actions); fence->destroy = destroy; @@ -431,7 +431,7 @@ static bool vmw_fence_goal_check_locked(struct vmw_fence_obj *fence) u32 goal_seqno; u32 *fifo_mem; - if (fence_is_signaled_locked(&fence->base)) + if (dma_fence_is_signaled_locked(&fence->base)) return false; fifo_mem = fman->dev_priv->mmio_virt; @@ 
-459,7 +459,7 @@ rerun: list_for_each_entry_safe(fence, next_fence, &fman->fence_list, head) { if (seqno - fence->base.seqno < VMW_FENCE_WRAP) { list_del_init(&fence->head); - fence_signal_locked(&fence->base); + dma_fence_signal_locked(&fence->base); INIT_LIST_HEAD(&action_list); list_splice_init(&fence->seq_passed_actions, &action_list); @@ -500,18 +500,18 @@ bool vmw_fence_obj_signaled(struct vmw_fence_obj *fence) { struct vmw_fence_manager *fman = fman_from_fence(fence); - if (test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->base.flags)) + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->base.flags)) return 1; vmw_fences_update(fman); - return fence_is_signaled(&fence->base); + return dma_fence_is_signaled(&fence->base); } int vmw_fence_obj_wait(struct vmw_fence_obj *fence, bool lazy, bool interruptible, unsigned long timeout) { - long ret = fence_wait_timeout(&fence->base, interruptible, timeout); + long ret = dma_fence_wait_timeout(&fence->base, interruptible, timeout); if (likely(ret > 0)) return 0; @@ -530,7 +530,7 @@ void vmw_fence_obj_flush(struct vmw_fence_obj *fence) static void vmw_fence_destroy(struct vmw_fence_obj *fence) { - fence_free(&fence->base); + dma_fence_free(&fence->base); } int vmw_fence_create(struct vmw_fence_manager *fman, @@ -669,7 +669,7 @@ void vmw_fence_fifo_down(struct vmw_fence_manager *fman) struct vmw_fence_obj *fence = list_entry(fman->fence_list.prev, struct vmw_fence_obj, head); - fence_get(&fence->base); + dma_fence_get(&fence->base); spin_unlock_irq(&fman->lock); ret = vmw_fence_obj_wait(fence, false, false, @@ -677,7 +677,7 @@ void vmw_fence_fifo_down(struct vmw_fence_manager *fman) if (unlikely(ret != 0)) { list_del_init(&fence->head); - fence_signal(&fence->base); + dma_fence_signal(&fence->base); INIT_LIST_HEAD(&action_list); list_splice_init(&fence->seq_passed_actions, &action_list); @@ -685,7 +685,7 @@ void vmw_fence_fifo_down(struct vmw_fence_manager *fman) } BUG_ON(!list_empty(&fence->head)); - fence_put(&fence->base); + 
dma_fence_put(&fence->base); spin_lock_irq(&fman->lock); } spin_unlock_irq(&fman->lock); @@ -884,7 +884,7 @@ static void vmw_fence_obj_add_action(struct vmw_fence_obj *fence, spin_lock_irqsave(&fman->lock, irq_flags); fman->pending_actions[action->type]++; - if (fence_is_signaled_locked(&fence->base)) { + if (dma_fence_is_signaled_locked(&fence->base)) { struct list_head action_list; INIT_LIST_HEAD(&action_list); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.h b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.h index 83ae301ee141..d9d85aa6ed20 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.h @@ -27,7 +27,7 @@ #ifndef _VMWGFX_FENCE_H_ -#include +#include #define VMW_FENCE_WAIT_TIMEOUT (5*HZ) @@ -52,7 +52,7 @@ struct vmw_fence_action { }; struct vmw_fence_obj { - struct fence base; + struct dma_fence base; struct list_head head; struct list_head seq_passed_actions; @@ -71,14 +71,14 @@ vmw_fence_obj_unreference(struct vmw_fence_obj **fence_p) *fence_p = NULL; if (fence) - fence_put(&fence->base); + dma_fence_put(&fence->base); } static inline struct vmw_fence_obj * vmw_fence_obj_reference(struct vmw_fence_obj *fence) { if (fence) - fence_get(&fence->base); + dma_fence_get(&fence->base); return fence; } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c index 1a85fb2d4dc6..8e86d6d4141b 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c @@ -1454,7 +1454,7 @@ void vmw_fence_single_bo(struct ttm_buffer_object *bo, if (fence == NULL) { vmw_execbuf_fence_commands(NULL, dev_priv, &fence, NULL); reservation_object_add_excl_fence(bo->resv, &fence->base); - fence_put(&fence->base); + dma_fence_put(&fence->base); } else reservation_object_add_excl_fence(bo->resv, &fence->base); } diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 672644031bd5..e336e3901876 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -57,7 +57,7 @@ #include 
#include #include -#include +#include #include #include @@ -362,7 +362,7 @@ struct drm_ioctl_desc { struct drm_pending_event { struct completion *completion; struct drm_event *event; - struct fence *fence; + struct dma_fence *fence; struct list_head link; struct list_head pending_link; struct drm_file *file_priv; diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h index bc860cfc67ca..fa1aa214c8ea 100644 --- a/include/drm/drm_crtc.h +++ b/include/drm/drm_crtc.h @@ -54,7 +54,7 @@ struct drm_mode_set; struct drm_file; struct drm_clip_rect; struct device_node; -struct fence; +struct dma_fence; struct edid; static inline int64_t U642I64(uint64_t val) diff --git a/include/drm/drm_plane.h b/include/drm/drm_plane.h index 98b39d66eb32..c5e8a0df1623 100644 --- a/include/drm/drm_plane.h +++ b/include/drm/drm_plane.h @@ -59,7 +59,7 @@ struct drm_plane_state { struct drm_crtc *crtc; /* do not write directly, use drm_atomic_set_crtc_for_plane() */ struct drm_framebuffer *fb; /* do not write directly, use drm_atomic_set_fb_for_plane() */ - struct fence *fence; + struct dma_fence *fence; /* Signed dest location allows it to be partially off screen */ int32_t crtc_x, crtc_y; diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h index 9eb940d6755f..5beae7969bf7 100644 --- a/include/drm/ttm/ttm_bo_api.h +++ b/include/drm/ttm/ttm_bo_api.h @@ -209,7 +209,7 @@ struct ttm_buffer_object { * Members protected by a bo reservation. */ - struct fence *moving; + struct dma_fence *moving; struct drm_vma_offset_node vma_node; diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h index 4f0a92185995..27e9c26c9150 100644 --- a/include/drm/ttm/ttm_bo_driver.h +++ b/include/drm/ttm/ttm_bo_driver.h @@ -303,7 +303,7 @@ struct ttm_mem_type_manager { /* * Protected by @move_lock. 
*/ - struct fence *move; + struct dma_fence *move; }; /** @@ -1025,7 +1025,7 @@ extern void ttm_bo_free_old_node(struct ttm_buffer_object *bo); */ extern int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo, - struct fence *fence, bool evict, + struct dma_fence *fence, bool evict, struct ttm_mem_reg *new_mem); /** @@ -1040,7 +1040,7 @@ extern int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo, * immediately or hang it on a temporary buffer object. */ int ttm_bo_pipeline_move(struct ttm_buffer_object *bo, - struct fence *fence, bool evict, + struct dma_fence *fence, bool evict, struct ttm_mem_reg *new_mem); /** diff --git a/include/drm/ttm/ttm_execbuf_util.h b/include/drm/ttm/ttm_execbuf_util.h index b620c317c772..47f35b8e6d09 100644 --- a/include/drm/ttm/ttm_execbuf_util.h +++ b/include/drm/ttm/ttm_execbuf_util.h @@ -114,6 +114,6 @@ extern int ttm_eu_reserve_buffers(struct ww_acquire_ctx *ticket, extern void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket, struct list_head *list, - struct fence *fence); + struct dma_fence *fence); #endif diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index e0b0741ae671..8daeb3ce0016 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -30,7 +30,7 @@ #include #include #include -#include +#include #include struct device; @@ -143,7 +143,7 @@ struct dma_buf { wait_queue_head_t poll; struct dma_buf_poll_cb_t { - struct fence_cb cb; + struct dma_fence_cb cb; wait_queue_head_t *poll; unsigned long active; diff --git a/include/linux/dma-fence-array.h b/include/linux/dma-fence-array.h new file mode 100644 index 000000000000..5900945f962d --- /dev/null +++ b/include/linux/dma-fence-array.h @@ -0,0 +1,86 @@ +/* + * fence-array: aggregates fence to be waited together + * + * Copyright (C) 2016 Collabora Ltd + * Copyright (C) 2016 Advanced Micro Devices, Inc. 
+ * Authors: + * Gustavo Padovan + * Christian König + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef __LINUX_DMA_FENCE_ARRAY_H +#define __LINUX_DMA_FENCE_ARRAY_H + +#include + +/** + * struct dma_fence_array_cb - callback helper for fence array + * @cb: fence callback structure for signaling + * @array: reference to the parent fence array object + */ +struct dma_fence_array_cb { + struct dma_fence_cb cb; + struct dma_fence_array *array; +}; + +/** + * struct dma_fence_array - fence to represent an array of fences + * @base: fence base class + * @lock: spinlock for fence handling + * @num_fences: number of fences in the array + * @num_pending: fences in the array still pending + * @fences: array of the fences + */ +struct dma_fence_array { + struct dma_fence base; + + spinlock_t lock; + unsigned num_fences; + atomic_t num_pending; + struct dma_fence **fences; +}; + +extern const struct dma_fence_ops dma_fence_array_ops; + +/** + * dma_fence_is_array - check if a fence is from the array subclass + * @fence: fence to test + * + * Return true if it is a dma_fence_array and false otherwise. + */ +static inline bool dma_fence_is_array(struct dma_fence *fence) +{ + return fence->ops == &dma_fence_array_ops; +} + +/** + * to_dma_fence_array - cast a fence to a dma_fence_array + * @fence: fence to cast to a dma_fence_array + * + * Returns NULL if the fence is not a dma_fence_array, + * or the dma_fence_array otherwise.
+ */ +static inline struct dma_fence_array * +to_dma_fence_array(struct dma_fence *fence) +{ + if (fence->ops != &dma_fence_array_ops) + return NULL; + + return container_of(fence, struct dma_fence_array, base); +} + +struct dma_fence_array *dma_fence_array_create(int num_fences, + struct dma_fence **fences, + u64 context, unsigned seqno, + bool signal_on_any); + +#endif /* __LINUX_DMA_FENCE_ARRAY_H */ diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h new file mode 100644 index 000000000000..ba60c043a5d3 --- /dev/null +++ b/include/linux/dma-fence.h @@ -0,0 +1,437 @@ +/* + * Fence mechanism for dma-buf to allow for asynchronous dma access + * + * Copyright (C) 2012 Canonical Ltd + * Copyright (C) 2012 Texas Instruments + * + * Authors: + * Rob Clark + * Maarten Lankhorst + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef __LINUX_DMA_FENCE_H +#define __LINUX_DMA_FENCE_H + +#include +#include +#include +#include +#include +#include +#include +#include + +struct dma_fence; +struct dma_fence_ops; +struct dma_fence_cb; + +/** + * struct dma_fence - software synchronization primitive + * @refcount: refcount for this fence + * @ops: dma_fence_ops associated with this fence + * @rcu: used for releasing fence with kfree_rcu + * @cb_list: list of all callbacks to call + * @lock: spin_lock_irqsave used for locking + * @context: execution context this fence belongs to, returned by + * dma_fence_context_alloc() + * @seqno: the sequence number of this fence inside the execution context, + * can be compared to decide which fence would be signaled later. 
+ * @flags: A mask of DMA_FENCE_FLAG_* defined below + * @timestamp: Timestamp when the fence was signaled. + * @status: Optional, only valid if < 0, must be set before calling + * dma_fence_signal, indicates that the fence has completed with an error. + * + * the flags member must be manipulated and read using the appropriate + * atomic ops (bit_*), so taking the spinlock will not be needed most + * of the time. + * + * DMA_FENCE_FLAG_SIGNALED_BIT - fence is already signaled + * DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT - enable_signaling might have been called + * DMA_FENCE_FLAG_USER_BITS - start of the unused bits, can be used by the + * implementer of the fence for its own purposes. Can be used in different + * ways by different fence implementers, so do not rely on this. + * + * Since atomic bitops are used, this is not guaranteed to be the case. + * Particularly, if the bit was set, but dma_fence_signal was called right + * before this bit was set, it would have been able to set the + * DMA_FENCE_FLAG_SIGNALED_BIT, before enable_signaling was called. + * Adding a check for DMA_FENCE_FLAG_SIGNALED_BIT after setting + * DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT closes this race, and makes sure that + * after dma_fence_signal was called, any enable_signaling call will have either + * been completed, or never called at all. 
+ */ +struct dma_fence { + struct kref refcount; + const struct dma_fence_ops *ops; + struct rcu_head rcu; + struct list_head cb_list; + spinlock_t *lock; + u64 context; + unsigned seqno; + unsigned long flags; + ktime_t timestamp; + int status; +}; + +enum dma_fence_flag_bits { + DMA_FENCE_FLAG_SIGNALED_BIT, + DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, + DMA_FENCE_FLAG_USER_BITS, /* must always be last member */ +}; + +typedef void (*dma_fence_func_t)(struct dma_fence *fence, + struct dma_fence_cb *cb); + +/** + * struct dma_fence_cb - callback for dma_fence_add_callback + * @node: used by dma_fence_add_callback to append this struct to fence::cb_list + * @func: dma_fence_func_t to call + * + * This struct will be initialized by dma_fence_add_callback, additional + * data can be passed along by embedding dma_fence_cb in another struct. + */ +struct dma_fence_cb { + struct list_head node; + dma_fence_func_t func; +}; + +/** + * struct dma_fence_ops - operations implemented for fence + * @get_driver_name: returns the driver name. + * @get_timeline_name: return the name of the context this fence belongs to. + * @enable_signaling: enable software signaling of fence. + * @signaled: [optional] peek whether the fence is signaled, can be null. + * @wait: custom wait implementation, or dma_fence_default_wait. + * @release: [optional] called on destruction of fence, can be null + * @fill_driver_data: [optional] callback to fill in free-form debug info + * Returns amount of bytes filled, or -errno. + * @fence_value_str: [optional] fills in the value of the fence as a string + * @timeline_value_str: [optional] fills in the current value of the timeline + * as a string + * + * Notes on enable_signaling: + * For fence implementations that have the capability for hw->hw + * signaling, they can implement this op to enable the necessary + * irqs, or insert commands into cmdstream, etc. 
This is called + * in the first wait() or add_callback() path to let the fence + * implementation know that there is another driver waiting on + * the signal (ie. hw->sw case). + * + * This function can be called from atomic context, but not + * from irq context, so normal spinlocks can be used. + * + * A return value of false indicates the fence already passed, + * or some failure occurred that made it impossible to enable + * signaling. True indicates successful enabling. + * + * fence->status may be set in enable_signaling, but only when false is + * returned. + * + * Calling dma_fence_signal before enable_signaling is called allows + * for a tiny race window in which enable_signaling is called during, + * before, or after dma_fence_signal. To fight this, it is recommended + * that before enable_signaling returns true an extra reference is + * taken on the fence, to be released when the fence is signaled. + * This will mean dma_fence_signal will still be called twice, but + * the second time will be a noop since it was already signaled. + * + * Notes on signaled: + * May set fence->status if returning true. + * + * Notes on wait: + * Must not be NULL, set to dma_fence_default_wait for default implementation. + * the dma_fence_default_wait implementation should work for any fence, as long + * as enable_signaling works correctly. + * + * Must return -ERESTARTSYS if the wait is intr = true and the wait was + * interrupted, and remaining jiffies if fence has signaled, or 0 if wait + * timed out. Can also return other error values on custom implementations, + * which should be treated as if the fence is signaled. For example a hardware + * lockup could be reported like that. + * + * Notes on release: + * Can be NULL, this function allows additional commands to run on + * destruction of the fence. Can be called from irq context. + * If pointer is set to NULL, kfree will get called instead.
+ */ + +struct dma_fence_ops { + const char * (*get_driver_name)(struct dma_fence *fence); + const char * (*get_timeline_name)(struct dma_fence *fence); + bool (*enable_signaling)(struct dma_fence *fence); + bool (*signaled)(struct dma_fence *fence); + signed long (*wait)(struct dma_fence *fence, + bool intr, signed long timeout); + void (*release)(struct dma_fence *fence); + + int (*fill_driver_data)(struct dma_fence *fence, void *data, int size); + void (*fence_value_str)(struct dma_fence *fence, char *str, int size); + void (*timeline_value_str)(struct dma_fence *fence, + char *str, int size); +}; + +void dma_fence_init(struct dma_fence *fence, const struct dma_fence_ops *ops, + spinlock_t *lock, u64 context, unsigned seqno); + +void dma_fence_release(struct kref *kref); +void dma_fence_free(struct dma_fence *fence); + +/** + * dma_fence_put - decreases refcount of the fence + * @fence: [in] fence to reduce refcount of + */ +static inline void dma_fence_put(struct dma_fence *fence) +{ + if (fence) + kref_put(&fence->refcount, dma_fence_release); +} + +/** + * dma_fence_get - increases refcount of the fence + * @fence: [in] fence to increase refcount of + * + * Returns the same fence, with refcount increased by 1. + */ +static inline struct dma_fence *dma_fence_get(struct dma_fence *fence) +{ + if (fence) + kref_get(&fence->refcount); + return fence; +} + +/** + * dma_fence_get_rcu - get a fence from a reservation_object_list with + * rcu read lock + * @fence: [in] fence to increase refcount of + * + * Function returns NULL if no refcount could be obtained, or the fence. + */ +static inline struct dma_fence *dma_fence_get_rcu(struct dma_fence *fence) +{ + if (kref_get_unless_zero(&fence->refcount)) + return fence; + else + return NULL; +} + +/** + * dma_fence_get_rcu_safe - acquire a reference to an RCU tracked fence + * @fence: [in] pointer to fence to increase refcount of + * + * Function returns NULL if no refcount could be obtained, or the fence. 
+ * This function handles acquiring a reference to a fence that may be + * reallocated within the RCU grace period (such as with SLAB_DESTROY_BY_RCU), + * so long as the caller is using RCU on the pointer to the fence. + * + * An alternative mechanism is to employ a seqlock to protect a bunch of + * fences, such as used by struct reservation_object. When using a seqlock, + * the seqlock must be taken before and checked after a reference to the + * fence is acquired (as shown here). + * + * The caller is required to hold the RCU read lock. + */ +static inline struct dma_fence * +dma_fence_get_rcu_safe(struct dma_fence * __rcu *fencep) +{ + do { + struct dma_fence *fence; + + fence = rcu_dereference(*fencep); + if (!fence || !dma_fence_get_rcu(fence)) + return NULL; + + /* The atomic_inc_not_zero() inside dma_fence_get_rcu() + * provides a full memory barrier upon success (such as now). + * This is paired with the write barrier from assigning + * to the __rcu protected fence pointer so that if that + * pointer still matches the current fence, we know we + * have successfully acquire a reference to it. If it no + * longer matches, we are holding a reference to some other + * reallocated pointer. This is possible if the allocator + * is using a freelist like SLAB_DESTROY_BY_RCU where the + * fence remains valid for the RCU grace period, but it + * may be reallocated. When using such allocators, we are + * responsible for ensuring the reference we get is to + * the right fence, as below. 
+ */ + if (fence == rcu_access_pointer(*fencep)) + return rcu_pointer_handoff(fence); + + dma_fence_put(fence); + } while (1); +} + +int dma_fence_signal(struct dma_fence *fence); +int dma_fence_signal_locked(struct dma_fence *fence); +signed long dma_fence_default_wait(struct dma_fence *fence, + bool intr, signed long timeout); +int dma_fence_add_callback(struct dma_fence *fence, + struct dma_fence_cb *cb, + dma_fence_func_t func); +bool dma_fence_remove_callback(struct dma_fence *fence, + struct dma_fence_cb *cb); +void dma_fence_enable_sw_signaling(struct dma_fence *fence); + +/** + * dma_fence_is_signaled_locked - Return an indication if the fence + * is signaled yet. + * @fence: [in] the fence to check + * + * Returns true if the fence was already signaled, false if not. Since this + * function doesn't enable signaling, it is not guaranteed to ever return + * true if dma_fence_add_callback, dma_fence_wait or + * dma_fence_enable_sw_signaling haven't been called before. + * + * This function requires fence->lock to be held. + */ +static inline bool +dma_fence_is_signaled_locked(struct dma_fence *fence) +{ + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) + return true; + + if (fence->ops->signaled && fence->ops->signaled(fence)) { + dma_fence_signal_locked(fence); + return true; + } + + return false; +} + +/** + * dma_fence_is_signaled - Return an indication if the fence is signaled yet. + * @fence: [in] the fence to check + * + * Returns true if the fence was already signaled, false if not. Since this + * function doesn't enable signaling, it is not guaranteed to ever return + * true if dma_fence_add_callback, dma_fence_wait or + * dma_fence_enable_sw_signaling haven't been called before. 
+ * + * It's recommended for seqno fences to call dma_fence_signal when the + * operation is complete, it makes it possible to prevent issues from + * wraparound between time of issue and time of use by checking the return + * value of this function before calling hardware-specific wait instructions. + */ +static inline bool +dma_fence_is_signaled(struct dma_fence *fence) +{ + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) + return true; + + if (fence->ops->signaled && fence->ops->signaled(fence)) { + dma_fence_signal(fence); + return true; + } + + return false; +} + +/** + * dma_fence_is_later - return if f1 is chronologically later than f2 + * @f1: [in] the first fence from the same context + * @f2: [in] the second fence from the same context + * + * Returns true if f1 is chronologically later than f2. Both fences must be + * from the same context, since a seqno is not re-used across contexts. + */ +static inline bool dma_fence_is_later(struct dma_fence *f1, + struct dma_fence *f2) +{ + if (WARN_ON(f1->context != f2->context)) + return false; + + return (int)(f1->seqno - f2->seqno) > 0; +} + +/** + * dma_fence_later - return the chronologically later fence + * @f1: [in] the first fence from the same context + * @f2: [in] the second fence from the same context + * + * Returns NULL if both fences are signaled, otherwise the fence that would be + * signaled last. Both fences must be from the same context, since a seqno is + * not re-used across contexts. + */ +static inline struct dma_fence *dma_fence_later(struct dma_fence *f1, + struct dma_fence *f2) +{ + if (WARN_ON(f1->context != f2->context)) + return NULL; + + /* + * Can't check just DMA_FENCE_FLAG_SIGNALED_BIT here, it may never + * have been set if enable_signaling wasn't called, and enabling that + * here is overkill. + */ + if (dma_fence_is_later(f1, f2)) + return dma_fence_is_signaled(f1) ? NULL : f1; + else + return dma_fence_is_signaled(f2) ? 
NULL : f2; +} + +signed long dma_fence_wait_timeout(struct dma_fence *, + bool intr, signed long timeout); +signed long dma_fence_wait_any_timeout(struct dma_fence **fences, + uint32_t count, + bool intr, signed long timeout); + +/** + * dma_fence_wait - sleep until the fence gets signaled + * @fence: [in] the fence to wait on + * @intr: [in] if true, do an interruptible wait + * + * This function will return -ERESTARTSYS if interrupted by a signal, + * or 0 if the fence was signaled. Other error values may be + * returned on custom implementations. + * + * Performs a synchronous wait on this fence. It is assumed the caller + * directly or indirectly holds a reference to the fence, otherwise the + * fence might be freed before return, resulting in undefined behavior. + */ +static inline signed long dma_fence_wait(struct dma_fence *fence, bool intr) +{ + signed long ret; + + /* Since dma_fence_wait_timeout cannot timeout with + * MAX_SCHEDULE_TIMEOUT, only valid return values are + * -ERESTARTSYS and MAX_SCHEDULE_TIMEOUT. + */ + ret = dma_fence_wait_timeout(fence, intr, MAX_SCHEDULE_TIMEOUT); + + return ret < 0 ? ret : 0; +} + +u64 dma_fence_context_alloc(unsigned num); + +#define DMA_FENCE_TRACE(f, fmt, args...) \ + do { \ + struct dma_fence *__ff = (f); \ + if (IS_ENABLED(CONFIG_DMA_FENCE_TRACE)) \ + pr_info("f %llu#%u: " fmt, \ + __ff->context, __ff->seqno, ##args); \ + } while (0) + +#define DMA_FENCE_WARN(f, fmt, args...) \ + do { \ + struct dma_fence *__ff = (f); \ + pr_warn("f %llu#%u: " fmt, __ff->context, __ff->seqno, \ + ##args); \ + } while (0) + +#define DMA_FENCE_ERR(f, fmt, args...) 
\ + do { \ + struct dma_fence *__ff = (f); \ + pr_err("f %llu#%u: " fmt, __ff->context, __ff->seqno, \ + ##args); \ + } while (0) + +#endif /* __LINUX_DMA_FENCE_H */ diff --git a/include/linux/fence-array.h b/include/linux/fence-array.h deleted file mode 100644 index 9ea2bde10ac1..000000000000 --- a/include/linux/fence-array.h +++ /dev/null @@ -1,84 +0,0 @@ -/* - * fence-array: aggregates fence to be waited together - * - * Copyright (C) 2016 Collabora Ltd - * Copyright (C) 2016 Advanced Micro Devices, Inc. - * Authors: - * Gustavo Padovan - * Christian König - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - */ - -#ifndef __LINUX_FENCE_ARRAY_H -#define __LINUX_FENCE_ARRAY_H - -#include - -/** - * struct fence_array_cb - callback helper for fence array - * @cb: fence callback structure for signaling - * @array: reference to the parent fence array object - */ -struct fence_array_cb { - struct fence_cb cb; - struct fence_array *array; -}; - -/** - * struct fence_array - fence to represent an array of fences - * @base: fence base class - * @lock: spinlock for fence handling - * @num_fences: number of fences in the array - * @num_pending: fences in the array still pending - * @fences: array of the fences - */ -struct fence_array { - struct fence base; - - spinlock_t lock; - unsigned num_fences; - atomic_t num_pending; - struct fence **fences; -}; - -extern const struct fence_ops fence_array_ops; - -/** - * fence_is_array - check if a fence is from the array subsclass - * @fence: fence to test - * - * Return true if it is a fence_array and false otherwise. 
- */ -static inline bool fence_is_array(struct fence *fence) -{ - return fence->ops == &fence_array_ops; -} - -/** - * to_fence_array - cast a fence to a fence_array - * @fence: fence to cast to a fence_array - * - * Returns NULL if the fence is not a fence_array, - * or the fence_array otherwise. - */ -static inline struct fence_array *to_fence_array(struct fence *fence) -{ - if (fence->ops != &fence_array_ops) - return NULL; - - return container_of(fence, struct fence_array, base); -} - -struct fence_array *fence_array_create(int num_fences, struct fence **fences, - u64 context, unsigned seqno, - bool signal_on_any); - -#endif /* __LINUX_FENCE_ARRAY_H */ diff --git a/include/linux/fence.h b/include/linux/fence.h deleted file mode 100644 index c9c5ba98c302..000000000000 --- a/include/linux/fence.h +++ /dev/null @@ -1,424 +0,0 @@ -/* - * Fence mechanism for dma-buf to allow for asynchronous dma access - * - * Copyright (C) 2012 Canonical Ltd - * Copyright (C) 2012 Texas Instruments - * - * Authors: - * Rob Clark - * Maarten Lankhorst - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. 
- */ - -#ifndef __LINUX_FENCE_H -#define __LINUX_FENCE_H - -#include -#include -#include -#include -#include -#include -#include -#include - -struct fence; -struct fence_ops; -struct fence_cb; - -/** - * struct fence - software synchronization primitive - * @refcount: refcount for this fence - * @ops: fence_ops associated with this fence - * @rcu: used for releasing fence with kfree_rcu - * @cb_list: list of all callbacks to call - * @lock: spin_lock_irqsave used for locking - * @context: execution context this fence belongs to, returned by - * fence_context_alloc() - * @seqno: the sequence number of this fence inside the execution context, - * can be compared to decide which fence would be signaled later. - * @flags: A mask of FENCE_FLAG_* defined below - * @timestamp: Timestamp when the fence was signaled. - * @status: Optional, only valid if < 0, must be set before calling - * fence_signal, indicates that the fence has completed with an error. - * - * the flags member must be manipulated and read using the appropriate - * atomic ops (bit_*), so taking the spinlock will not be needed most - * of the time. - * - * FENCE_FLAG_SIGNALED_BIT - fence is already signaled - * FENCE_FLAG_ENABLE_SIGNAL_BIT - enable_signaling might have been called* - * FENCE_FLAG_USER_BITS - start of the unused bits, can be used by the - * implementer of the fence for its own purposes. Can be used in different - * ways by different fence implementers, so do not rely on this. - * - * Since atomic bitops are used, this is not guaranteed to be the case. - * Particularly, if the bit was set, but fence_signal was called right - * before this bit was set, it would have been able to set the - * FENCE_FLAG_SIGNALED_BIT, before enable_signaling was called. 
- * Adding a check for FENCE_FLAG_SIGNALED_BIT after setting - * FENCE_FLAG_ENABLE_SIGNAL_BIT closes this race, and makes sure that - * after fence_signal was called, any enable_signaling call will have either - * been completed, or never called at all. - */ -struct fence { - struct kref refcount; - const struct fence_ops *ops; - struct rcu_head rcu; - struct list_head cb_list; - spinlock_t *lock; - u64 context; - unsigned seqno; - unsigned long flags; - ktime_t timestamp; - int status; -}; - -enum fence_flag_bits { - FENCE_FLAG_SIGNALED_BIT, - FENCE_FLAG_ENABLE_SIGNAL_BIT, - FENCE_FLAG_USER_BITS, /* must always be last member */ -}; - -typedef void (*fence_func_t)(struct fence *fence, struct fence_cb *cb); - -/** - * struct fence_cb - callback for fence_add_callback - * @node: used by fence_add_callback to append this struct to fence::cb_list - * @func: fence_func_t to call - * - * This struct will be initialized by fence_add_callback, additional - * data can be passed along by embedding fence_cb in another struct. - */ -struct fence_cb { - struct list_head node; - fence_func_t func; -}; - -/** - * struct fence_ops - operations implemented for fence - * @get_driver_name: returns the driver name. - * @get_timeline_name: return the name of the context this fence belongs to. - * @enable_signaling: enable software signaling of fence. - * @signaled: [optional] peek whether the fence is signaled, can be null. - * @wait: custom wait implementation, or fence_default_wait. - * @release: [optional] called on destruction of fence, can be null - * @fill_driver_data: [optional] callback to fill in free-form debug info - * Returns amount of bytes filled, or -errno. 
- * @fence_value_str: [optional] fills in the value of the fence as a string - * @timeline_value_str: [optional] fills in the current value of the timeline - * as a string - * - * Notes on enable_signaling: - * For fence implementations that have the capability for hw->hw - * signaling, they can implement this op to enable the necessary - * irqs, or insert commands into cmdstream, etc. This is called - * in the first wait() or add_callback() path to let the fence - * implementation know that there is another driver waiting on - * the signal (ie. hw->sw case). - * - * This function can be called called from atomic context, but not - * from irq context, so normal spinlocks can be used. - * - * A return value of false indicates the fence already passed, - * or some failure occurred that made it impossible to enable - * signaling. True indicates successful enabling. - * - * fence->status may be set in enable_signaling, but only when false is - * returned. - * - * Calling fence_signal before enable_signaling is called allows - * for a tiny race window in which enable_signaling is called during, - * before, or after fence_signal. To fight this, it is recommended - * that before enable_signaling returns true an extra reference is - * taken on the fence, to be released when the fence is signaled. - * This will mean fence_signal will still be called twice, but - * the second time will be a noop since it was already signaled. - * - * Notes on signaled: - * May set fence->status if returning true. - * - * Notes on wait: - * Must not be NULL, set to fence_default_wait for default implementation. - * the fence_default_wait implementation should work for any fence, as long - * as enable_signaling works correctly. - * - * Must return -ERESTARTSYS if the wait is intr = true and the wait was - * interrupted, and remaining jiffies if fence has signaled, or 0 if wait - * timed out. 
Can also return other error values on custom implementations, - * which should be treated as if the fence is signaled. For example a hardware - * lockup could be reported like that. - * - * Notes on release: - * Can be NULL, this function allows additional commands to run on - * destruction of the fence. Can be called from irq context. - * If pointer is set to NULL, kfree will get called instead. - */ - -struct fence_ops { - const char * (*get_driver_name)(struct fence *fence); - const char * (*get_timeline_name)(struct fence *fence); - bool (*enable_signaling)(struct fence *fence); - bool (*signaled)(struct fence *fence); - signed long (*wait)(struct fence *fence, bool intr, signed long timeout); - void (*release)(struct fence *fence); - - int (*fill_driver_data)(struct fence *fence, void *data, int size); - void (*fence_value_str)(struct fence *fence, char *str, int size); - void (*timeline_value_str)(struct fence *fence, char *str, int size); -}; - -void fence_init(struct fence *fence, const struct fence_ops *ops, - spinlock_t *lock, u64 context, unsigned seqno); - -void fence_release(struct kref *kref); -void fence_free(struct fence *fence); - -/** - * fence_put - decreases refcount of the fence - * @fence: [in] fence to reduce refcount of - */ -static inline void fence_put(struct fence *fence) -{ - if (fence) - kref_put(&fence->refcount, fence_release); -} - -/** - * fence_get - increases refcount of the fence - * @fence: [in] fence to increase refcount of - * - * Returns the same fence, with refcount increased by 1. - */ -static inline struct fence *fence_get(struct fence *fence) -{ - if (fence) - kref_get(&fence->refcount); - return fence; -} - -/** - * fence_get_rcu - get a fence from a reservation_object_list with rcu read lock - * @fence: [in] fence to increase refcount of - * - * Function returns NULL if no refcount could be obtained, or the fence. 
- */ -static inline struct fence *fence_get_rcu(struct fence *fence) -{ - if (kref_get_unless_zero(&fence->refcount)) - return fence; - else - return NULL; -} - -/** - * fence_get_rcu_safe - acquire a reference to an RCU tracked fence - * @fence: [in] pointer to fence to increase refcount of - * - * Function returns NULL if no refcount could be obtained, or the fence. - * This function handles acquiring a reference to a fence that may be - * reallocated within the RCU grace period (such as with SLAB_DESTROY_BY_RCU), - * so long as the caller is using RCU on the pointer to the fence. - * - * An alternative mechanism is to employ a seqlock to protect a bunch of - * fences, such as used by struct reservation_object. When using a seqlock, - * the seqlock must be taken before and checked after a reference to the - * fence is acquired (as shown here). - * - * The caller is required to hold the RCU read lock. - */ -static inline struct fence *fence_get_rcu_safe(struct fence * __rcu *fencep) -{ - do { - struct fence *fence; - - fence = rcu_dereference(*fencep); - if (!fence || !fence_get_rcu(fence)) - return NULL; - - /* The atomic_inc_not_zero() inside fence_get_rcu() - * provides a full memory barrier upon success (such as now). - * This is paired with the write barrier from assigning - * to the __rcu protected fence pointer so that if that - * pointer still matches the current fence, we know we - * have successfully acquire a reference to it. If it no - * longer matches, we are holding a reference to some other - * reallocated pointer. This is possible if the allocator - * is using a freelist like SLAB_DESTROY_BY_RCU where the - * fence remains valid for the RCU grace period, but it - * may be reallocated. When using such allocators, we are - * responsible for ensuring the reference we get is to - * the right fence, as below. 
- */ - if (fence == rcu_access_pointer(*fencep)) - return rcu_pointer_handoff(fence); - - fence_put(fence); - } while (1); -} - -int fence_signal(struct fence *fence); -int fence_signal_locked(struct fence *fence); -signed long fence_default_wait(struct fence *fence, bool intr, signed long timeout); -int fence_add_callback(struct fence *fence, struct fence_cb *cb, - fence_func_t func); -bool fence_remove_callback(struct fence *fence, struct fence_cb *cb); -void fence_enable_sw_signaling(struct fence *fence); - -/** - * fence_is_signaled_locked - Return an indication if the fence is signaled yet. - * @fence: [in] the fence to check - * - * Returns true if the fence was already signaled, false if not. Since this - * function doesn't enable signaling, it is not guaranteed to ever return - * true if fence_add_callback, fence_wait or fence_enable_sw_signaling - * haven't been called before. - * - * This function requires fence->lock to be held. - */ -static inline bool -fence_is_signaled_locked(struct fence *fence) -{ - if (test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->flags)) - return true; - - if (fence->ops->signaled && fence->ops->signaled(fence)) { - fence_signal_locked(fence); - return true; - } - - return false; -} - -/** - * fence_is_signaled - Return an indication if the fence is signaled yet. - * @fence: [in] the fence to check - * - * Returns true if the fence was already signaled, false if not. Since this - * function doesn't enable signaling, it is not guaranteed to ever return - * true if fence_add_callback, fence_wait or fence_enable_sw_signaling - * haven't been called before. - * - * It's recommended for seqno fences to call fence_signal when the - * operation is complete, it makes it possible to prevent issues from - * wraparound between time of issue and time of use by checking the return - * value of this function before calling hardware-specific wait instructions. 
- */ -static inline bool -fence_is_signaled(struct fence *fence) -{ - if (test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->flags)) - return true; - - if (fence->ops->signaled && fence->ops->signaled(fence)) { - fence_signal(fence); - return true; - } - - return false; -} - -/** - * fence_is_later - return if f1 is chronologically later than f2 - * @f1: [in] the first fence from the same context - * @f2: [in] the second fence from the same context - * - * Returns true if f1 is chronologically later than f2. Both fences must be - * from the same context, since a seqno is not re-used across contexts. - */ -static inline bool fence_is_later(struct fence *f1, struct fence *f2) -{ - if (WARN_ON(f1->context != f2->context)) - return false; - - return (int)(f1->seqno - f2->seqno) > 0; -} - -/** - * fence_later - return the chronologically later fence - * @f1: [in] the first fence from the same context - * @f2: [in] the second fence from the same context - * - * Returns NULL if both fences are signaled, otherwise the fence that would be - * signaled last. Both fences must be from the same context, since a seqno is - * not re-used across contexts. - */ -static inline struct fence *fence_later(struct fence *f1, struct fence *f2) -{ - if (WARN_ON(f1->context != f2->context)) - return NULL; - - /* - * can't check just FENCE_FLAG_SIGNALED_BIT here, it may never have been - * set if enable_signaling wasn't called, and enabling that here is - * overkill. - */ - if (fence_is_later(f1, f2)) - return fence_is_signaled(f1) ? NULL : f1; - else - return fence_is_signaled(f2) ? 
NULL : f2; -} - -signed long fence_wait_timeout(struct fence *, bool intr, signed long timeout); -signed long fence_wait_any_timeout(struct fence **fences, uint32_t count, - bool intr, signed long timeout); - -/** - * fence_wait - sleep until the fence gets signaled - * @fence: [in] the fence to wait on - * @intr: [in] if true, do an interruptible wait - * - * This function will return -ERESTARTSYS if interrupted by a signal, - * or 0 if the fence was signaled. Other error values may be - * returned on custom implementations. - * - * Performs a synchronous wait on this fence. It is assumed the caller - * directly or indirectly holds a reference to the fence, otherwise the - * fence might be freed before return, resulting in undefined behavior. - */ -static inline signed long fence_wait(struct fence *fence, bool intr) -{ - signed long ret; - - /* Since fence_wait_timeout cannot timeout with - * MAX_SCHEDULE_TIMEOUT, only valid return values are - * -ERESTARTSYS and MAX_SCHEDULE_TIMEOUT. - */ - ret = fence_wait_timeout(fence, intr, MAX_SCHEDULE_TIMEOUT); - - return ret < 0 ? ret : 0; -} - -u64 fence_context_alloc(unsigned num); - -#define FENCE_TRACE(f, fmt, args...) \ - do { \ - struct fence *__ff = (f); \ - if (IS_ENABLED(CONFIG_FENCE_TRACE)) \ - pr_info("f %llu#%u: " fmt, \ - __ff->context, __ff->seqno, ##args); \ - } while (0) - -#define FENCE_WARN(f, fmt, args...) \ - do { \ - struct fence *__ff = (f); \ - pr_warn("f %llu#%u: " fmt, __ff->context, __ff->seqno, \ - ##args); \ - } while (0) - -#define FENCE_ERR(f, fmt, args...) 
\ - do { \ - struct fence *__ff = (f); \ - pr_err("f %llu#%u: " fmt, __ff->context, __ff->seqno, \ - ##args); \ - } while (0) - -#endif /* __LINUX_FENCE_H */ diff --git a/include/linux/reservation.h b/include/linux/reservation.h index b0f305e77b7f..2e313cca08f0 100644 --- a/include/linux/reservation.h +++ b/include/linux/reservation.h @@ -40,7 +40,7 @@ #define _LINUX_RESERVATION_H #include -#include +#include #include #include #include @@ -59,7 +59,7 @@ extern const char reservation_seqcount_string[]; struct reservation_object_list { struct rcu_head rcu; u32 shared_count, shared_max; - struct fence __rcu *shared[]; + struct dma_fence __rcu *shared[]; }; /** @@ -74,7 +74,7 @@ struct reservation_object { struct ww_mutex lock; seqcount_t seq; - struct fence __rcu *fence_excl; + struct dma_fence __rcu *fence_excl; struct reservation_object_list __rcu *fence; struct reservation_object_list *staged; }; @@ -107,7 +107,7 @@ reservation_object_fini(struct reservation_object *obj) { int i; struct reservation_object_list *fobj; - struct fence *excl; + struct dma_fence *excl; /* * This object should be dead and all references must have @@ -115,12 +115,12 @@ reservation_object_fini(struct reservation_object *obj) */ excl = rcu_dereference_protected(obj->fence_excl, 1); if (excl) - fence_put(excl); + dma_fence_put(excl); fobj = rcu_dereference_protected(obj->fence, 1); if (fobj) { for (i = 0; i < fobj->shared_count; ++i) - fence_put(rcu_dereference_protected(fobj->shared[i], 1)); + dma_fence_put(rcu_dereference_protected(fobj->shared[i], 1)); kfree(fobj); } @@ -155,7 +155,7 @@ reservation_object_get_list(struct reservation_object *obj) * RETURNS * The exclusive fence or NULL */ -static inline struct fence * +static inline struct dma_fence * reservation_object_get_excl(struct reservation_object *obj) { return rcu_dereference_protected(obj->fence_excl, @@ -173,10 +173,10 @@ reservation_object_get_excl(struct reservation_object *obj) * RETURNS * The exclusive fence or NULL if none 
*/ -static inline struct fence * +static inline struct dma_fence * reservation_object_get_excl_rcu(struct reservation_object *obj) { - struct fence *fence; + struct dma_fence *fence; unsigned seq; retry: seq = read_seqcount_begin(&obj->seq); @@ -186,22 +186,22 @@ retry: rcu_read_unlock(); goto retry; } - fence = fence_get(fence); + fence = dma_fence_get(fence); rcu_read_unlock(); return fence; } int reservation_object_reserve_shared(struct reservation_object *obj); void reservation_object_add_shared_fence(struct reservation_object *obj, - struct fence *fence); + struct dma_fence *fence); void reservation_object_add_excl_fence(struct reservation_object *obj, - struct fence *fence); + struct dma_fence *fence); int reservation_object_get_fences_rcu(struct reservation_object *obj, - struct fence **pfence_excl, + struct dma_fence **pfence_excl, unsigned *pshared_count, - struct fence ***pshared); + struct dma_fence ***pshared); long reservation_object_wait_timeout_rcu(struct reservation_object *obj, bool wait_all, bool intr, diff --git a/include/linux/seqno-fence.h b/include/linux/seqno-fence.h index a1ba6a5ccdd6..c58c535d12a8 100644 --- a/include/linux/seqno-fence.h +++ b/include/linux/seqno-fence.h @@ -20,7 +20,7 @@ #ifndef __LINUX_SEQNO_FENCE_H #define __LINUX_SEQNO_FENCE_H -#include +#include #include enum seqno_fence_condition { @@ -29,15 +29,15 @@ enum seqno_fence_condition { }; struct seqno_fence { - struct fence base; + struct dma_fence base; - const struct fence_ops *ops; + const struct dma_fence_ops *ops; struct dma_buf *sync_buf; uint32_t seqno_ofs; enum seqno_fence_condition condition; }; -extern const struct fence_ops seqno_fence_ops; +extern const struct dma_fence_ops seqno_fence_ops; /** * to_seqno_fence - cast a fence to a seqno_fence @@ -47,7 +47,7 @@ extern const struct fence_ops seqno_fence_ops; * or the seqno_fence otherwise. 
*/ static inline struct seqno_fence * -to_seqno_fence(struct fence *fence) +to_seqno_fence(struct dma_fence *fence) { if (fence->ops != &seqno_fence_ops) return NULL; @@ -83,9 +83,9 @@ to_seqno_fence(struct fence *fence) * dma-buf for sync_buf, since mapping or unmapping the sync_buf to the * device's vm can be expensive. * - * It is recommended for creators of seqno_fence to call fence_signal + * It is recommended for creators of seqno_fence to call dma_fence_signal() * before destruction. This will prevent possible issues from wraparound at - * time of issue vs time of check, since users can check fence_is_signaled + * time of issue vs time of check, since users can check dma_fence_is_signaled() * before submitting instructions for the hardware to wait on the fence. * However, when ops.enable_signaling is not called, it doesn't have to be * done as soon as possible, just before there's any real danger of seqno @@ -96,18 +96,18 @@ seqno_fence_init(struct seqno_fence *fence, spinlock_t *lock, struct dma_buf *sync_buf, uint32_t context, uint32_t seqno_ofs, uint32_t seqno, enum seqno_fence_condition cond, - const struct fence_ops *ops) + const struct dma_fence_ops *ops) { BUG_ON(!fence || !sync_buf || !ops); BUG_ON(!ops->wait || !ops->enable_signaling || !ops->get_driver_name || !ops->get_timeline_name); /* - * ops is used in fence_init for get_driver_name, so needs to be + * ops is used in dma_fence_init for get_driver_name, so needs to be * initialized first */ fence->ops = ops; - fence_init(&fence->base, &seqno_fence_ops, lock, context, seqno); + dma_fence_init(&fence->base, &seqno_fence_ops, lock, context, seqno); get_dma_buf(sync_buf); fence->sync_buf = sync_buf; fence->seqno_ofs = seqno_ofs; diff --git a/include/linux/sync_file.h b/include/linux/sync_file.h index aa17ccfc2f57..3e3ab84fc4cd 100644 --- a/include/linux/sync_file.h +++ b/include/linux/sync_file.h @@ -18,8 +18,8 @@ #include #include #include -#include -#include +#include +#include /** * struct 
sync_file - sync file to export to the userspace @@ -41,13 +41,13 @@ struct sync_file { wait_queue_head_t wq; - struct fence *fence; - struct fence_cb cb; + struct dma_fence *fence; + struct dma_fence_cb cb; }; -#define POLL_ENABLED FENCE_FLAG_USER_BITS +#define POLL_ENABLED DMA_FENCE_FLAG_USER_BITS -struct sync_file *sync_file_create(struct fence *fence); -struct fence *sync_file_get_fence(int fd); +struct sync_file *sync_file_create(struct dma_fence *fence); +struct dma_fence *sync_file_get_fence(int fd); #endif /* _LINUX_SYNC_H */ diff --git a/include/trace/events/dma_fence.h b/include/trace/events/dma_fence.h new file mode 100644 index 000000000000..1157cb4c3c6f --- /dev/null +++ b/include/trace/events/dma_fence.h @@ -0,0 +1,128 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM dma_fence + +#if !defined(_TRACE_DMA_FENCE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_DMA_FENCE_H + +#include + +struct dma_fence; + +TRACE_EVENT(dma_fence_annotate_wait_on, + + /* fence: the fence waiting on f1, f1: the fence to be waited on. 
*/ + TP_PROTO(struct dma_fence *fence, struct dma_fence *f1), + + TP_ARGS(fence, f1), + + TP_STRUCT__entry( + __string(driver, fence->ops->get_driver_name(fence)) + __string(timeline, fence->ops->get_timeline_name(fence)) + __field(unsigned int, context) + __field(unsigned int, seqno) + + __string(waiting_driver, f1->ops->get_driver_name(f1)) + __string(waiting_timeline, f1->ops->get_timeline_name(f1)) + __field(unsigned int, waiting_context) + __field(unsigned int, waiting_seqno) + ), + + TP_fast_assign( + __assign_str(driver, fence->ops->get_driver_name(fence)) + __assign_str(timeline, fence->ops->get_timeline_name(fence)) + __entry->context = fence->context; + __entry->seqno = fence->seqno; + + __assign_str(waiting_driver, f1->ops->get_driver_name(f1)) + __assign_str(waiting_timeline, f1->ops->get_timeline_name(f1)) + __entry->waiting_context = f1->context; + __entry->waiting_seqno = f1->seqno; + + ), + + TP_printk("driver=%s timeline=%s context=%u seqno=%u " \ + "waits on driver=%s timeline=%s context=%u seqno=%u", + __get_str(driver), __get_str(timeline), __entry->context, + __entry->seqno, + __get_str(waiting_driver), __get_str(waiting_timeline), + __entry->waiting_context, __entry->waiting_seqno) +); + +DECLARE_EVENT_CLASS(dma_fence, + + TP_PROTO(struct dma_fence *fence), + + TP_ARGS(fence), + + TP_STRUCT__entry( + __string(driver, fence->ops->get_driver_name(fence)) + __string(timeline, fence->ops->get_timeline_name(fence)) + __field(unsigned int, context) + __field(unsigned int, seqno) + ), + + TP_fast_assign( + __assign_str(driver, fence->ops->get_driver_name(fence)) + __assign_str(timeline, fence->ops->get_timeline_name(fence)) + __entry->context = fence->context; + __entry->seqno = fence->seqno; + ), + + TP_printk("driver=%s timeline=%s context=%u seqno=%u", + __get_str(driver), __get_str(timeline), __entry->context, + __entry->seqno) +); + +DEFINE_EVENT(dma_fence, dma_fence_emit, + + TP_PROTO(struct dma_fence *fence), + + TP_ARGS(fence) +); + 
+DEFINE_EVENT(dma_fence, dma_fence_init, + + TP_PROTO(struct dma_fence *fence), + + TP_ARGS(fence) +); + +DEFINE_EVENT(dma_fence, dma_fence_destroy, + + TP_PROTO(struct dma_fence *fence), + + TP_ARGS(fence) +); + +DEFINE_EVENT(dma_fence, dma_fence_enable_signal, + + TP_PROTO(struct dma_fence *fence), + + TP_ARGS(fence) +); + +DEFINE_EVENT(dma_fence, dma_fence_signaled, + + TP_PROTO(struct dma_fence *fence), + + TP_ARGS(fence) +); + +DEFINE_EVENT(dma_fence, dma_fence_wait_start, + + TP_PROTO(struct dma_fence *fence), + + TP_ARGS(fence) +); + +DEFINE_EVENT(dma_fence, dma_fence_wait_end, + + TP_PROTO(struct dma_fence *fence), + + TP_ARGS(fence) +); + +#endif /* _TRACE_DMA_FENCE_H */ + +/* This part must be outside protection */ +#include diff --git a/include/trace/events/fence.h b/include/trace/events/fence.h deleted file mode 100644 index d6dfa05ba322..000000000000 --- a/include/trace/events/fence.h +++ /dev/null @@ -1,128 +0,0 @@ -#undef TRACE_SYSTEM -#define TRACE_SYSTEM fence - -#if !defined(_TRACE_FENCE_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_FENCE_H - -#include - -struct fence; - -TRACE_EVENT(fence_annotate_wait_on, - - /* fence: the fence waiting on f1, f1: the fence to be waited on. 
*/ - TP_PROTO(struct fence *fence, struct fence *f1), - - TP_ARGS(fence, f1), - - TP_STRUCT__entry( - __string(driver, fence->ops->get_driver_name(fence)) - __string(timeline, fence->ops->get_timeline_name(fence)) - __field(unsigned int, context) - __field(unsigned int, seqno) - - __string(waiting_driver, f1->ops->get_driver_name(f1)) - __string(waiting_timeline, f1->ops->get_timeline_name(f1)) - __field(unsigned int, waiting_context) - __field(unsigned int, waiting_seqno) - ), - - TP_fast_assign( - __assign_str(driver, fence->ops->get_driver_name(fence)) - __assign_str(timeline, fence->ops->get_timeline_name(fence)) - __entry->context = fence->context; - __entry->seqno = fence->seqno; - - __assign_str(waiting_driver, f1->ops->get_driver_name(f1)) - __assign_str(waiting_timeline, f1->ops->get_timeline_name(f1)) - __entry->waiting_context = f1->context; - __entry->waiting_seqno = f1->seqno; - - ), - - TP_printk("driver=%s timeline=%s context=%u seqno=%u " \ - "waits on driver=%s timeline=%s context=%u seqno=%u", - __get_str(driver), __get_str(timeline), __entry->context, - __entry->seqno, - __get_str(waiting_driver), __get_str(waiting_timeline), - __entry->waiting_context, __entry->waiting_seqno) -); - -DECLARE_EVENT_CLASS(fence, - - TP_PROTO(struct fence *fence), - - TP_ARGS(fence), - - TP_STRUCT__entry( - __string(driver, fence->ops->get_driver_name(fence)) - __string(timeline, fence->ops->get_timeline_name(fence)) - __field(unsigned int, context) - __field(unsigned int, seqno) - ), - - TP_fast_assign( - __assign_str(driver, fence->ops->get_driver_name(fence)) - __assign_str(timeline, fence->ops->get_timeline_name(fence)) - __entry->context = fence->context; - __entry->seqno = fence->seqno; - ), - - TP_printk("driver=%s timeline=%s context=%u seqno=%u", - __get_str(driver), __get_str(timeline), __entry->context, - __entry->seqno) -); - -DEFINE_EVENT(fence, fence_emit, - - TP_PROTO(struct fence *fence), - - TP_ARGS(fence) -); - -DEFINE_EVENT(fence, fence_init, - - 
TP_PROTO(struct fence *fence), - - TP_ARGS(fence) -); - -DEFINE_EVENT(fence, fence_destroy, - - TP_PROTO(struct fence *fence), - - TP_ARGS(fence) -); - -DEFINE_EVENT(fence, fence_enable_signal, - - TP_PROTO(struct fence *fence), - - TP_ARGS(fence) -); - -DEFINE_EVENT(fence, fence_signaled, - - TP_PROTO(struct fence *fence), - - TP_ARGS(fence) -); - -DEFINE_EVENT(fence, fence_wait_start, - - TP_PROTO(struct fence *fence), - - TP_ARGS(fence) -); - -DEFINE_EVENT(fence, fence_wait_end, - - TP_PROTO(struct fence *fence), - - TP_ARGS(fence) -); - -#endif /* _TRACE_FENCE_H */ - -/* This part must be outside protection */ -#include -- cgit v1.2.3 From 974e6f02e27e1b46c6c5e600e70ced25079f73eb Mon Sep 17 00:00:00 2001 From: Enric Balletbo i Serra Date: Mon, 1 Aug 2016 11:54:35 +0200 Subject: iio: cros_ec_sensors_core: Add common functions for the ChromeOS EC Sensor Hub. Add the core functions to be able to support the sensors attached behind the ChromeOS Embedded Controller and used by other IIO cros-ec sensor drivers. The cros_ec_sensor_core driver matches with current driver in ChromeOS 4.4 tree, so it includes all the fixes at the moment. The support for this driver was made by Gwendal Grignou. The original patch and all the fixes has been squashed and rebased on top of mainline. 
Signed-off-by: Gwendal Grignou Signed-off-by: Guenter Roeck [eballetbo: split, squash and rebase on top of mainline the patches found in ChromeOS tree] Signed-off-by: Enric Balletbo i Serra Signed-off-by: Jonathan Cameron --- Documentation/ABI/testing/sysfs-bus-iio-cros-ec | 18 + drivers/iio/common/Kconfig | 1 + drivers/iio/common/Makefile | 1 + drivers/iio/common/cros_ec_sensors/Kconfig | 14 + drivers/iio/common/cros_ec_sensors/Makefile | 5 + .../common/cros_ec_sensors/cros_ec_sensors_core.c | 450 +++++++++++++++++++++ .../common/cros_ec_sensors/cros_ec_sensors_core.h | 175 ++++++++ include/linux/mfd/cros_ec.h | 9 + include/linux/mfd/cros_ec_commands.h | 99 ++++- 9 files changed, 767 insertions(+), 5 deletions(-) create mode 100644 Documentation/ABI/testing/sysfs-bus-iio-cros-ec create mode 100644 drivers/iio/common/cros_ec_sensors/Kconfig create mode 100644 drivers/iio/common/cros_ec_sensors/Makefile create mode 100644 drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c create mode 100644 drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.h (limited to 'include/linux') diff --git a/Documentation/ABI/testing/sysfs-bus-iio-cros-ec b/Documentation/ABI/testing/sysfs-bus-iio-cros-ec new file mode 100644 index 000000000000..297b9720f024 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-bus-iio-cros-ec @@ -0,0 +1,18 @@ +What: /sys/bus/iio/devices/iio:deviceX/calibrate +Date: July 2015 +KernelVersion: 4.7 +Contact: linux-iio@vger.kernel.org +Description: + Writing '1' will perform a FOC (Fast Online Calibration). The + corresponding calibration offsets can be read from *_calibbias + entries. + +What: /sys/bus/iio/devices/iio:deviceX/location +Date: July 2015 +KernelVersion: 4.7 +Contact: linux-iio@vger.kernel.org +Description: + This attribute returns a string with the physical location where + the motion sensor is placed. For example, in a laptop a motion + sensor can be located on the base or on the lid. Current valid + values are 'base' and 'lid'. 
diff --git a/drivers/iio/common/Kconfig b/drivers/iio/common/Kconfig index 26a6026de614..e108996a9627 100644 --- a/drivers/iio/common/Kconfig +++ b/drivers/iio/common/Kconfig @@ -2,6 +2,7 @@ # IIO common modules # +source "drivers/iio/common/cros_ec_sensors/Kconfig" source "drivers/iio/common/hid-sensors/Kconfig" source "drivers/iio/common/ms_sensors/Kconfig" source "drivers/iio/common/ssp_sensors/Kconfig" diff --git a/drivers/iio/common/Makefile b/drivers/iio/common/Makefile index 585da6a1b188..6fa760e1bdd5 100644 --- a/drivers/iio/common/Makefile +++ b/drivers/iio/common/Makefile @@ -7,6 +7,7 @@ # # When adding new entries keep the list in alphabetical order +obj-y += cros_ec_sensors/ obj-y += hid-sensors/ obj-y += ms_sensors/ obj-y += ssp_sensors/ diff --git a/drivers/iio/common/cros_ec_sensors/Kconfig b/drivers/iio/common/cros_ec_sensors/Kconfig new file mode 100644 index 000000000000..24743be15a5b --- /dev/null +++ b/drivers/iio/common/cros_ec_sensors/Kconfig @@ -0,0 +1,14 @@ +# +# Chrome OS Embedded Controller managed sensors library +# +config IIO_CROS_EC_SENSORS_CORE + tristate "ChromeOS EC Sensors Core" + depends on SYSFS && MFD_CROS_EC + select IIO_BUFFER + select IIO_TRIGGERED_BUFFER + help + Base module for the ChromeOS EC Sensors module. + Contains core functions used by other IIO CrosEC sensor + drivers. + Define common attributes and sysfs interrupt handler. + diff --git a/drivers/iio/common/cros_ec_sensors/Makefile b/drivers/iio/common/cros_ec_sensors/Makefile new file mode 100644 index 000000000000..95b690139bfd --- /dev/null +++ b/drivers/iio/common/cros_ec_sensors/Makefile @@ -0,0 +1,5 @@ +# +# Makefile for sensors seen through the ChromeOS EC sensor hub. 
+# + +obj-$(CONFIG_IIO_CROS_EC_SENSORS_CORE) += cros_ec_sensors_core.o diff --git a/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c b/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c new file mode 100644 index 000000000000..a3be7991355e --- /dev/null +++ b/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c @@ -0,0 +1,450 @@ +/* + * cros_ec_sensors_core - Common function for Chrome OS EC sensor driver. + * + * Copyright (C) 2016 Google, Inc + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "cros_ec_sensors_core.h" + +static char *cros_ec_loc[] = { + [MOTIONSENSE_LOC_BASE] = "base", + [MOTIONSENSE_LOC_LID] = "lid", + [MOTIONSENSE_LOC_MAX] = "unknown", +}; + +int cros_ec_sensors_core_init(struct platform_device *pdev, + struct iio_dev *indio_dev, + bool physical_device) +{ + struct device *dev = &pdev->dev; + struct cros_ec_sensors_core_state *state = iio_priv(indio_dev); + struct cros_ec_dev *ec = dev_get_drvdata(pdev->dev.parent); + struct cros_ec_sensor_platform *sensor_platform = dev_get_platdata(dev); + + platform_set_drvdata(pdev, indio_dev); + + state->ec = ec->ec_dev; + state->msg = devm_kzalloc(&pdev->dev, + sizeof(*state->msg) + max((u16)sizeof(struct ec_params_motion_sense), + state->ec->max_response), GFP_KERNEL); + if (!state->msg) + return -ENOMEM; + + state->resp = (struct ec_response_motion_sense *)state->msg->data; + + mutex_init(&state->cmd_lock); + + /* Set up the host command structure. 
*/ + state->msg->version = 2; + state->msg->command = EC_CMD_MOTION_SENSE_CMD + ec->cmd_offset; + state->msg->outsize = sizeof(struct ec_params_motion_sense); + + indio_dev->dev.parent = &pdev->dev; + indio_dev->name = pdev->name; + + if (physical_device) { + indio_dev->modes = INDIO_DIRECT_MODE; + + state->param.cmd = MOTIONSENSE_CMD_INFO; + state->param.info.sensor_num = sensor_platform->sensor_num; + if (cros_ec_motion_send_host_cmd(state, 0)) { + dev_warn(dev, "Can not access sensor info\n"); + return -EIO; + } + state->type = state->resp->info.type; + state->loc = state->resp->info.location; + } + + return 0; +} +EXPORT_SYMBOL_GPL(cros_ec_sensors_core_init); + +int cros_ec_motion_send_host_cmd(struct cros_ec_sensors_core_state *state, + u16 opt_length) +{ + int ret; + + if (opt_length) + state->msg->insize = min(opt_length, state->ec->max_response); + else + state->msg->insize = state->ec->max_response; + + memcpy(state->msg->data, &state->param, sizeof(state->param)); + + ret = cros_ec_cmd_xfer_status(state->ec, state->msg); + if (ret < 0) + return -EIO; + + if (ret && + state->resp != (struct ec_response_motion_sense *)state->msg->data) + memcpy(state->resp, state->msg->data, ret); + + return 0; +} +EXPORT_SYMBOL_GPL(cros_ec_motion_send_host_cmd); + +static ssize_t cros_ec_sensors_calibrate(struct iio_dev *indio_dev, + uintptr_t private, const struct iio_chan_spec *chan, + const char *buf, size_t len) +{ + struct cros_ec_sensors_core_state *st = iio_priv(indio_dev); + int ret, i; + bool calibrate; + + ret = strtobool(buf, &calibrate); + if (ret < 0) + return ret; + if (!calibrate) + return -EINVAL; + + mutex_lock(&st->cmd_lock); + st->param.cmd = MOTIONSENSE_CMD_PERFORM_CALIB; + ret = cros_ec_motion_send_host_cmd(st, 0); + if (ret != 0) { + dev_warn(&indio_dev->dev, "Unable to calibrate sensor\n"); + } else { + /* Save values */ + for (i = CROS_EC_SENSOR_X; i < CROS_EC_SENSOR_MAX_AXIS; i++) + st->calib[i] = st->resp->perform_calib.offset[i]; + } + 
mutex_unlock(&st->cmd_lock); + + return ret ? ret : len; +} + +static ssize_t cros_ec_sensors_loc(struct iio_dev *indio_dev, + uintptr_t private, const struct iio_chan_spec *chan, + char *buf) +{ + struct cros_ec_sensors_core_state *st = iio_priv(indio_dev); + + return snprintf(buf, PAGE_SIZE, "%s\n", cros_ec_loc[st->loc]); +} + +const struct iio_chan_spec_ext_info cros_ec_sensors_ext_info[] = { + { + .name = "calibrate", + .shared = IIO_SHARED_BY_ALL, + .write = cros_ec_sensors_calibrate + }, + { + .name = "location", + .shared = IIO_SHARED_BY_ALL, + .read = cros_ec_sensors_loc + }, + { }, +}; +EXPORT_SYMBOL_GPL(cros_ec_sensors_ext_info); + +/** + * cros_ec_sensors_idx_to_reg - convert index into offset in shared memory + * @st: pointer to state information for device + * @idx: sensor index (should be element of enum sensor_index) + * + * Return: address to read at + */ +static unsigned int cros_ec_sensors_idx_to_reg( + struct cros_ec_sensors_core_state *st, + unsigned int idx) +{ + /* + * When using LPC interface, only space for 2 Accel and one Gyro. + * First halfword of MOTIONSENSE_TYPE_ACCEL is used by angle. + */ + if (st->type == MOTIONSENSE_TYPE_ACCEL) + return EC_MEMMAP_ACC_DATA + sizeof(u16) * + (1 + idx + st->param.info.sensor_num * + CROS_EC_SENSOR_MAX_AXIS); + + return EC_MEMMAP_GYRO_DATA + sizeof(u16) * idx; +} + +static int cros_ec_sensors_cmd_read_u8(struct cros_ec_device *ec, + unsigned int offset, u8 *dest) +{ + return ec->cmd_readmem(ec, offset, 1, dest); +} + +static int cros_ec_sensors_cmd_read_u16(struct cros_ec_device *ec, + unsigned int offset, u16 *dest) +{ + __le16 tmp; + int ret = ec->cmd_readmem(ec, offset, 2, &tmp); + + if (ret >= 0) + *dest = le16_to_cpu(tmp); + + return ret; +} + +/** + * cros_ec_sensors_read_until_not_busy() - read until is not busy + * + * @st: pointer to state information for device + * + * Read from EC status byte until it reads not busy. + * Return: 8-bit status if ok, -errno on failure. 
+ */ +static int cros_ec_sensors_read_until_not_busy( + struct cros_ec_sensors_core_state *st) +{ + struct cros_ec_device *ec = st->ec; + u8 status; + int ret, attempts = 0; + + ret = cros_ec_sensors_cmd_read_u8(ec, EC_MEMMAP_ACC_STATUS, &status); + if (ret < 0) + return ret; + + while (status & EC_MEMMAP_ACC_STATUS_BUSY_BIT) { + /* Give up after enough attempts, return error. */ + if (attempts++ >= 50) + return -EIO; + + /* Small delay every so often. */ + if (attempts % 5 == 0) + msleep(25); + + ret = cros_ec_sensors_cmd_read_u8(ec, EC_MEMMAP_ACC_STATUS, + &status); + if (ret < 0) + return ret; + } + + return status; +} + +/** + * read_ec_sensors_data_unsafe() - read acceleration data from EC shared memory + * @indio_dev: pointer to IIO device + * @scan_mask: bitmap of the sensor indices to scan + * @data: location to store data + * + * This is the unsafe function for reading the EC data. It does not guarantee + * that the EC will not modify the data as it is being read in. + * + * Return: 0 on success, -errno on failure. + */ +static int cros_ec_sensors_read_data_unsafe(struct iio_dev *indio_dev, + unsigned long scan_mask, s16 *data) +{ + struct cros_ec_sensors_core_state *st = iio_priv(indio_dev); + struct cros_ec_device *ec = st->ec; + unsigned int i; + int ret; + + /* Read all sensors enabled in scan_mask. Each value is 2 bytes. 
*/ + for_each_set_bit(i, &scan_mask, indio_dev->masklength) { + ret = cros_ec_sensors_cmd_read_u16(ec, + cros_ec_sensors_idx_to_reg(st, i), + data); + if (ret < 0) + return ret; + + data++; + } + + return 0; +} + +int cros_ec_sensors_read_lpc(struct iio_dev *indio_dev, + unsigned long scan_mask, s16 *data) +{ + struct cros_ec_sensors_core_state *st = iio_priv(indio_dev); + struct cros_ec_device *ec = st->ec; + u8 samp_id = 0xff, status = 0; + int ret, attempts = 0; + + /* + * Continually read all data from EC until the status byte after + * all reads reflects that the EC is not busy and the sample id + * matches the sample id from before all reads. This guarantees + * that data read in was not modified by the EC while reading. + */ + while ((status & (EC_MEMMAP_ACC_STATUS_BUSY_BIT | + EC_MEMMAP_ACC_STATUS_SAMPLE_ID_MASK)) != samp_id) { + /* If we have tried to read too many times, return error. */ + if (attempts++ >= 5) + return -EIO; + + /* Wait until EC is not busy; ret may hold -errno. */ + ret = cros_ec_sensors_read_until_not_busy(st); + if (ret < 0) + return ret; + + /* + * Store the current sample id so that we can compare to the + * sample id after reading the data. + */ + samp_id = ret & EC_MEMMAP_ACC_STATUS_SAMPLE_ID_MASK; + + /* Read all EC data, format it, and store it into data. */ + ret = cros_ec_sensors_read_data_unsafe(indio_dev, scan_mask, + data); + if (ret < 0) + return ret; + + /* Read status byte. */ + ret = cros_ec_sensors_cmd_read_u8(ec, EC_MEMMAP_ACC_STATUS, + &status); + if (ret < 0) + return ret; + } + + return 0; +} +EXPORT_SYMBOL_GPL(cros_ec_sensors_read_lpc); + +int cros_ec_sensors_read_cmd(struct iio_dev *indio_dev, + unsigned long scan_mask, s16 *data) +{ + struct cros_ec_sensors_core_state *st = iio_priv(indio_dev); + int ret; + unsigned int i; + + /* Read all sensor data through a command. 
*/ + st->param.cmd = MOTIONSENSE_CMD_DATA; + ret = cros_ec_motion_send_host_cmd(st, sizeof(st->resp->data)); + if (ret != 0) { + dev_warn(&indio_dev->dev, "Unable to read sensor data\n"); + return ret; + } + + for_each_set_bit(i, &scan_mask, indio_dev->masklength) { + *data = st->resp->data.data[i]; + data++; + } + + return 0; +} +EXPORT_SYMBOL_GPL(cros_ec_sensors_read_cmd); + +irqreturn_t cros_ec_sensors_capture(int irq, void *p) +{ + struct iio_poll_func *pf = p; + struct iio_dev *indio_dev = pf->indio_dev; + struct cros_ec_sensors_core_state *st = iio_priv(indio_dev); + int ret; + + mutex_lock(&st->cmd_lock); + + /* Clear capture data. */ + memset(st->samples, 0, indio_dev->scan_bytes); + + /* Read data based on which channels are enabled in scan mask. */ + ret = st->read_ec_sensors_data(indio_dev, + *(indio_dev->active_scan_mask), + (s16 *)st->samples); + if (ret < 0) + goto done; + + iio_push_to_buffers_with_timestamp(indio_dev, st->samples, + iio_get_time_ns(indio_dev)); + +done: + /* + * Tell the core we are done with this trigger and ready for the + * next one. 
+ */ + iio_trigger_notify_done(indio_dev->trig); + + mutex_unlock(&st->cmd_lock); + + return IRQ_HANDLED; +} +EXPORT_SYMBOL_GPL(cros_ec_sensors_capture); + +int cros_ec_sensors_core_read(struct cros_ec_sensors_core_state *st, + struct iio_chan_spec const *chan, + int *val, int *val2, long mask) +{ + int ret = IIO_VAL_INT; + + switch (mask) { + case IIO_CHAN_INFO_SAMP_FREQ: + st->param.cmd = MOTIONSENSE_CMD_EC_RATE; + st->param.ec_rate.data = + EC_MOTION_SENSE_NO_VALUE; + + if (cros_ec_motion_send_host_cmd(st, 0)) + ret = -EIO; + else + *val = st->resp->ec_rate.ret; + break; + case IIO_CHAN_INFO_FREQUENCY: + st->param.cmd = MOTIONSENSE_CMD_SENSOR_ODR; + st->param.sensor_odr.data = + EC_MOTION_SENSE_NO_VALUE; + + if (cros_ec_motion_send_host_cmd(st, 0)) + ret = -EIO; + else + *val = st->resp->sensor_odr.ret; + break; + default: + break; + } + + return ret; +} +EXPORT_SYMBOL_GPL(cros_ec_sensors_core_read); + +int cros_ec_sensors_core_write(struct cros_ec_sensors_core_state *st, + struct iio_chan_spec const *chan, + int val, int val2, long mask) +{ + int ret = 0; + + switch (mask) { + case IIO_CHAN_INFO_FREQUENCY: + st->param.cmd = MOTIONSENSE_CMD_SENSOR_ODR; + st->param.sensor_odr.data = val; + + /* Always roundup, so caller gets at least what it asks for. 
*/ + st->param.sensor_odr.roundup = 1; + + if (cros_ec_motion_send_host_cmd(st, 0)) + ret = -EIO; + break; + case IIO_CHAN_INFO_SAMP_FREQ: + st->param.cmd = MOTIONSENSE_CMD_EC_RATE; + st->param.ec_rate.data = val; + + if (cros_ec_motion_send_host_cmd(st, 0)) + ret = -EIO; + else + st->curr_sampl_freq = val; + break; + default: + ret = -EINVAL; + break; + } + return ret; +} +EXPORT_SYMBOL_GPL(cros_ec_sensors_core_write); + +MODULE_DESCRIPTION("ChromeOS EC sensor hub core functions"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.h b/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.h new file mode 100644 index 000000000000..8bc2ca3c2e2e --- /dev/null +++ b/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.h @@ -0,0 +1,175 @@ +/* + * ChromeOS EC sensor hub + * + * Copyright (C) 2016 Google, Inc + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef __CROS_EC_SENSORS_CORE_H +#define __CROS_EC_SENSORS_CORE_H + +#include + +enum { + CROS_EC_SENSOR_X, + CROS_EC_SENSOR_Y, + CROS_EC_SENSOR_Z, + CROS_EC_SENSOR_MAX_AXIS, +}; + +/* EC returns sensor values using signed 16 bit registers */ +#define CROS_EC_SENSOR_BITS 16 + +/* + * 4 16 bit channels are allowed. + * Good enough for current sensors, they use up to 3 16 bit vectors. 
+ */ +#define CROS_EC_SAMPLE_SIZE (sizeof(s64) * 2) + +/* Minimum sampling period to use when device is suspending */ +#define CROS_EC_MIN_SUSPEND_SAMPLING_FREQUENCY 1000 /* 1 second */ + +/** + * struct cros_ec_sensors_core_state - state data for EC sensors IIO driver + * @ec: cros EC device structure + * @cmd_lock: lock used to prevent simultaneous access to the + * commands. + * @msg: cros EC command structure + * @param: motion sensor parameters structure + * @resp: motion sensor response structure + * @type: type of motion sensor + * @loc: location where the motion sensor is placed + * @calib: calibration parameters. Note that trigger + * captured data will always provide the calibrated + * data + * @samples: static array to hold data from a single capture. + * For each channel we need 2 bytes, except for + * the timestamp. The timestamp is always last and + * is always 8-byte aligned. + * @read_ec_sensors_data: function used for accessing sensors values + * @curr_sampl_freq: current sampling period + */ +struct cros_ec_sensors_core_state { + struct cros_ec_device *ec; + struct mutex cmd_lock; + + struct cros_ec_command *msg; + struct ec_params_motion_sense param; + struct ec_response_motion_sense *resp; + + enum motionsensor_type type; + enum motionsensor_location loc; + + s16 calib[CROS_EC_SENSOR_MAX_AXIS]; + + u8 samples[CROS_EC_SAMPLE_SIZE]; + + int (*read_ec_sensors_data)(struct iio_dev *indio_dev, + unsigned long scan_mask, s16 *data); + + int curr_sampl_freq; +}; + +/** + * cros_ec_sensors_read_lpc() - retrieve data from EC shared memory + * @indio_dev: pointer to IIO device + * @scan_mask: bitmap of the sensor indices to scan + * @data: location to store data + * + * This is the safe function for reading the EC data. It guarantees that the + * data sampled was not modified by the EC while being read. + * + * Return: 0 on success, -errno on failure. 
+ */ +int cros_ec_sensors_read_lpc(struct iio_dev *indio_dev, unsigned long scan_mask, + s16 *data); + +/** + * cros_ec_sensors_read_cmd() - retrieve data using the EC command protocol + * @indio_dev: pointer to IIO device + * @scan_mask: bitmap of the sensor indices to scan + * @data: location to store data + * + * Return: 0 on success, -errno on failure. + */ +int cros_ec_sensors_read_cmd(struct iio_dev *indio_dev, unsigned long scan_mask, + s16 *data); + +/** + * cros_ec_sensors_core_init() - basic initialization of the core structure + * @pdev: platform device created for the sensors + * @indio_dev: iio device structure of the device + * @physical_device: true if the device refers to a physical device + * + * Return: 0 on success, -errno on failure. + */ +int cros_ec_sensors_core_init(struct platform_device *pdev, + struct iio_dev *indio_dev, bool physical_device); + +/** + * cros_ec_sensors_capture() - the trigger handler function + * @irq: the interrupt number. + * @p: a pointer to the poll function. + * + * On a trigger event occurring, if the pollfunc is attached then this + * handler is called as a threaded interrupt (and hence may sleep). It + * is responsible for grabbing data from the device and pushing it into + * the associated buffer. + * + * Return: IRQ_HANDLED + */ +irqreturn_t cros_ec_sensors_capture(int irq, void *p); + +/** + * cros_ec_motion_send_host_cmd() - send motion sense host command + * @st: pointer to state information for device + * @opt_length: optional length to reduce the response size, useful on the data + * path. Otherwise, the maximal allowed response size is used + * + * When called, the sub-command is assumed to be set in param->cmd. + * + * Return: 0 on success, -errno on failure. 
+ */ +int cros_ec_motion_send_host_cmd(struct cros_ec_sensors_core_state *st, + u16 opt_length); + +/** + * cros_ec_sensors_core_read() - function to request a value from the sensor + * @st: pointer to state information for device + * @chan: channel specification structure table + * @val: will contain one element making up the returned value + * @val2: will contain another element making up the returned value + * @mask: specifies which values to be requested + * + * Return: the type of value returned by the device + */ +int cros_ec_sensors_core_read(struct cros_ec_sensors_core_state *st, + struct iio_chan_spec const *chan, + int *val, int *val2, long mask); + +/** + * cros_ec_sensors_core_write() - function to write a value to the sensor + * @st: pointer to state information for device + * @chan: channel specification structure table + * @val: first part of value to write + * @val2: second part of value to write + * @mask: specifies which values to write + * + * Return: the type of value returned by the device + */ +int cros_ec_sensors_core_write(struct cros_ec_sensors_core_state *st, + struct iio_chan_spec const *chan, + int val, int val2, long mask); + +/* List of extended channel specification for all sensors */ +extern const struct iio_chan_spec_ext_info cros_ec_sensors_ext_info[]; + +#endif /* __CROS_EC_SENSORS_CORE_H */ diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h index 76f7ef4d3a0d..1f85b7aff097 100644 --- a/include/linux/mfd/cros_ec.h +++ b/include/linux/mfd/cros_ec.h @@ -148,6 +148,15 @@ struct cros_ec_device { int event_size; }; +/** + * struct cros_ec_sensor_platform - ChromeOS EC sensor platform information + * + * @sensor_num: Id of the sensor, as reported by the EC. + */ +struct cros_ec_sensor_platform { + u8 sensor_num; +}; + /* struct cros_ec_platform - ChromeOS EC platform information * * @ec_name: name of EC device (e.g. 'cros-ec', 'cros-pd', ...) 
diff --git a/include/linux/mfd/cros_ec_commands.h b/include/linux/mfd/cros_ec_commands.h index 76728ff37d01..8826e0f64b0e 100644 --- a/include/linux/mfd/cros_ec_commands.h +++ b/include/linux/mfd/cros_ec_commands.h @@ -1315,6 +1315,24 @@ enum motionsense_command { */ MOTIONSENSE_CMD_KB_WAKE_ANGLE = 5, + /* + * Returns a single sensor data. + */ + MOTIONSENSE_CMD_DATA = 6, + + /* + * Perform low level calibration.. On sensors that support it, ask to + * do offset calibration. + */ + MOTIONSENSE_CMD_PERFORM_CALIB = 10, + + /* + * Sensor Offset command is a setter/getter command for the offset used + * for calibration. The offsets can be calculated by the host, or via + * PERFORM_CALIB command. + */ + MOTIONSENSE_CMD_SENSOR_OFFSET = 11, + /* Number of motionsense sub-commands. */ MOTIONSENSE_NUM_CMDS }; @@ -1335,12 +1353,18 @@ enum motionsensor_id { enum motionsensor_type { MOTIONSENSE_TYPE_ACCEL = 0, MOTIONSENSE_TYPE_GYRO = 1, + MOTIONSENSE_TYPE_MAG = 2, + MOTIONSENSE_TYPE_PROX = 3, + MOTIONSENSE_TYPE_LIGHT = 4, + MOTIONSENSE_TYPE_ACTIVITY = 5, + MOTIONSENSE_TYPE_MAX }; /* List of motion sensor locations. */ enum motionsensor_location { MOTIONSENSE_LOC_BASE = 0, MOTIONSENSE_LOC_LID = 1, + MOTIONSENSE_LOC_MAX, }; /* List of motion sensor chips. */ @@ -1361,6 +1385,31 @@ enum motionsensor_chip { */ #define EC_MOTION_SENSE_NO_VALUE -1 +#define EC_MOTION_SENSE_INVALID_CALIB_TEMP 0x8000 + +/* Set Calibration information */ +#define MOTION_SENSE_SET_OFFSET 1 + +struct ec_response_motion_sensor_data { + /* Flags for each sensor. */ + uint8_t flags; + /* Sensor number the data comes from */ + uint8_t sensor_num; + /* Each sensor is up to 3-axis. 
*/ + union { + int16_t data[3]; + struct { + uint16_t rsvd; + uint32_t timestamp; + } __packed; + struct { + uint8_t activity; /* motionsensor_activity */ + uint8_t state; + int16_t add_info[2]; + }; + }; +} __packed; + struct ec_params_motion_sense { uint8_t cmd; union { @@ -1378,9 +1427,37 @@ struct ec_params_motion_sense { int16_t data; } ec_rate, kb_wake_angle; + /* Used for MOTIONSENSE_CMD_SENSOR_OFFSET */ + struct { + uint8_t sensor_num; + + /* + * bit 0: If set (MOTION_SENSE_SET_OFFSET), set + * the calibration information in the EC. + * If unset, just retrieve calibration information. + */ + uint16_t flags; + + /* + * Temperature at calibration, in units of 0.01 C + * 0x8000: invalid / unknown. + * 0x0: 0C + * 0x7fff: +327.67C + */ + int16_t temp; + + /* + * Offset for calibration. + * Unit: + * Accelerometer: 1/1024 g + * Gyro: 1/1024 deg/s + * Compass: 1/16 uT + */ + int16_t offset[3]; + } __packed sensor_offset; + /* Used for MOTIONSENSE_CMD_INFO. */ struct { - /* Should be element of enum motionsensor_id. */ uint8_t sensor_num; } info; @@ -1410,11 +1487,14 @@ struct ec_response_motion_sense { /* Flags representing the motion sensor module. */ uint8_t module_flags; - /* Flags for each sensor in enum motionsensor_id. */ - uint8_t sensor_flags[EC_MOTION_SENSOR_COUNT]; + /* Number of sensors managed directly by the EC. */ + uint8_t sensor_count; - /* Array of all sensor data. Each sensor is 3-axis. */ - int16_t data[3*EC_MOTION_SENSOR_COUNT]; + /* + * Sensor data is truncated if response_max is too small + * for holding all the data. + */ + struct ec_response_motion_sensor_data sensor[0]; } dump; /* Used for MOTIONSENSE_CMD_INFO. 
*/ @@ -1429,6 +1509,9 @@ struct ec_response_motion_sense { uint8_t chip; } info; + /* Used for MOTIONSENSE_CMD_DATA */ + struct ec_response_motion_sensor_data data; + /* * Used for MOTIONSENSE_CMD_EC_RATE, MOTIONSENSE_CMD_SENSOR_ODR, * MOTIONSENSE_CMD_SENSOR_RANGE, and @@ -1438,6 +1521,12 @@ struct ec_response_motion_sense { /* Current value of the parameter queried. */ int32_t ret; } ec_rate, sensor_odr, sensor_range, kb_wake_angle; + + /* Used for MOTIONSENSE_CMD_SENSOR_OFFSET */ + struct { + int16_t temp; + int16_t offset[3]; + } sensor_offset, perform_calib; }; } __packed; -- cgit v1.2.3 From e4244ebddae27e9200146bba897f12a3950ce722 Mon Sep 17 00:00:00 2001 From: Vincent Palatin Date: Mon, 1 Aug 2016 11:54:37 +0200 Subject: platform/chrome: Introduce a new function to check EC features. Use the EC_CMD_GET_FEATURES message to check the supported features for each MCU. Signed-off-by: Vincent Palatin [tomeu: adapted to changes in mainline] Signed-off-by: Tomeu Vizoso [enric: remove references to USB PD feature and do it more generic] Signed-off-by: Enric Balletbo i Serra Reviewed-by: Guenter Roeck For the MFD changes: Acked-by: Lee Jones Signed-off-by: Jonathan Cameron --- drivers/platform/chrome/cros_ec_dev.c | 37 +++++++++++++++ include/linux/mfd/cros_ec.h | 1 + include/linux/mfd/cros_ec_commands.h | 84 +++++++++++++++++++++++++++++++++++ 3 files changed, 122 insertions(+) (limited to 'include/linux') diff --git a/drivers/platform/chrome/cros_ec_dev.c b/drivers/platform/chrome/cros_ec_dev.c index 8abd80dbcbed..7eb53078b03d 100644 --- a/drivers/platform/chrome/cros_ec_dev.c +++ b/drivers/platform/chrome/cros_ec_dev.c @@ -87,6 +87,41 @@ exit: return ret; } +static int cros_ec_check_features(struct cros_ec_dev *ec, int feature) +{ + struct cros_ec_command *msg; + int ret; + + if (ec->features[0] == -1U && ec->features[1] == -1U) { + /* features bitmap not read yet */ + + msg = kmalloc(sizeof(*msg) + sizeof(ec->features), GFP_KERNEL); + if (!msg) + return -ENOMEM; + 
+ msg->version = 0; + msg->command = EC_CMD_GET_FEATURES + ec->cmd_offset; + msg->insize = sizeof(ec->features); + msg->outsize = 0; + + ret = cros_ec_cmd_xfer(ec->ec_dev, msg); + if (ret < 0 || msg->result != EC_RES_SUCCESS) { + dev_warn(ec->dev, "cannot get EC features: %d/%d\n", + ret, msg->result); + memset(ec->features, 0, sizeof(ec->features)); + } + + memcpy(ec->features, msg->data, sizeof(ec->features)); + + dev_dbg(ec->dev, "EC features %08x %08x\n", + ec->features[0], ec->features[1]); + + kfree(msg); + } + + return ec->features[feature / 32] & EC_FEATURE_MASK_0(feature); +} + /* Device file ops */ static int ec_device_open(struct inode *inode, struct file *filp) { @@ -245,6 +280,8 @@ static int ec_device_probe(struct platform_device *pdev) ec->ec_dev = dev_get_drvdata(dev->parent); ec->dev = dev; ec->cmd_offset = ec_platform->cmd_offset; + ec->features[0] = -1U; /* Not cached yet */ + ec->features[1] = -1U; /* Not cached yet */ device_initialize(&ec->class_dev); cdev_init(&ec->cdev, &fops); diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h index 1f85b7aff097..f62043a75f43 100644 --- a/include/linux/mfd/cros_ec.h +++ b/include/linux/mfd/cros_ec.h @@ -184,6 +184,7 @@ struct cros_ec_dev { struct cros_ec_device *ec_dev; struct device *dev; u16 cmd_offset; + u32 features[2]; }; /** diff --git a/include/linux/mfd/cros_ec_commands.h b/include/linux/mfd/cros_ec_commands.h index 8826e0f64b0e..1683003603f3 100644 --- a/include/linux/mfd/cros_ec_commands.h +++ b/include/linux/mfd/cros_ec_commands.h @@ -713,6 +713,90 @@ struct ec_response_get_set_value { /* More than one command can use these structs to get/set paramters. */ #define EC_CMD_GSV_PAUSE_IN_S5 0x0c +/*****************************************************************************/ +/* List the features supported by the firmware */ +#define EC_CMD_GET_FEATURES 0x0d + +/* Supported features */ +enum ec_feature_code { + /* + * This image contains a limited set of features. 
Another image + * in RW partition may support more features. + */ + EC_FEATURE_LIMITED = 0, + /* + * Commands for probing/reading/writing/erasing the flash in the + * EC are present. + */ + EC_FEATURE_FLASH = 1, + /* + * Can control the fan speed directly. + */ + EC_FEATURE_PWM_FAN = 2, + /* + * Can control the intensity of the keyboard backlight. + */ + EC_FEATURE_PWM_KEYB = 3, + /* + * Support Google lightbar, introduced on Pixel. + */ + EC_FEATURE_LIGHTBAR = 4, + /* Control of LEDs */ + EC_FEATURE_LED = 5, + /* Exposes an interface to control gyro and sensors. + * The host goes through the EC to access these sensors. + * In addition, the EC may provide composite sensors, like lid angle. + */ + EC_FEATURE_MOTION_SENSE = 6, + /* The keyboard is controlled by the EC */ + EC_FEATURE_KEYB = 7, + /* The AP can use part of the EC flash as persistent storage. */ + EC_FEATURE_PSTORE = 8, + /* The EC monitors BIOS port 80h, and can return POST codes. */ + EC_FEATURE_PORT80 = 9, + /* + * Thermal management: include TMP specific commands. + * Higher level than direct fan control. + */ + EC_FEATURE_THERMAL = 10, + /* Can switch the screen backlight on/off */ + EC_FEATURE_BKLIGHT_SWITCH = 11, + /* Can switch the wifi module on/off */ + EC_FEATURE_WIFI_SWITCH = 12, + /* Monitor host events, through for example SMI or SCI */ + EC_FEATURE_HOST_EVENTS = 13, + /* The EC exposes GPIO commands to control/monitor connected devices. */ + EC_FEATURE_GPIO = 14, + /* The EC can send i2c messages to downstream devices. */ + EC_FEATURE_I2C = 15, + /* Commands to control the charger are included */ + EC_FEATURE_CHARGER = 16, + /* Simple battery support. */ + EC_FEATURE_BATTERY = 17, + /* + * Support Smart battery protocol + * (Common Smart Battery System Interface Specification) + */ + EC_FEATURE_SMART_BATTERY = 18, + /* EC can detect when the host hangs.
*/ + EC_FEATURE_HANG_DETECT = 19, + /* Report power information, for pit only */ + EC_FEATURE_PMU = 20, + /* Another Cros EC device is present downstream of this one */ + EC_FEATURE_SUB_MCU = 21, + /* Support USB Power delivery (PD) commands */ + EC_FEATURE_USB_PD = 22, + /* Control USB multiplexer, for audio through USB port for instance. */ + EC_FEATURE_USB_MUX = 23, + /* Motion Sensor code has an internal software FIFO */ + EC_FEATURE_MOTION_SENSE_FIFO = 24, +}; + +#define EC_FEATURE_MASK_0(event_code) (1UL << (event_code % 32)) +#define EC_FEATURE_MASK_1(event_code) (1UL << (event_code - 32)) +struct ec_response_get_features { + uint32_t flags[2]; +} __packed; /*****************************************************************************/ /* Flash commands */ -- cgit v1.2.3 From 0aced355757ddc150f78a6bf4f8d885bd4eaf0e2 Mon Sep 17 00:00:00 2001 From: Keerthy Date: Mon, 19 Sep 2016 13:09:02 +0530 Subject: mfd: tps65218: Remove redundant read wrapper Currently the read wrapper directly calls the regmap read function. Hence remove the redundant wrapper and use regmap read wherever needed.
Signed-off-by: Keerthy Signed-off-by: Lee Jones --- drivers/gpio/gpio-tps65218.c | 3 ++- drivers/mfd/tps65218.c | 18 ++---------------- drivers/regulator/tps65218-regulator.c | 5 +++-- include/linux/mfd/tps65218.h | 2 -- 4 files changed, 7 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/drivers/gpio/gpio-tps65218.c b/drivers/gpio/gpio-tps65218.c index d779307a9685..46e6dcc089cb 100644 --- a/drivers/gpio/gpio-tps65218.c +++ b/drivers/gpio/gpio-tps65218.c @@ -16,6 +16,7 @@ #include #include #include +#include #include struct tps65218_gpio { @@ -30,7 +31,7 @@ static int tps65218_gpio_get(struct gpio_chip *gc, unsigned offset) unsigned int val; int ret; - ret = tps65218_reg_read(tps65218, TPS65218_REG_ENABLE2, &val); + ret = regmap_read(tps65218->regmap, TPS65218_REG_ENABLE2, &val); if (ret) return ret; diff --git a/drivers/mfd/tps65218.c b/drivers/mfd/tps65218.c index ba610adbdbff..9bca1b1b60ce 100644 --- a/drivers/mfd/tps65218.c +++ b/drivers/mfd/tps65218.c @@ -33,20 +33,6 @@ #define TPS65218_PASSWORD_REGS_UNLOCK 0x7D -/** - * tps65218_reg_read: Read a single tps65218 register. - * - * @tps: Device to read from. - * @reg: Register to read. - * @val: Contians the value - */ -int tps65218_reg_read(struct tps65218 *tps, unsigned int reg, - unsigned int *val) -{ - return regmap_read(tps->regmap, reg, val); -} -EXPORT_SYMBOL_GPL(tps65218_reg_read); - /** * tps65218_reg_write: Write a single tps65218 register. 
* @@ -93,7 +79,7 @@ static int tps65218_update_bits(struct tps65218 *tps, unsigned int reg, int ret; unsigned int data; - ret = tps65218_reg_read(tps, reg, &data); + ret = regmap_read(tps->regmap, reg, &data); if (ret) { dev_err(tps->dev, "Read from reg 0x%x failed\n", reg); return ret; @@ -251,7 +237,7 @@ static int tps65218_probe(struct i2c_client *client, if (ret < 0) return ret; - ret = tps65218_reg_read(tps, TPS65218_REG_CHIPID, &chipid); + ret = regmap_read(tps->regmap, TPS65218_REG_CHIPID, &chipid); if (ret) { dev_err(tps->dev, "Failed to read chipid: %d\n", ret); return ret; diff --git a/drivers/regulator/tps65218-regulator.c b/drivers/regulator/tps65218-regulator.c index eb0f5b13841a..ae16caf4151c 100644 --- a/drivers/regulator/tps65218-regulator.c +++ b/drivers/regulator/tps65218-regulator.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -272,7 +273,7 @@ static int tps65218_pmic_get_current_limit(struct regulator_dev *dev) unsigned int index; struct tps65218 *tps = rdev_get_drvdata(dev); - retval = tps65218_reg_read(tps, dev->desc->csel_reg, &index); + retval = regmap_read(tps->regmap, dev->desc->csel_reg, &index); if (retval < 0) return retval; @@ -383,7 +384,7 @@ static int tps65218_regulator_probe(struct platform_device *pdev) return PTR_ERR(rdev); } - ret = tps65218_reg_read(tps, regulators[id].bypass_reg, &val); + ret = regmap_read(tps->regmap, regulators[id].bypass_reg, &val); if (ret) return ret; diff --git a/include/linux/mfd/tps65218.h b/include/linux/mfd/tps65218.h index d1db9527fab5..51bef539091c 100644 --- a/include/linux/mfd/tps65218.h +++ b/include/linux/mfd/tps65218.h @@ -284,8 +284,6 @@ struct tps65218 { struct regmap *regmap; }; -int tps65218_reg_read(struct tps65218 *tps, unsigned int reg, - unsigned int *val); int tps65218_reg_write(struct tps65218 *tps, unsigned int reg, unsigned int val, unsigned int level); int tps65218_set_bits(struct tps65218 *tps, unsigned int reg, -- cgit v1.2.3 From 
2dc4940360d4c0c38aa9275532c7c0d7542f6258 Mon Sep 17 00:00:00 2001 From: Keerthy Date: Mon, 19 Sep 2016 13:09:06 +0530 Subject: regulator: tps65218: Remove all the compatibles Remove all the individual compatibles for all the regulators and introduce id_table and update the driver accordingly to parse device tree nodes using the regulator framework. Signed-off-by: Keerthy Acked-by: Mark Brown Signed-off-by: Lee Jones --- drivers/regulator/tps65218-regulator.c | 150 ++++++++++++--------------------- include/linux/mfd/tps65218.h | 1 + 2 files changed, 57 insertions(+), 94 deletions(-) (limited to 'include/linux') diff --git a/drivers/regulator/tps65218-regulator.c b/drivers/regulator/tps65218-regulator.c index ae16caf4151c..9aafbb03482d 100644 --- a/drivers/regulator/tps65218-regulator.c +++ b/drivers/regulator/tps65218-regulator.c @@ -31,10 +31,11 @@ enum tps65218_regulators { DCDC1, DCDC2, DCDC3, DCDC4, DCDC5, DCDC6, LDO1, LS3 }; -#define TPS65218_REGULATOR(_name, _id, _type, _ops, _n, _vr, _vm, _er, _em, \ - _cr, _cm, _lr, _nlr, _delay, _fuv, _sr, _sm) \ +#define TPS65218_REGULATOR(_name, _of, _id, _type, _ops, _n, _vr, _vm, _er, \ + _em, _cr, _cm, _lr, _nlr, _delay, _fuv, _sr, _sm) \ { \ .name = _name, \ + .of_match = _of, \ .id = _id, \ .ops = &_ops, \ .n_voltages = _n, \ @@ -55,14 +56,6 @@ enum tps65218_regulators { DCDC1, DCDC2, DCDC3, DCDC4, .bypass_mask = _sm, \ } \ -#define TPS65218_INFO(_id, _nm, _min, _max) \ - [_id] = { \ - .id = _id, \ - .name = _nm, \ - .min_uV = _min, \ - .max_uV = _max, \ - } - static const struct regulator_linear_range dcdc1_dcdc2_ranges[] = { REGULATOR_LINEAR_RANGE(850000, 0x0, 0x32, 10000), REGULATOR_LINEAR_RANGE(1375000, 0x33, 0x3f, 25000), @@ -78,36 +71,6 @@ static const struct regulator_linear_range dcdc4_ranges[] = { REGULATOR_LINEAR_RANGE(1600000, 0x10, 0x34, 50000), }; -static struct tps_info tps65218_pmic_regs[] = { - TPS65218_INFO(DCDC1, "DCDC1", 850000, 1675000), - TPS65218_INFO(DCDC2, "DCDC2", 850000, 1675000), - 
TPS65218_INFO(DCDC3, "DCDC3", 900000, 3400000), - TPS65218_INFO(DCDC4, "DCDC4", 1175000, 3400000), - TPS65218_INFO(DCDC5, "DCDC5", 1000000, 1000000), - TPS65218_INFO(DCDC6, "DCDC6", 1800000, 1800000), - TPS65218_INFO(LDO1, "LDO1", 900000, 3400000), - TPS65218_INFO(LS3, "LS3", -1, -1), -}; - -#define TPS65218_OF_MATCH(comp, label) \ - { \ - .compatible = comp, \ - .data = &label, \ - } - -static const struct of_device_id tps65218_of_match[] = { - TPS65218_OF_MATCH("ti,tps65218-dcdc1", tps65218_pmic_regs[DCDC1]), - TPS65218_OF_MATCH("ti,tps65218-dcdc2", tps65218_pmic_regs[DCDC2]), - TPS65218_OF_MATCH("ti,tps65218-dcdc3", tps65218_pmic_regs[DCDC3]), - TPS65218_OF_MATCH("ti,tps65218-dcdc4", tps65218_pmic_regs[DCDC4]), - TPS65218_OF_MATCH("ti,tps65218-dcdc5", tps65218_pmic_regs[DCDC5]), - TPS65218_OF_MATCH("ti,tps65218-dcdc6", tps65218_pmic_regs[DCDC6]), - TPS65218_OF_MATCH("ti,tps65218-ldo1", tps65218_pmic_regs[LDO1]), - TPS65218_OF_MATCH("ti,tps65218-ls3", tps65218_pmic_regs[LS3]), - { } -}; -MODULE_DEVICE_TABLE(of, tps65218_of_match); - static int tps65218_pmic_set_voltage_sel(struct regulator_dev *dev, unsigned selector) { @@ -189,7 +152,7 @@ static int tps65218_pmic_set_suspend_disable(struct regulator_dev *dev) if (rid == TPS65218_DCDC_3 && tps->rev == TPS65218_REV_2_1) return 0; - if (!tps->info[rid]->strobe) { + if (!tps->strobes[rid]) { if (rid == TPS65218_DCDC_3) tps->info[rid]->strobe = 3; else @@ -198,8 +161,7 @@ static int tps65218_pmic_set_suspend_disable(struct regulator_dev *dev) return tps65218_set_bits(tps, dev->desc->bypass_reg, dev->desc->bypass_mask, - tps->info[rid]->strobe, - TPS65218_PROTECT_L1); + tps->strobes[rid], TPS65218_PROTECT_L1); } /* Operations permitted on DCDC1, DCDC2 */ @@ -301,104 +263,104 @@ static struct regulator_ops tps65218_dcdc56_pmic_ops = { }; static const struct regulator_desc regulators[] = { - TPS65218_REGULATOR("DCDC1", TPS65218_DCDC_1, REGULATOR_VOLTAGE, - tps65218_dcdc12_ops, 64, TPS65218_REG_CONTROL_DCDC1, + 
TPS65218_REGULATOR("DCDC1", "regulator-dcdc1", TPS65218_DCDC_1, + REGULATOR_VOLTAGE, tps65218_dcdc12_ops, 64, + TPS65218_REG_CONTROL_DCDC1, TPS65218_CONTROL_DCDC1_MASK, TPS65218_REG_ENABLE1, TPS65218_ENABLE1_DC1_EN, 0, 0, dcdc1_dcdc2_ranges, 2, 4000, 0, TPS65218_REG_SEQ3, TPS65218_SEQ3_DC1_SEQ_MASK), - TPS65218_REGULATOR("DCDC2", TPS65218_DCDC_2, REGULATOR_VOLTAGE, - tps65218_dcdc12_ops, 64, TPS65218_REG_CONTROL_DCDC2, + TPS65218_REGULATOR("DCDC2", "regulator-dcdc2", TPS65218_DCDC_2, + REGULATOR_VOLTAGE, tps65218_dcdc12_ops, 64, + TPS65218_REG_CONTROL_DCDC2, TPS65218_CONTROL_DCDC2_MASK, TPS65218_REG_ENABLE1, TPS65218_ENABLE1_DC2_EN, 0, 0, dcdc1_dcdc2_ranges, 2, 4000, 0, TPS65218_REG_SEQ3, TPS65218_SEQ3_DC2_SEQ_MASK), - TPS65218_REGULATOR("DCDC3", TPS65218_DCDC_3, REGULATOR_VOLTAGE, - tps65218_ldo1_dcdc34_ops, 64, + TPS65218_REGULATOR("DCDC3", "regulator-dcdc3", TPS65218_DCDC_3, + REGULATOR_VOLTAGE, tps65218_ldo1_dcdc34_ops, 64, TPS65218_REG_CONTROL_DCDC3, TPS65218_CONTROL_DCDC3_MASK, TPS65218_REG_ENABLE1, TPS65218_ENABLE1_DC3_EN, 0, 0, ldo1_dcdc3_ranges, 2, 0, 0, TPS65218_REG_SEQ4, TPS65218_SEQ4_DC3_SEQ_MASK), - TPS65218_REGULATOR("DCDC4", TPS65218_DCDC_4, REGULATOR_VOLTAGE, - tps65218_ldo1_dcdc34_ops, 53, + TPS65218_REGULATOR("DCDC4", "regulator-dcdc4", TPS65218_DCDC_4, + REGULATOR_VOLTAGE, tps65218_ldo1_dcdc34_ops, 53, TPS65218_REG_CONTROL_DCDC4, TPS65218_CONTROL_DCDC4_MASK, TPS65218_REG_ENABLE1, TPS65218_ENABLE1_DC4_EN, 0, 0, dcdc4_ranges, 2, 0, 0, TPS65218_REG_SEQ4, TPS65218_SEQ4_DC4_SEQ_MASK), - TPS65218_REGULATOR("DCDC5", TPS65218_DCDC_5, REGULATOR_VOLTAGE, - tps65218_dcdc56_pmic_ops, 1, -1, -1, - TPS65218_REG_ENABLE1, TPS65218_ENABLE1_DC5_EN, 0, 0, - NULL, 0, 0, 1000000, TPS65218_REG_SEQ5, + TPS65218_REGULATOR("DCDC5", "regulator-dcdc5", TPS65218_DCDC_5, + REGULATOR_VOLTAGE, tps65218_dcdc56_pmic_ops, 1, -1, + -1, TPS65218_REG_ENABLE1, TPS65218_ENABLE1_DC5_EN, 0, + 0, NULL, 0, 0, 1000000, TPS65218_REG_SEQ5, TPS65218_SEQ5_DC5_SEQ_MASK), - 
TPS65218_REGULATOR("DCDC6", TPS65218_DCDC_6, REGULATOR_VOLTAGE, - tps65218_dcdc56_pmic_ops, 1, -1, -1, - TPS65218_REG_ENABLE1, TPS65218_ENABLE1_DC6_EN, 0, 0, - NULL, 0, 0, 1800000, TPS65218_REG_SEQ5, + TPS65218_REGULATOR("DCDC6", "regulator-dcdc6", TPS65218_DCDC_6, + REGULATOR_VOLTAGE, tps65218_dcdc56_pmic_ops, 1, -1, + -1, TPS65218_REG_ENABLE1, TPS65218_ENABLE1_DC6_EN, 0, + 0, NULL, 0, 0, 1800000, TPS65218_REG_SEQ5, TPS65218_SEQ5_DC6_SEQ_MASK), - TPS65218_REGULATOR("LDO1", TPS65218_LDO_1, REGULATOR_VOLTAGE, - tps65218_ldo1_dcdc34_ops, 64, + TPS65218_REGULATOR("LDO1", "regulator-ldo1", TPS65218_LDO_1, + REGULATOR_VOLTAGE, tps65218_ldo1_dcdc34_ops, 64, TPS65218_REG_CONTROL_LDO1, TPS65218_CONTROL_LDO1_MASK, TPS65218_REG_ENABLE2, TPS65218_ENABLE2_LDO1_EN, 0, 0, ldo1_dcdc3_ranges, 2, 0, 0, TPS65218_REG_SEQ6, TPS65218_SEQ6_LDO1_SEQ_MASK), - TPS65218_REGULATOR("LS3", TPS65218_LS_3, REGULATOR_CURRENT, - tps65218_ls3_ops, 0, 0, 0, TPS65218_REG_ENABLE2, - TPS65218_ENABLE2_LS3_EN, TPS65218_REG_CONFIG2, - TPS65218_CONFIG2_LS3ILIM_MASK, NULL, 0, 0, 0, 0, 0), + TPS65218_REGULATOR("LS3", "regulator-ls3", TPS65218_LS_3, + REGULATOR_CURRENT, tps65218_ls3_ops, 0, 0, 0, + TPS65218_REG_ENABLE2, TPS65218_ENABLE2_LS3_EN, + TPS65218_REG_CONFIG2, TPS65218_CONFIG2_LS3ILIM_MASK, + NULL, 0, 0, 0, 0, 0), }; static int tps65218_regulator_probe(struct platform_device *pdev) { struct tps65218 *tps = dev_get_drvdata(pdev->dev.parent); - struct regulator_init_data *init_data; - const struct tps_info *template; struct regulator_dev *rdev; - const struct of_device_id *match; struct regulator_config config = { }; - int id, ret; + int i, ret; unsigned int val; - match = of_match_device(tps65218_of_match, &pdev->dev); - if (!match) - return -ENODEV; - - template = match->data; - id = template->id; - init_data = of_get_regulator_init_data(&pdev->dev, pdev->dev.of_node, - ®ulators[id]); - - platform_set_drvdata(pdev, tps); - - tps->info[id] = &tps65218_pmic_regs[id]; config.dev = &pdev->dev; - 
config.init_data = init_data; + config.dev->of_node = tps->dev->of_node; config.driver_data = tps; config.regmap = tps->regmap; - config.of_node = pdev->dev.of_node; - rdev = devm_regulator_register(&pdev->dev, ®ulators[id], &config); - if (IS_ERR(rdev)) { - dev_err(tps->dev, "failed to register %s regulator\n", - pdev->name); - return PTR_ERR(rdev); - } + /* Allocate memory for strobes */ + tps->strobes = devm_kzalloc(&pdev->dev, sizeof(u8) * + TPS65218_NUM_REGULATOR, GFP_KERNEL); - ret = regmap_read(tps->regmap, regulators[id].bypass_reg, &val); - if (ret) - return ret; + for (i = 0; i < ARRAY_SIZE(regulators); i++) { + rdev = devm_regulator_register(&pdev->dev, ®ulators[i], + &config); + if (IS_ERR(rdev)) { + dev_err(tps->dev, "failed to register %s regulator\n", + pdev->name); + return PTR_ERR(rdev); + } - tps->info[id]->strobe = val & regulators[id].bypass_mask; + ret = regmap_read(tps->regmap, regulators[i].bypass_reg, &val); + if (ret) + return ret; + + tps->strobes[i] = val & regulators[i].bypass_mask; + } return 0; } +static const struct platform_device_id tps65218_regulator_id_table[] = { + { "tps65218-regulator", }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(platform, tps65218_regulator_id_table); + static struct platform_driver tps65218_regulator_driver = { .driver = { .name = "tps65218-pmic", - .of_match_table = tps65218_of_match, }, .probe = tps65218_regulator_probe, + .id_table = tps65218_regulator_id_table, }; module_platform_driver(tps65218_regulator_driver); diff --git a/include/linux/mfd/tps65218.h b/include/linux/mfd/tps65218.h index 51bef539091c..bccd2d68b1e3 100644 --- a/include/linux/mfd/tps65218.h +++ b/include/linux/mfd/tps65218.h @@ -282,6 +282,7 @@ struct tps65218 { struct regulator_desc desc[TPS65218_NUM_REGULATOR]; struct tps_info *info[TPS65218_NUM_REGULATOR]; struct regmap *regmap; + u8 *strobes; }; int tps65218_reg_write(struct tps65218 *tps, unsigned int reg, -- cgit v1.2.3 From e9a2ea5a1ba09c35258f3663842fb8d8cf2e00c2 Mon Sep 17 
00:00:00 2001 From: Fenghua Yu Date: Sat, 22 Oct 2016 06:19:49 -0700 Subject: cacheinfo: Introduce cache id Cache management software needs an id for each instance of a cache of a particular type. The current cacheinfo structure does not provide any information about the underlying hardware so there is no way to expose it. Hardware with cache management features provides means (cpuid, enumeration etc.) to retrieve the hardware id of a particular cache instance. Cache instances which share hardware have the same hardware id. Add an 'id' field to struct cacheinfo to store this information. Expose this information under the /sys/devices/system/cpu/cpu*/cache/index*/ directory as well. Signed-off-by: Fenghua Yu Cc: "Ravi V Shankar" Cc: "Tony Luck" Cc: "David Carrillo-Cisneros" Cc: "Sai Prakhya" Cc: "Peter Zijlstra" Cc: "Stephane Eranian" Cc: "Dave Hansen" Cc: "Shaohua Li" Cc: "Nilay Vaish" Cc: "Vikas Shivappa" Cc: "Ingo Molnar" Cc: "Borislav Petkov" Cc: "H. Peter Anvin" Link: http://lkml.kernel.org/r/1477142405-32078-3-git-send-email-fenghua.yu@intel.com Signed-off-by: Thomas Gleixner --- drivers/base/cacheinfo.c | 5 +++++ include/linux/cacheinfo.h | 3 +++ 2 files changed, 8 insertions(+) (limited to 'include/linux') diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c index e9fd32e91668..00a9688043f4 100644 --- a/drivers/base/cacheinfo.c +++ b/drivers/base/cacheinfo.c @@ -233,6 +233,7 @@ static ssize_t file_name##_show(struct device *dev, \ return sprintf(buf, "%u\n", this_leaf->object); \ } +show_one(id, id); show_one(level, level); show_one(coherency_line_size, coherency_line_size); show_one(number_of_sets, number_of_sets); @@ -314,6 +315,7 @@ static ssize_t write_policy_show(struct device *dev, return n; } +static DEVICE_ATTR_RO(id); static DEVICE_ATTR_RO(level); static DEVICE_ATTR_RO(type); static DEVICE_ATTR_RO(coherency_line_size); @@ -327,6 +329,7 @@ static DEVICE_ATTR_RO(shared_cpu_list); static DEVICE_ATTR_RO(physical_line_partition); static 
struct attribute *cache_default_attrs[] = { + &dev_attr_id.attr, &dev_attr_type.attr, &dev_attr_level.attr, &dev_attr_shared_cpu_map.attr, @@ -350,6 +353,8 @@ cache_default_attrs_is_visible(struct kobject *kobj, const struct cpumask *mask = &this_leaf->shared_cpu_map; umode_t mode = attr->mode; + if ((attr == &dev_attr_id.attr) && (this_leaf->attributes & CACHE_ID)) + return mode; if ((attr == &dev_attr_type.attr) && this_leaf->type) return mode; if ((attr == &dev_attr_level.attr) && this_leaf->level) diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h index 2189935075b4..0bcbb674da9d 100644 --- a/include/linux/cacheinfo.h +++ b/include/linux/cacheinfo.h @@ -18,6 +18,7 @@ enum cache_type { /** * struct cacheinfo - represent a cache leaf node + * @id: This cache's id. It is unique among caches with the same (type, level). * @type: type of the cache - data, inst or unified * @level: represents the hierarchy in the multi-level cache * @coherency_line_size: size of each cache line usually representing @@ -44,6 +45,7 @@ enum cache_type { * keeping, the remaining members form the core properties of the cache */ struct cacheinfo { + unsigned int id; enum cache_type type; unsigned int level; unsigned int coherency_line_size; @@ -61,6 +63,7 @@ struct cacheinfo { #define CACHE_WRITE_ALLOCATE BIT(3) #define CACHE_ALLOCATE_POLICY_MASK \ (CACHE_READ_ALLOCATE | CACHE_WRITE_ALLOCATE) +#define CACHE_ID BIT(4) struct device_node *of_node; bool disable_sysfs; -- cgit v1.2.3 From 3cf25904fe467aebeaa77d402b6cf3c6c5d6303b Mon Sep 17 00:00:00 2001 From: Xo Wang Date: Fri, 21 Oct 2016 10:20:12 -0700 Subject: net: phy: broadcom: Update Auxiliary Control Register macros Add the RXD-to-RXC skew (delay) time bit in the Miscellaneous Control shadow register and a mask for the shadow selector field. Remove a re-definition of MII_BCM54XX_AUXCTL_SHDWSEL_AUXCTL. Signed-off-by: Xo Wang Reviewed-by: Florian Fainelli Reviewed-by: Joel Stanley Signed-off-by: David S. 
Miller --- include/linux/brcmphy.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index e3354b74286c..22c4421c916c 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -105,11 +105,12 @@ #define MII_BCM54XX_AUXCTL_ACTL_SMDSP_ENA 0x0800 #define MII_BCM54XX_AUXCTL_MISC_WREN 0x8000 +#define MII_BCM54XX_AUXCTL_MISC_RXD_RXC_SKEW 0x0100 #define MII_BCM54XX_AUXCTL_MISC_FORCE_AMDIX 0x0200 #define MII_BCM54XX_AUXCTL_MISC_RDSEL_MISC 0x7000 #define MII_BCM54XX_AUXCTL_SHDWSEL_MISC 0x0007 -#define MII_BCM54XX_AUXCTL_SHDWSEL_AUXCTL 0x0000 +#define MII_BCM54XX_AUXCTL_SHDWSEL_MASK 0x0007 /* * Broadcom LED source encodings. These are used in BCM5461, BCM5481, -- cgit v1.2.3 From d92ead16be405b6d52ff7b366d1c9865ccc684bd Mon Sep 17 00:00:00 2001 From: Xo Wang Date: Fri, 21 Oct 2016 10:20:13 -0700 Subject: net: phy: broadcom: Add support for BCM54612E This PHY has internal delays enabled after reset. This clears the internal delay enables unless the interface specifically requests them. Signed-off-by: Xo Wang Reviewed-by: Florian Fainelli Reviewed-by: Joel Stanley Signed-off-by: David S. Miller --- drivers/net/phy/broadcom.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++ include/linux/brcmphy.h | 1 + 2 files changed, 49 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c index 870327efccf7..583ef8a2ec8d 100644 --- a/drivers/net/phy/broadcom.c +++ b/drivers/net/phy/broadcom.c @@ -337,6 +337,41 @@ static int bcm5481_config_aneg(struct phy_device *phydev) return ret; } +static int bcm54612e_config_aneg(struct phy_device *phydev) +{ + int ret; + + /* First, auto-negotiate. */ + ret = genphy_config_aneg(phydev); + + /* Clear TX internal delay unless requested. 
*/ + if ((phydev->interface != PHY_INTERFACE_MODE_RGMII_ID) && + (phydev->interface != PHY_INTERFACE_MODE_RGMII_TXID)) { + /* Disable TXD to GTXCLK clock delay (default set) */ + /* Bit 9 is the only field in shadow register 00011 */ + bcm_phy_write_shadow(phydev, 0x03, 0); + } + + /* Clear RX internal delay unless requested. */ + if ((phydev->interface != PHY_INTERFACE_MODE_RGMII_ID) && + (phydev->interface != PHY_INTERFACE_MODE_RGMII_RXID)) { + u16 reg; + + /* Errata: reads require filling in the write selector field */ + bcm54xx_auxctl_write(phydev, MII_BCM54XX_AUXCTL_SHDWSEL_MISC, + MII_BCM54XX_AUXCTL_MISC_RDSEL_MISC); + reg = phy_read(phydev, MII_BCM54XX_AUX_CTL); + /* Disable RXD to RXC delay (default set) */ + reg &= ~MII_BCM54XX_AUXCTL_MISC_RXD_RXC_SKEW; + /* Clear shadow selector field */ + reg &= ~MII_BCM54XX_AUXCTL_SHDWSEL_MASK; + bcm54xx_auxctl_write(phydev, MII_BCM54XX_AUXCTL_SHDWSEL_MISC, + MII_BCM54XX_AUXCTL_MISC_WREN | reg); + } + + return ret; +} + static int brcm_phy_setbits(struct phy_device *phydev, int reg, int set) { int val; @@ -484,6 +519,18 @@ static struct phy_driver broadcom_drivers[] = { .read_status = genphy_read_status, .ack_interrupt = bcm_phy_ack_intr, .config_intr = bcm_phy_config_intr, +}, { + .phy_id = PHY_ID_BCM54612E, + .phy_id_mask = 0xfffffff0, + .name = "Broadcom BCM54612E", + .features = PHY_GBIT_FEATURES | + SUPPORTED_Pause | SUPPORTED_Asym_Pause, + .flags = PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT, + .config_init = bcm54xx_config_init, + .config_aneg = bcm54612e_config_aneg, + .read_status = genphy_read_status, + .ack_interrupt = bcm_phy_ack_intr, + .config_intr = bcm_phy_config_intr, }, { .phy_id = PHY_ID_BCM54616S, .phy_id_mask = 0xfffffff0, @@ -600,6 +647,7 @@ static struct mdio_device_id __maybe_unused broadcom_tbl[] = { { PHY_ID_BCM5411, 0xfffffff0 }, { PHY_ID_BCM5421, 0xfffffff0 }, { PHY_ID_BCM5461, 0xfffffff0 }, + { PHY_ID_BCM54612E, 0xfffffff0 }, { PHY_ID_BCM54616S, 0xfffffff0 }, { PHY_ID_BCM5464, 0xfffffff0 }, { 
PHY_ID_BCM5481, 0xfffffff0 }, diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index 22c4421c916c..60def78c4e12 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -18,6 +18,7 @@ #define PHY_ID_BCM5421 0x002060e0 #define PHY_ID_BCM5464 0x002060b0 #define PHY_ID_BCM5461 0x002060c0 +#define PHY_ID_BCM54612E 0x03625e60 #define PHY_ID_BCM54616S 0x03625d10 #define PHY_ID_BCM57780 0x03625d90 -- cgit v1.2.3 From aa276781a64a5f15ecc21e920960c5b1f84e5fee Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Tue, 18 Oct 2016 18:08:34 -0500 Subject: firmware: Add basic support for TI System Control Interface (TI-SCI) protocol Texas Instruments' System Control Interface (TI-SCI) Message Protocol is used in Texas Instruments' System on Chip (SoC) such as those in keystone family K2G SoC to communicate between various compute processors with a central system controller entity. TI-SCI message protocol provides support for management of various hardware entities within the SoC. Add support driver to allow communication with system controller entity within the SoC using the mailbox client. We introduce the basic registration and query capability for the driver protocol as part of this change. Subsequent patches add in functionality specific to the TI-SCI features.
Signed-off-by: Nishanth Menon Signed-off-by: Tero Kristo --- MAINTAINERS | 2 + drivers/firmware/Kconfig | 15 + drivers/firmware/Makefile | 1 + drivers/firmware/ti_sci.c | 790 +++++++++++++++++++++++++++++++++ drivers/firmware/ti_sci.h | 93 ++++ include/linux/soc/ti/ti_sci_protocol.h | 69 +++ 6 files changed, 970 insertions(+) create mode 100644 drivers/firmware/ti_sci.c create mode 100644 drivers/firmware/ti_sci.h create mode 100644 include/linux/soc/ti/ti_sci_protocol.h (limited to 'include/linux') diff --git a/MAINTAINERS b/MAINTAINERS index 2bafadf5747a..467b29fafaca 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11890,6 +11890,8 @@ M: Santosh Shilimkar L: linux-arm-kernel@lists.infradead.org S: Maintained F: Documentation/devicetree/bindings/arm/keystone/ti,sci.txt +F: drivers/firmware/ti_sci* +F: include/linux/soc/ti/ti_sci_protocol.h THANKO'S RAREMONO AM/FM/SW RADIO RECEIVER USB DRIVER M: Hans Verkuil diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig index bca172d42c74..9418d7d1b0a5 100644 --- a/drivers/firmware/Kconfig +++ b/drivers/firmware/Kconfig @@ -203,6 +203,21 @@ config QCOM_SCM_64 def_bool y depends on QCOM_SCM && ARM64 +config TI_SCI_PROTOCOL + tristate "TI System Control Interface (TISCI) Message Protocol" + depends on TI_MESSAGE_MANAGER + help + TI System Control Interface (TISCI) Message Protocol is used to manage + compute systems such as ARM, DSP etc with the system controller in + complex System on Chip(SoC) such as those found on certain keystone + generation SoC from TI. + + System controller provides various facilities including power + management function support. + + This protocol library is used by client drivers to use the features + provided by the system controller. 
+ config HAVE_ARM_SMCCC bool diff --git a/drivers/firmware/Makefile b/drivers/firmware/Makefile index 898ac41fa8b3..dcb52c423151 100644 --- a/drivers/firmware/Makefile +++ b/drivers/firmware/Makefile @@ -20,6 +20,7 @@ obj-$(CONFIG_QCOM_SCM) += qcom_scm.o obj-$(CONFIG_QCOM_SCM_64) += qcom_scm-64.o obj-$(CONFIG_QCOM_SCM_32) += qcom_scm-32.o CFLAGS_qcom_scm-32.o :=$(call as-instr,.arch armv7-a\n.arch_extension sec,-DREQUIRES_SEC=1) -march=armv7-a +obj-$(CONFIG_TI_SCI_PROTOCOL) += ti_sci.o obj-y += broadcom/ obj-y += meson/ diff --git a/drivers/firmware/ti_sci.c b/drivers/firmware/ti_sci.c new file mode 100644 index 000000000000..5e99d7c18276 --- /dev/null +++ b/drivers/firmware/ti_sci.c @@ -0,0 +1,790 @@ +/* + * Texas Instruments System Control Interface Protocol Driver + * + * Copyright (C) 2015-2016 Texas Instruments Incorporated - http://www.ti.com/ + * Nishanth Menon + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed "as is" WITHOUT ANY WARRANTY of any + * kind, whether express or implied; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#define pr_fmt(fmt) "%s: " fmt, __func__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ti_sci.h" + +/* List of all TI SCI devices active in system */ +static LIST_HEAD(ti_sci_list); +/* Protection for the entire list */ +static DEFINE_MUTEX(ti_sci_list_mutex); + +/** + * struct ti_sci_xfer - Structure representing a message flow + * @tx_message: Transmit message + * @rx_len: Receive message length + * @xfer_buf: Preallocated buffer to store receive message + * Since we work with request-ACK protocol, we can + * reuse the same buffer for the rx path as we + * use for the tx path. + * @done: completion event + */ +struct ti_sci_xfer { + struct ti_msgmgr_message tx_message; + u8 rx_len; + u8 *xfer_buf; + struct completion done; +}; + +/** + * struct ti_sci_xfers_info - Structure to manage transfer information + * @sem_xfer_count: Counting Semaphore for managing max simultaneous + * Messages. + * @xfer_block: Preallocated Message array + * @xfer_alloc_table: Bitmap table for allocated messages. + * Index of this bitmap table is also used for message + * sequence identifier. + * @xfer_lock: Protection for message allocation + */ +struct ti_sci_xfers_info { + struct semaphore sem_xfer_count; + struct ti_sci_xfer *xfer_block; + unsigned long *xfer_alloc_table; + /* protect transfer allocation */ + spinlock_t xfer_lock; +}; + +/** + * struct ti_sci_desc - Description of SoC integration + * @host_id: Host identifier representing the compute entity + * @max_rx_timeout_ms: Timeout for communication with SoC (in Milliseconds) + * @max_msgs: Maximum number of messages that can be pending + * simultaneously in the system + * @max_msg_size: Maximum size of data per message that can be handled. 
+ */ +struct ti_sci_desc { + u8 host_id; + int max_rx_timeout_ms; + int max_msgs; + int max_msg_size; +}; + +/** + * struct ti_sci_info - Structure representing a TI SCI instance + * @dev: Device pointer + * @desc: SoC description for this instance + * @d: Debugfs file entry + * @debug_region: Memory region where the debug message are available + * @debug_region_size: Debug region size + * @debug_buffer: Buffer allocated to copy debug messages. + * @handle: Instance of TI SCI handle to send to clients. + * @cl: Mailbox Client + * @chan_tx: Transmit mailbox channel + * @chan_rx: Receive mailbox channel + * @minfo: Message info + * @node: list head + * @users: Number of users of this instance + */ +struct ti_sci_info { + struct device *dev; + const struct ti_sci_desc *desc; + struct dentry *d; + void __iomem *debug_region; + char *debug_buffer; + size_t debug_region_size; + struct ti_sci_handle handle; + struct mbox_client cl; + struct mbox_chan *chan_tx; + struct mbox_chan *chan_rx; + struct ti_sci_xfers_info minfo; + struct list_head node; + /* protected by ti_sci_list_mutex */ + int users; +}; + +#define cl_to_ti_sci_info(c) container_of(c, struct ti_sci_info, cl) +#define handle_to_ti_sci_info(h) container_of(h, struct ti_sci_info, handle) + +#ifdef CONFIG_DEBUG_FS + +/** + * ti_sci_debug_show() - Helper to dump the debug log + * @s: sequence file pointer + * @unused: unused. + * + * Return: 0 + */ +static int ti_sci_debug_show(struct seq_file *s, void *unused) +{ + struct ti_sci_info *info = s->private; + + memcpy_fromio(info->debug_buffer, info->debug_region, + info->debug_region_size); + /* + * We don't trust firmware to leave NULL terminated last byte (hence + * we have allocated 1 extra 0 byte). Since we cannot guarantee any + * specific data format for debug messages, We just present the data + * in the buffer as is - we expect the messages to be self explanatory. 
+ */ + seq_puts(s, info->debug_buffer); + return 0; +} + +/** + * ti_sci_debug_open() - debug file open + * @inode: inode pointer + * @file: file pointer + * + * Return: result of single_open + */ +static int ti_sci_debug_open(struct inode *inode, struct file *file) +{ + return single_open(file, ti_sci_debug_show, inode->i_private); +} + +/* log file operations */ +static const struct file_operations ti_sci_debug_fops = { + .open = ti_sci_debug_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +/** + * ti_sci_debugfs_create() - Create log debug file + * @pdev: platform device pointer + * @info: Pointer to SCI entity information + * + * Return: 0 if all went fine, else corresponding error. + */ +static int ti_sci_debugfs_create(struct platform_device *pdev, + struct ti_sci_info *info) +{ + struct device *dev = &pdev->dev; + struct resource *res; + char debug_name[50] = "ti_sci_debug@"; + + /* Debug region is optional */ + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, + "debug_messages"); + info->debug_region = devm_ioremap_resource(dev, res); + if (IS_ERR(info->debug_region)) + return 0; + info->debug_region_size = resource_size(res); + + info->debug_buffer = devm_kcalloc(dev, info->debug_region_size + 1, + sizeof(char), GFP_KERNEL); + if (!info->debug_buffer) + return -ENOMEM; + /* Setup NULL termination */ + info->debug_buffer[info->debug_region_size] = 0; + + info->d = debugfs_create_file(strncat(debug_name, dev_name(dev), + sizeof(debug_name)), + 0444, NULL, info, &ti_sci_debug_fops); + if (IS_ERR(info->d)) + return PTR_ERR(info->d); + + dev_dbg(dev, "Debug region => %p, size = %zu bytes, resource: %pr\n", + info->debug_region, info->debug_region_size, res); + return 0; +} + +/** + * ti_sci_debugfs_destroy() - clean up log debug file + * @pdev: platform device pointer + * @info: Pointer to SCI entity information + */ +static void ti_sci_debugfs_destroy(struct platform_device *pdev, + struct ti_sci_info *info) +{ + if 
(IS_ERR(info->debug_region)) + return; + + debugfs_remove(info->d); +} +#else /* CONFIG_DEBUG_FS */ +static inline int ti_sci_debugfs_create(struct platform_device *dev, + struct ti_sci_info *info) +{ + return 0; +} + +static inline void ti_sci_debugfs_destroy(struct platform_device *dev, + struct ti_sci_info *info) +{ +} +#endif /* CONFIG_DEBUG_FS */ + +/** + * ti_sci_dump_header_dbg() - Helper to dump a message header. + * @dev: Device pointer corresponding to the SCI entity + * @hdr: pointer to header. + */ +static inline void ti_sci_dump_header_dbg(struct device *dev, + struct ti_sci_msg_hdr *hdr) +{ + dev_dbg(dev, "MSGHDR:type=0x%04x host=0x%02x seq=0x%02x flags=0x%08x\n", + hdr->type, hdr->host, hdr->seq, hdr->flags); +} + +/** + * ti_sci_rx_callback() - mailbox client callback for receive messages + * @cl: client pointer + * @m: mailbox message + * + * Processes one received message to appropriate transfer information and + * signals completion of the transfer. + * + * NOTE: This function will be invoked in IRQ context, hence should be + * as optimal as possible. + */ +static void ti_sci_rx_callback(struct mbox_client *cl, void *m) +{ + struct ti_sci_info *info = cl_to_ti_sci_info(cl); + struct device *dev = info->dev; + struct ti_sci_xfers_info *minfo = &info->minfo; + struct ti_msgmgr_message *mbox_msg = m; + struct ti_sci_msg_hdr *hdr = (struct ti_sci_msg_hdr *)mbox_msg->buf; + struct ti_sci_xfer *xfer; + u8 xfer_id; + + xfer_id = hdr->seq; + + /* + * Are we even expecting this? + * NOTE: barriers were implicit in locks used for modifying the bitmap + */ + if (!test_bit(xfer_id, minfo->xfer_alloc_table)) { + dev_err(dev, "Message for %d is not expected!\n", xfer_id); + return; + } + + xfer = &minfo->xfer_block[xfer_id]; + + /* Is the message of valid length? 
*/ + if (mbox_msg->len > info->desc->max_msg_size) { + dev_err(dev, "Unable to handle %d xfer(max %d)\n", + mbox_msg->len, info->desc->max_msg_size); + ti_sci_dump_header_dbg(dev, hdr); + return; + } + if (mbox_msg->len < xfer->rx_len) { + dev_err(dev, "Recv xfer %d < expected %d length\n", + mbox_msg->len, xfer->rx_len); + ti_sci_dump_header_dbg(dev, hdr); + return; + } + + ti_sci_dump_header_dbg(dev, hdr); + /* Take a copy to the rx buffer.. */ + memcpy(xfer->xfer_buf, mbox_msg->buf, xfer->rx_len); + complete(&xfer->done); +} + +/** + * ti_sci_get_one_xfer() - Allocate one message + * @info: Pointer to SCI entity information + * @msg_type: Message type + * @msg_flags: Flag to set for the message + * @tx_message_size: transmit message size + * @rx_message_size: receive message size + * + * Helper function which is used by various command functions that are + * exposed to clients of this driver for allocating a message traffic event. + * + * This function can sleep depending on pending requests already in the system + * for the SCI entity. Further, this also holds a spinlock to maintain integrity + * of internal data structures. + * + * Return: 0 if all went fine, else corresponding error. + */ +static struct ti_sci_xfer *ti_sci_get_one_xfer(struct ti_sci_info *info, + u16 msg_type, u32 msg_flags, + size_t tx_message_size, + size_t rx_message_size) +{ + struct ti_sci_xfers_info *minfo = &info->minfo; + struct ti_sci_xfer *xfer; + struct ti_sci_msg_hdr *hdr; + unsigned long flags; + unsigned long bit_pos; + u8 xfer_id; + int ret; + int timeout; + + /* Ensure we have sane transfer sizes */ + if (rx_message_size > info->desc->max_msg_size || + tx_message_size > info->desc->max_msg_size || + rx_message_size < sizeof(*hdr) || tx_message_size < sizeof(*hdr)) + return ERR_PTR(-ERANGE); + + /* + * Ensure we have only controlled number of pending messages. + * Ideally, we might just have to wait a single message, be + * conservative and wait 5 times that.. 
+ */ + timeout = msecs_to_jiffies(info->desc->max_rx_timeout_ms) * 5; + ret = down_timeout(&minfo->sem_xfer_count, timeout); + if (ret < 0) + return ERR_PTR(ret); + + /* Keep the locked section as small as possible */ + spin_lock_irqsave(&minfo->xfer_lock, flags); + bit_pos = find_first_zero_bit(minfo->xfer_alloc_table, + info->desc->max_msgs); + set_bit(bit_pos, minfo->xfer_alloc_table); + spin_unlock_irqrestore(&minfo->xfer_lock, flags); + + /* + * We already ensured in probe that we can have max messages that can + * fit in hdr.seq - NOTE: this improves access latencies + * to predictable O(1) access, BUT, it opens us to risk if + * remote misbehaves with corrupted message sequence responses. + * If that happens, we are going to be messed up anyways.. + */ + xfer_id = (u8)bit_pos; + + xfer = &minfo->xfer_block[xfer_id]; + + hdr = (struct ti_sci_msg_hdr *)xfer->tx_message.buf; + xfer->tx_message.len = tx_message_size; + xfer->rx_len = (u8)rx_message_size; + + reinit_completion(&xfer->done); + + hdr->seq = xfer_id; + hdr->type = msg_type; + hdr->host = info->desc->host_id; + hdr->flags = msg_flags; + + return xfer; +} + +/** + * ti_sci_put_one_xfer() - Release a message + * @minfo: transfer info pointer + * @xfer: message that was reserved by ti_sci_get_one_xfer + * + * This holds a spinlock to maintain integrity of internal data structures. + */ +static void ti_sci_put_one_xfer(struct ti_sci_xfers_info *minfo, + struct ti_sci_xfer *xfer) +{ + unsigned long flags; + struct ti_sci_msg_hdr *hdr; + u8 xfer_id; + + hdr = (struct ti_sci_msg_hdr *)xfer->tx_message.buf; + xfer_id = hdr->seq; + + /* + * Keep the locked section as small as possible + * NOTE: we might escape with smp_mb and no lock here.. + * but just be conservative and symmetric. 
+ */ + spin_lock_irqsave(&minfo->xfer_lock, flags); + clear_bit(xfer_id, minfo->xfer_alloc_table); + spin_unlock_irqrestore(&minfo->xfer_lock, flags); + + /* Increment the count for the next user to get through */ + up(&minfo->sem_xfer_count); +} + +/** + * ti_sci_do_xfer() - Do one transfer + * @info: Pointer to SCI entity information + * @xfer: Transfer to initiate and wait for response + * + * Return: -ETIMEDOUT in case of no response, if transmit error, + * return corresponding error, else if all goes well, + * return 0. + */ +static inline int ti_sci_do_xfer(struct ti_sci_info *info, + struct ti_sci_xfer *xfer) +{ + int ret; + int timeout; + struct device *dev = info->dev; + + ret = mbox_send_message(info->chan_tx, &xfer->tx_message); + if (ret < 0) + return ret; + + ret = 0; + + /* And we wait for the response. */ + timeout = msecs_to_jiffies(info->desc->max_rx_timeout_ms); + if (!wait_for_completion_timeout(&xfer->done, timeout)) { + dev_err(dev, "Mbox timedout in resp(caller: %pF)\n", + (void *)_RET_IP_); + ret = -ETIMEDOUT; + } + /* + * NOTE: we might prefer not to need the mailbox ticker to manage the + * transfer queueing since the protocol layer queues things by itself. + * Unfortunately, we have to kick the mailbox framework after we have + * received our message. + */ + mbox_client_txdone(info->chan_tx, ret); + + return ret; +} + +/** + * ti_sci_cmd_get_revision() - command to get the revision of the SCI entity + * @info: Pointer to SCI entity information + * + * Updates the SCI information in the internal data structure. + * + * Return: 0 if all went fine, else return appropriate error. 
+ */ +static int ti_sci_cmd_get_revision(struct ti_sci_info *info) +{ + struct device *dev = info->dev; + struct ti_sci_handle *handle = &info->handle; + struct ti_sci_version_info *ver = &handle->version; + struct ti_sci_msg_resp_version *rev_info; + struct ti_sci_xfer *xfer; + int ret; + + /* No need to setup flags since it is expected to respond */ + xfer = ti_sci_get_one_xfer(info, TI_SCI_MSG_VERSION, + 0x0, sizeof(struct ti_sci_msg_hdr), + sizeof(*rev_info)); + if (IS_ERR(xfer)) { + ret = PTR_ERR(xfer); + dev_err(dev, "Message alloc failed(%d)\n", ret); + return ret; + } + + rev_info = (struct ti_sci_msg_resp_version *)xfer->xfer_buf; + + ret = ti_sci_do_xfer(info, xfer); + if (ret) { + dev_err(dev, "Mbox send fail %d\n", ret); + goto fail; + } + + ver->abi_major = rev_info->abi_major; + ver->abi_minor = rev_info->abi_minor; + ver->firmware_revision = rev_info->firmware_revision; + strncpy(ver->firmware_description, rev_info->firmware_description, + sizeof(ver->firmware_description)); + +fail: + ti_sci_put_one_xfer(&info->minfo, xfer); + return ret; +} + +/** + * ti_sci_get_handle() - Get the TI SCI handle for a device + * @dev: Pointer to device for which we want SCI handle + * + * NOTE: The function does not track individual clients of the framework + * and is expected to be maintained by caller of TI SCI protocol library. + * ti_sci_put_handle must be balanced with successful ti_sci_get_handle + * Return: pointer to handle if successful, else: + * -EPROBE_DEFER if the instance is not ready + * -ENODEV if the required node handler is missing + * -EINVAL if invalid conditions are encountered. 
+ */ +const struct ti_sci_handle *ti_sci_get_handle(struct device *dev) +{ + struct device_node *ti_sci_np; + struct list_head *p; + struct ti_sci_handle *handle = NULL; + struct ti_sci_info *info; + + if (!dev) { + pr_err("I need a device pointer\n"); + return ERR_PTR(-EINVAL); + } + ti_sci_np = of_get_parent(dev->of_node); + if (!ti_sci_np) { + dev_err(dev, "No OF information\n"); + return ERR_PTR(-EINVAL); + } + + mutex_lock(&ti_sci_list_mutex); + list_for_each(p, &ti_sci_list) { + info = list_entry(p, struct ti_sci_info, node); + if (ti_sci_np == info->dev->of_node) { + handle = &info->handle; + info->users++; + break; + } + } + mutex_unlock(&ti_sci_list_mutex); + of_node_put(ti_sci_np); + + if (!handle) + return ERR_PTR(-EPROBE_DEFER); + + return handle; +} +EXPORT_SYMBOL_GPL(ti_sci_get_handle); + +/** + * ti_sci_put_handle() - Release the handle acquired by ti_sci_get_handle + * @handle: Handle acquired by ti_sci_get_handle + * + * NOTE: The function does not track individual clients of the framework + * and is expected to be maintained by caller of TI SCI protocol library. 
+ * ti_sci_put_handle must be balanced with successful ti_sci_get_handle + * + * Return: 0 if successfully released + * if an error pointer was passed, it returns the error value back, + * if null was passed, it returns -EINVAL; + */ +int ti_sci_put_handle(const struct ti_sci_handle *handle) +{ + struct ti_sci_info *info; + + if (IS_ERR(handle)) + return PTR_ERR(handle); + if (!handle) + return -EINVAL; + + info = handle_to_ti_sci_info(handle); + mutex_lock(&ti_sci_list_mutex); + if (!WARN_ON(!info->users)) + info->users--; + mutex_unlock(&ti_sci_list_mutex); + + return 0; +} +EXPORT_SYMBOL_GPL(ti_sci_put_handle); + +static void devm_ti_sci_release(struct device *dev, void *res) +{ + const struct ti_sci_handle **ptr = res; + const struct ti_sci_handle *handle = *ptr; + int ret; + + ret = ti_sci_put_handle(handle); + if (ret) + dev_err(dev, "failed to put handle %d\n", ret); +} + +/** + * devm_ti_sci_get_handle() - Managed get handle + * @dev: device for which we want SCI handle for. + * + * NOTE: This releases the handle once the device resources are + * no longer needed. MUST NOT BE released with ti_sci_put_handle. + * The function does not track individual clients of the framework + * and is expected to be maintained by caller of TI SCI protocol library. + * + * Return: pointer to the handle if successful, else an ERR_PTR()-encoded error.
+ */ +const struct ti_sci_handle *devm_ti_sci_get_handle(struct device *dev) +{ + const struct ti_sci_handle **ptr; + const struct ti_sci_handle *handle; + + ptr = devres_alloc(devm_ti_sci_release, sizeof(*ptr), GFP_KERNEL); + if (!ptr) + return ERR_PTR(-ENOMEM); + handle = ti_sci_get_handle(dev); + + if (!IS_ERR(handle)) { + *ptr = handle; + devres_add(dev, ptr); + } else { + devres_free(ptr); + } + + return handle; +} +EXPORT_SYMBOL_GPL(devm_ti_sci_get_handle); + +/* Description for K2G */ +static const struct ti_sci_desc ti_sci_pmmc_k2g_desc = { + .host_id = 2, + /* Conservative duration */ + .max_rx_timeout_ms = 1000, + /* Limited by MBOX_TX_QUEUE_LEN. K2G can handle upto 128 messages! */ + .max_msgs = 20, + .max_msg_size = 64, +}; + +static const struct of_device_id ti_sci_of_match[] = { + {.compatible = "ti,k2g-sci", .data = &ti_sci_pmmc_k2g_desc}, + { /* Sentinel */ }, +}; +MODULE_DEVICE_TABLE(of, ti_sci_of_match); + +static int ti_sci_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + const struct of_device_id *of_id; + const struct ti_sci_desc *desc; + struct ti_sci_xfer *xfer; + struct ti_sci_info *info = NULL; + struct ti_sci_xfers_info *minfo; + struct mbox_client *cl; + int ret = -EINVAL; + int i; + + of_id = of_match_device(ti_sci_of_match, dev); + if (!of_id) { + dev_err(dev, "OF data missing\n"); + return -EINVAL; + } + desc = of_id->data; + + info = devm_kzalloc(dev, sizeof(*info), GFP_KERNEL); + if (!info) + return -ENOMEM; + + info->dev = dev; + info->desc = desc; + INIT_LIST_HEAD(&info->node); + minfo = &info->minfo; + + /* + * Pre-allocate messages + * NEVER allocate more than what we can indicate in hdr.seq + * if we have data description bug, force a fix.. 
+ */ + if (WARN_ON(desc->max_msgs >= + 1 << 8 * sizeof(((struct ti_sci_msg_hdr *)0)->seq))) + return -EINVAL; + + minfo->xfer_block = devm_kcalloc(dev, + desc->max_msgs, + sizeof(*minfo->xfer_block), + GFP_KERNEL); + if (!minfo->xfer_block) + return -ENOMEM; + + minfo->xfer_alloc_table = devm_kzalloc(dev, + BITS_TO_LONGS(desc->max_msgs) + * sizeof(unsigned long), + GFP_KERNEL); + if (!minfo->xfer_alloc_table) + return -ENOMEM; + bitmap_zero(minfo->xfer_alloc_table, desc->max_msgs); + + /* Pre-initialize the buffer pointer to pre-allocated buffers */ + for (i = 0, xfer = minfo->xfer_block; i < desc->max_msgs; i++, xfer++) { + xfer->xfer_buf = devm_kcalloc(dev, 1, desc->max_msg_size, + GFP_KERNEL); + if (!xfer->xfer_buf) + return -ENOMEM; + + xfer->tx_message.buf = xfer->xfer_buf; + init_completion(&xfer->done); + } + + ret = ti_sci_debugfs_create(pdev, info); + if (ret) + dev_warn(dev, "Failed to create debug file\n"); + + platform_set_drvdata(pdev, info); + + cl = &info->cl; + cl->dev = dev; + cl->tx_block = false; + cl->rx_callback = ti_sci_rx_callback; + cl->knows_txdone = true; + + spin_lock_init(&minfo->xfer_lock); + sema_init(&minfo->sem_xfer_count, desc->max_msgs); + + info->chan_rx = mbox_request_channel_byname(cl, "rx"); + if (IS_ERR(info->chan_rx)) { + ret = PTR_ERR(info->chan_rx); + goto out; + } + + info->chan_tx = mbox_request_channel_byname(cl, "tx"); + if (IS_ERR(info->chan_tx)) { + ret = PTR_ERR(info->chan_tx); + goto out; + } + ret = ti_sci_cmd_get_revision(info); + if (ret) { + dev_err(dev, "Unable to communicate with TISCI(%d)\n", ret); + goto out; + } + + dev_info(dev, "ABI: %d.%d (firmware rev 0x%04x '%s')\n", + info->handle.version.abi_major, info->handle.version.abi_minor, + info->handle.version.firmware_revision, + info->handle.version.firmware_description); + + mutex_lock(&ti_sci_list_mutex); + list_add_tail(&info->node, &ti_sci_list); + mutex_unlock(&ti_sci_list_mutex); + + return of_platform_populate(dev->of_node, NULL, NULL, dev); +out: + 
if (!IS_ERR(info->chan_tx)) + mbox_free_channel(info->chan_tx); + if (!IS_ERR(info->chan_rx)) + mbox_free_channel(info->chan_rx); + debugfs_remove(info->d); + return ret; +} + +static int ti_sci_remove(struct platform_device *pdev) +{ + struct ti_sci_info *info; + struct device *dev = &pdev->dev; + int ret = 0; + + of_platform_depopulate(dev); + + info = platform_get_drvdata(pdev); + + mutex_lock(&ti_sci_list_mutex); + if (info->users) + ret = -EBUSY; + else + list_del(&info->node); + mutex_unlock(&ti_sci_list_mutex); + + if (!ret) { + ti_sci_debugfs_destroy(pdev, info); + + /* Safe to free channels since no more users */ + mbox_free_channel(info->chan_tx); + mbox_free_channel(info->chan_rx); + } + + return ret; +} + +static struct platform_driver ti_sci_driver = { + .probe = ti_sci_probe, + .remove = ti_sci_remove, + .driver = { + .name = "ti-sci", + .of_match_table = of_match_ptr(ti_sci_of_match), + }, +}; +module_platform_driver(ti_sci_driver); + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("TI System Control Interface(SCI) driver"); +MODULE_AUTHOR("Nishanth Menon"); +MODULE_ALIAS("platform:ti-sci"); diff --git a/drivers/firmware/ti_sci.h b/drivers/firmware/ti_sci.h new file mode 100644 index 000000000000..e9dc53f26e0e --- /dev/null +++ b/drivers/firmware/ti_sci.h @@ -0,0 +1,93 @@ +/* + * Texas Instruments System Control Interface (TISCI) Protocol + * + * Communication protocol with TI SCI hardware + * The system works in a message response protocol + * See: http://processors.wiki.ti.com/index.php/TISCI for details + * + * Copyright (C) 2015-2016 Texas Instruments Incorporated - http://www.ti.com/ + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * + * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the + * distribution. + * + * Neither the name of Texas Instruments Incorporated nor the names of + * its contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#ifndef __TI_SCI_H +#define __TI_SCI_H + +/* Generic Messages */ +#define TI_SCI_MSG_ENABLE_WDT 0x0000 +#define TI_SCI_MSG_WAKE_RESET 0x0001 +#define TI_SCI_MSG_VERSION 0x0002 +#define TI_SCI_MSG_WAKE_REASON 0x0003 +#define TI_SCI_MSG_GOODBYE 0x0004 + +/** + * struct ti_sci_msg_hdr - Generic Message Header for All messages and responses + * @type: Type of messages: One of TI_SCI_MSG* values + * @host: Host of the message + * @seq: Message identifier indicating a transfer sequence + * @flags: Flag for the message + */ +struct ti_sci_msg_hdr { + u16 type; + u8 host; + u8 seq; +#define TI_SCI_MSG_FLAG(val) (1 << (val)) +#define TI_SCI_FLAG_REQ_GENERIC_NORESPONSE 0x0 +#define TI_SCI_FLAG_REQ_ACK_ON_RECEIVED TI_SCI_MSG_FLAG(0) +#define TI_SCI_FLAG_REQ_ACK_ON_PROCESSED TI_SCI_MSG_FLAG(1) +#define TI_SCI_FLAG_RESP_GENERIC_NACK 0x0 +#define TI_SCI_FLAG_RESP_GENERIC_ACK TI_SCI_MSG_FLAG(1) + /* Additional Flags */ + u32 flags; +} __packed; + +/** + * struct ti_sci_msg_resp_version - Response for a message + * @hdr: Generic header + * @firmware_description: String describing the firmware + * @firmware_revision: Firmware revision + * @abi_major: Major version of the ABI that firmware supports + * @abi_minor: Minor version of the ABI that firmware supports + * + * In general, ABI version changes follow the rule that minor version increments + * are backward compatible. Major revision changes in ABI may not be + * backward compatible. 
+ * + * Response to a generic message with message type TI_SCI_MSG_VERSION + */ +struct ti_sci_msg_resp_version { + struct ti_sci_msg_hdr hdr; + char firmware_description[32]; + u16 firmware_revision; + u8 abi_major; + u8 abi_minor; +} __packed; + +#endif /* __TI_SCI_H */ diff --git a/include/linux/soc/ti/ti_sci_protocol.h b/include/linux/soc/ti/ti_sci_protocol.h new file mode 100644 index 000000000000..e73483fd5327 --- /dev/null +++ b/include/linux/soc/ti/ti_sci_protocol.h @@ -0,0 +1,69 @@ +/* + * Texas Instruments System Control Interface Protocol + * + * Copyright (C) 2015-2016 Texas Instruments Incorporated - http://www.ti.com/ + * Nishanth Menon + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed "as is" WITHOUT ANY WARRANTY of any + * kind, whether express or implied; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef __TISCI_PROTOCOL_H +#define __TISCI_PROTOCOL_H + +/** + * struct ti_sci_version_info - version information structure + * @abi_major: Major ABI version. Change here implies risk of backward + * compatibility break. + * @abi_minor: Minor ABI version. Change here implies new feature addition, + * or compatible change in ABI. + * @firmware_revision: Firmware revision (not usually used). + * @firmware_description: Firmware description (not usually used). + */ +struct ti_sci_version_info { + u8 abi_major; + u8 abi_minor; + u16 firmware_revision; + char firmware_description[32]; +}; + +/** + * struct ti_sci_handle - Handle returned to TI SCI clients for usage. 
+ * @version: structure containing version information + */ +struct ti_sci_handle { + struct ti_sci_version_info version; +}; + +#if IS_ENABLED(CONFIG_TI_SCI_PROTOCOL) +const struct ti_sci_handle *ti_sci_get_handle(struct device *dev); +int ti_sci_put_handle(const struct ti_sci_handle *handle); +const struct ti_sci_handle *devm_ti_sci_get_handle(struct device *dev); + +#else /* CONFIG_TI_SCI_PROTOCOL */ + +static inline const struct ti_sci_handle *ti_sci_get_handle(struct device *dev) +{ + return ERR_PTR(-EINVAL); +} + +static inline int ti_sci_put_handle(const struct ti_sci_handle *handle) +{ + return -EINVAL; +} + +static inline +const struct ti_sci_handle *devm_ti_sci_get_handle(struct device *dev) +{ + return ERR_PTR(-EINVAL); +} + +#endif /* CONFIG_TI_SCI_PROTOCOL */ + +#endif /* __TISCI_PROTOCOL_H */ -- cgit v1.2.3 From 9e7d756da7a5b0cc756d1f512f3eaf261834180a Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Tue, 18 Oct 2016 18:08:35 -0500 Subject: firmware: ti_sci: Add support for Device control Texas Instrument's System Control Interface (TI-SCI) Message Protocol is used in Texas Instrument's System on Chip (SoC) such as those in keystone family K2G SoC to communicate between various compute processors with a central system controller entity. TI-SCI message protocol provides support for management of various hardware entities within the SoC. Add support driver to allow communication with system controller entity within the SoC using the mailbox client. We introduce the fundamental device management capability support to the driver protocol as part of this change. 
[d-gerlach@ti.com: Contributed device reset handling] Signed-off-by: Dave Gerlach Signed-off-by: Nishanth Menon Signed-off-by: Tero Kristo --- drivers/firmware/ti_sci.c | 433 +++++++++++++++++++++++++++++++++ drivers/firmware/ti_sci.h | 98 ++++++++ include/linux/soc/ti/ti_sci_protocol.h | 91 +++++++ 3 files changed, 622 insertions(+) (limited to 'include/linux') diff --git a/drivers/firmware/ti_sci.c b/drivers/firmware/ti_sci.c index 5e99d7c18276..c7b25ccf6f07 100644 --- a/drivers/firmware/ti_sci.c +++ b/drivers/firmware/ti_sci.c @@ -494,6 +494,437 @@ fail: return ret; } +/** + * ti_sci_is_response_ack() - Generic ACK/NACK message checkup + * @r: pointer to response buffer + * + * Return: true if the response was an ACK, else returns false. + */ +static inline bool ti_sci_is_response_ack(void *r) +{ + struct ti_sci_msg_hdr *hdr = r; + + return hdr->flags & TI_SCI_FLAG_RESP_GENERIC_ACK ? true : false; +} + +/** + * ti_sci_set_device_state() - Set device state helper + * @handle: pointer to TI SCI handle + * @id: Device identifier + * @flags: flags to setup for the device + * @state: State to move the device to + * + * Return: 0 if all went well, else returns appropriate error value. 
+ */ +static int ti_sci_set_device_state(const struct ti_sci_handle *handle, + u32 id, u32 flags, u8 state) +{ + struct ti_sci_info *info; + struct ti_sci_msg_req_set_device_state *req; + struct ti_sci_msg_hdr *resp; + struct ti_sci_xfer *xfer; + struct device *dev; + int ret = 0; + + if (IS_ERR(handle)) + return PTR_ERR(handle); + if (!handle) + return -EINVAL; + + info = handle_to_ti_sci_info(handle); + dev = info->dev; + + xfer = ti_sci_get_one_xfer(info, TI_SCI_MSG_SET_DEVICE_STATE, + flags | TI_SCI_FLAG_REQ_ACK_ON_PROCESSED, + sizeof(*req), sizeof(*resp)); + if (IS_ERR(xfer)) { + ret = PTR_ERR(xfer); + dev_err(dev, "Message alloc failed(%d)\n", ret); + return ret; + } + req = (struct ti_sci_msg_req_set_device_state *)xfer->xfer_buf; + req->id = id; + req->state = state; + + ret = ti_sci_do_xfer(info, xfer); + if (ret) { + dev_err(dev, "Mbox send fail %d\n", ret); + goto fail; + } + + resp = (struct ti_sci_msg_hdr *)xfer->xfer_buf; + + ret = ti_sci_is_response_ack(resp) ? 0 : -ENODEV; + +fail: + ti_sci_put_one_xfer(&info->minfo, xfer); + + return ret; +} + +/** + * ti_sci_get_device_state() - Get device state helper + * @handle: Handle to the device + * @id: Device Identifier + * @clcnt: Pointer to Context Loss Count + * @resets: pointer to resets + * @p_state: pointer to p_state + * @c_state: pointer to c_state + * + * Return: 0 if all went fine, else return appropriate error. 
+ */ +static int ti_sci_get_device_state(const struct ti_sci_handle *handle, + u32 id, u32 *clcnt, u32 *resets, + u8 *p_state, u8 *c_state) +{ + struct ti_sci_info *info; + struct ti_sci_msg_req_get_device_state *req; + struct ti_sci_msg_resp_get_device_state *resp; + struct ti_sci_xfer *xfer; + struct device *dev; + int ret = 0; + + if (IS_ERR(handle)) + return PTR_ERR(handle); + if (!handle) + return -EINVAL; + + if (!clcnt && !resets && !p_state && !c_state) + return -EINVAL; + + info = handle_to_ti_sci_info(handle); + dev = info->dev; + + /* Response is expected, so no need of any flags */ + xfer = ti_sci_get_one_xfer(info, TI_SCI_MSG_GET_DEVICE_STATE, + 0, sizeof(*req), sizeof(*resp)); + if (IS_ERR(xfer)) { + ret = PTR_ERR(xfer); + dev_err(dev, "Message alloc failed(%d)\n", ret); + return ret; + } + req = (struct ti_sci_msg_req_get_device_state *)xfer->xfer_buf; + req->id = id; + + ret = ti_sci_do_xfer(info, xfer); + if (ret) { + dev_err(dev, "Mbox send fail %d\n", ret); + goto fail; + } + + resp = (struct ti_sci_msg_resp_get_device_state *)xfer->xfer_buf; + if (!ti_sci_is_response_ack(resp)) { + ret = -ENODEV; + goto fail; + } + + if (clcnt) + *clcnt = resp->context_loss_count; + if (resets) + *resets = resp->resets; + if (p_state) + *p_state = resp->programmed_state; + if (c_state) + *c_state = resp->current_state; +fail: + ti_sci_put_one_xfer(&info->minfo, xfer); + + return ret; +} + +/** + * ti_sci_cmd_get_device() - command to request for device managed by TISCI + * @handle: Pointer to TISCI handle as retrieved by *ti_sci_get_handle + * @id: Device Identifier + * + * Request for the device - NOTE: the client MUST maintain integrity of + * usage count by balancing get_device with put_device. No refcounting is + * managed by driver for that purpose. + * + * NOTE: The request is for exclusive access for the processor. + * + * Return: 0 if all went fine, else return appropriate error. 
+ */ +static int ti_sci_cmd_get_device(const struct ti_sci_handle *handle, u32 id) +{ + return ti_sci_set_device_state(handle, id, + MSG_FLAG_DEVICE_EXCLUSIVE, + MSG_DEVICE_SW_STATE_ON); +} + +/** + * ti_sci_cmd_idle_device() - Command to idle a device managed by TISCI + * @handle: Pointer to TISCI handle as retrieved by *ti_sci_get_handle + * @id: Device Identifier + * + * Request for the device - NOTE: the client MUST maintain integrity of + * usage count by balancing get_device with put_device. No refcounting is + * managed by driver for that purpose. + * + * Return: 0 if all went fine, else return appropriate error. + */ +static int ti_sci_cmd_idle_device(const struct ti_sci_handle *handle, u32 id) +{ + return ti_sci_set_device_state(handle, id, + MSG_FLAG_DEVICE_EXCLUSIVE, + MSG_DEVICE_SW_STATE_RETENTION); +} + +/** + * ti_sci_cmd_put_device() - command to release a device managed by TISCI + * @handle: Pointer to TISCI handle as retrieved by *ti_sci_get_handle + * @id: Device Identifier + * + * Request for the device - NOTE: the client MUST maintain integrity of + * usage count by balancing get_device with put_device. No refcounting is + * managed by driver for that purpose. + * + * Return: 0 if all went fine, else return appropriate error. + */ +static int ti_sci_cmd_put_device(const struct ti_sci_handle *handle, u32 id) +{ + return ti_sci_set_device_state(handle, id, + 0, MSG_DEVICE_SW_STATE_AUTO_OFF); +} + +/** + * ti_sci_cmd_dev_is_valid() - Is the device valid + * @handle: Pointer to TISCI handle as retrieved by *ti_sci_get_handle + * @id: Device Identifier + * + * Return: 0 if all went fine and the device ID is valid, else return + * appropriate error. 
+ */ +static int ti_sci_cmd_dev_is_valid(const struct ti_sci_handle *handle, u32 id) +{ + u8 unused; + + /* check the device state which will also tell us if the ID is valid */ + return ti_sci_get_device_state(handle, id, NULL, NULL, NULL, &unused); +} + +/** + * ti_sci_cmd_dev_get_clcnt() - Get context loss counter + * @handle: Pointer to TISCI handle + * @id: Device Identifier + * @count: Pointer to Context Loss counter to populate + * + * Return: 0 if all went fine, else return appropriate error. + */ +static int ti_sci_cmd_dev_get_clcnt(const struct ti_sci_handle *handle, u32 id, + u32 *count) +{ + return ti_sci_get_device_state(handle, id, count, NULL, NULL, NULL); +} + +/** + * ti_sci_cmd_dev_is_idle() - Check if the device is requested to be idle + * @handle: Pointer to TISCI handle + * @id: Device Identifier + * @r_state: true if requested to be idle + * + * Return: 0 if all went fine, else return appropriate error. + */ +static int ti_sci_cmd_dev_is_idle(const struct ti_sci_handle *handle, u32 id, + bool *r_state) +{ + int ret; + u8 state; + + if (!r_state) + return -EINVAL; + + ret = ti_sci_get_device_state(handle, id, NULL, NULL, &state, NULL); + if (ret) + return ret; + + *r_state = (state == MSG_DEVICE_SW_STATE_RETENTION); + + return 0; +} + +/** + * ti_sci_cmd_dev_is_stop() - Check if the device is requested to be stopped + * @handle: Pointer to TISCI handle + * @id: Device Identifier + * @r_state: true if requested to be stopped + * @curr_state: true if currently stopped. + * + * Return: 0 if all went fine, else return appropriate error. 
+ */ +static int ti_sci_cmd_dev_is_stop(const struct ti_sci_handle *handle, u32 id, + bool *r_state, bool *curr_state) +{ + int ret; + u8 p_state, c_state; + + if (!r_state && !curr_state) + return -EINVAL; + + ret = + ti_sci_get_device_state(handle, id, NULL, NULL, &p_state, &c_state); + if (ret) + return ret; + + if (r_state) + *r_state = (p_state == MSG_DEVICE_SW_STATE_AUTO_OFF); + if (curr_state) + *curr_state = (c_state == MSG_DEVICE_HW_STATE_OFF); + + return 0; +} + +/** + * ti_sci_cmd_dev_is_on() - Check if the device is requested to be ON + * @handle: Pointer to TISCI handle + * @id: Device Identifier + * @r_state: true if requested to be ON + * @curr_state: true if currently ON and active + * + * Return: 0 if all went fine, else return appropriate error. + */ +static int ti_sci_cmd_dev_is_on(const struct ti_sci_handle *handle, u32 id, + bool *r_state, bool *curr_state) +{ + int ret; + u8 p_state, c_state; + + if (!r_state && !curr_state) + return -EINVAL; + + ret = + ti_sci_get_device_state(handle, id, NULL, NULL, &p_state, &c_state); + if (ret) + return ret; + + if (r_state) + *r_state = (p_state == MSG_DEVICE_SW_STATE_ON); + if (curr_state) + *curr_state = (c_state == MSG_DEVICE_HW_STATE_ON); + + return 0; +} + +/** + * ti_sci_cmd_dev_is_trans() - Check if the device is currently transitioning + * @handle: Pointer to TISCI handle + * @id: Device Identifier + * @curr_state: true if currently transitioning. + * + * Return: 0 if all went fine, else return appropriate error. 
+ */ +static int ti_sci_cmd_dev_is_trans(const struct ti_sci_handle *handle, u32 id, + bool *curr_state) +{ + int ret; + u8 state; + + if (!curr_state) + return -EINVAL; + + ret = ti_sci_get_device_state(handle, id, NULL, NULL, NULL, &state); + if (ret) + return ret; + + *curr_state = (state == MSG_DEVICE_HW_STATE_TRANS); + + return 0; +} + +/** + * ti_sci_cmd_set_device_resets() - command to set resets for device managed + * by TISCI + * @handle: Pointer to TISCI handle as retrieved by *ti_sci_get_handle + * @id: Device Identifier + * @reset_state: Device specific reset bit field + * + * Return: 0 if all went fine, else return appropriate error. + */ +static int ti_sci_cmd_set_device_resets(const struct ti_sci_handle *handle, + u32 id, u32 reset_state) +{ + struct ti_sci_info *info; + struct ti_sci_msg_req_set_device_resets *req; + struct ti_sci_msg_hdr *resp; + struct ti_sci_xfer *xfer; + struct device *dev; + int ret = 0; + + if (IS_ERR(handle)) + return PTR_ERR(handle); + if (!handle) + return -EINVAL; + + info = handle_to_ti_sci_info(handle); + dev = info->dev; + + xfer = ti_sci_get_one_xfer(info, TI_SCI_MSG_SET_DEVICE_RESETS, + TI_SCI_FLAG_REQ_ACK_ON_PROCESSED, + sizeof(*req), sizeof(*resp)); + if (IS_ERR(xfer)) { + ret = PTR_ERR(xfer); + dev_err(dev, "Message alloc failed(%d)\n", ret); + return ret; + } + req = (struct ti_sci_msg_req_set_device_resets *)xfer->xfer_buf; + req->id = id; + req->resets = reset_state; + + ret = ti_sci_do_xfer(info, xfer); + if (ret) { + dev_err(dev, "Mbox send fail %d\n", ret); + goto fail; + } + + resp = (struct ti_sci_msg_hdr *)xfer->xfer_buf; + + ret = ti_sci_is_response_ack(resp) ? 
0 : -ENODEV; + +fail: + ti_sci_put_one_xfer(&info->minfo, xfer); + + return ret; +} + +/** + * ti_sci_cmd_get_device_resets() - Get reset state for device managed + * by TISCI + * @handle: Pointer to TISCI handle + * @id: Device Identifier + * @reset_state: Pointer to reset state to populate + * + * Return: 0 if all went fine, else return appropriate error. + */ +static int ti_sci_cmd_get_device_resets(const struct ti_sci_handle *handle, + u32 id, u32 *reset_state) +{ + return ti_sci_get_device_state(handle, id, NULL, reset_state, NULL, + NULL); +} + +/* + * ti_sci_setup_ops() - Setup the operations structures + * @info: pointer to TISCI pointer + */ +static void ti_sci_setup_ops(struct ti_sci_info *info) +{ + struct ti_sci_ops *ops = &info->handle.ops; + struct ti_sci_dev_ops *dops = &ops->dev_ops; + + dops->get_device = ti_sci_cmd_get_device; + dops->idle_device = ti_sci_cmd_idle_device; + dops->put_device = ti_sci_cmd_put_device; + + dops->is_valid = ti_sci_cmd_dev_is_valid; + dops->get_context_loss_count = ti_sci_cmd_dev_get_clcnt; + dops->is_idle = ti_sci_cmd_dev_is_idle; + dops->is_stop = ti_sci_cmd_dev_is_stop; + dops->is_on = ti_sci_cmd_dev_is_on; + dops->is_transitioning = ti_sci_cmd_dev_is_trans; + dops->set_device_resets = ti_sci_cmd_set_device_resets; + dops->get_device_resets = ti_sci_cmd_get_device_resets; +} + /** * ti_sci_get_handle() - Get the TI SCI handle for a device * @dev: Pointer to device for which we want SCI handle @@ -727,6 +1158,8 @@ static int ti_sci_probe(struct platform_device *pdev) goto out; } + ti_sci_setup_ops(info); + dev_info(dev, "ABI: %d.%d (firmware rev 0x%04x '%s')\n", info->handle.version.abi_major, info->handle.version.abi_minor, info->handle.version.firmware_revision, diff --git a/drivers/firmware/ti_sci.h b/drivers/firmware/ti_sci.h index e9dc53f26e0e..29ce0532a7ca 100644 --- a/drivers/firmware/ti_sci.h +++ b/drivers/firmware/ti_sci.h @@ -47,6 +47,11 @@ #define TI_SCI_MSG_WAKE_REASON 0x0003 #define TI_SCI_MSG_GOODBYE 
0x0004 +/* Device requests */ +#define TI_SCI_MSG_SET_DEVICE_STATE 0x0200 +#define TI_SCI_MSG_GET_DEVICE_STATE 0x0201 +#define TI_SCI_MSG_SET_DEVICE_RESETS 0x0202 + /** * struct ti_sci_msg_hdr - Generic Message Header for All messages and responses * @type: Type of messages: One of TI_SCI_MSG* values @@ -90,4 +95,97 @@ struct ti_sci_msg_resp_version { u8 abi_minor; } __packed; +/** + * struct ti_sci_msg_req_set_device_state - Set the desired state of the device + * @hdr: Generic header + * @id: Indicates which device to modify + * @reserved: Reserved space in message, must be 0 for backward compatibility + * @state: The desired state of the device. + * + * Certain flags can also be set to alter the device state: + * + MSG_FLAG_DEVICE_WAKE_ENABLED - Configure the device to be a wake source. + * The meaning of this flag will vary slightly from device to device and from + * SoC to SoC but it generally allows the device to wake the SoC out of deep + * suspend states. + * + MSG_FLAG_DEVICE_RESET_ISO - Enable reset isolation for this device. + * + MSG_FLAG_DEVICE_EXCLUSIVE - Claim this device exclusively. When passed + * with STATE_RETENTION or STATE_ON, it will claim the device exclusively. + * If another host already has this device set to STATE_RETENTION or STATE_ON, + * the message will fail. Once successful, other hosts attempting to set + * STATE_RETENTION or STATE_ON will fail. + * + * Request type is TI_SCI_MSG_SET_DEVICE_STATE, responded with a generic + * ACK/NACK message. 
+ */ +struct ti_sci_msg_req_set_device_state { + /* Additional hdr->flags options */ +#define MSG_FLAG_DEVICE_WAKE_ENABLED TI_SCI_MSG_FLAG(8) +#define MSG_FLAG_DEVICE_RESET_ISO TI_SCI_MSG_FLAG(9) +#define MSG_FLAG_DEVICE_EXCLUSIVE TI_SCI_MSG_FLAG(10) + struct ti_sci_msg_hdr hdr; + u32 id; + u32 reserved; + +#define MSG_DEVICE_SW_STATE_AUTO_OFF 0 +#define MSG_DEVICE_SW_STATE_RETENTION 1 +#define MSG_DEVICE_SW_STATE_ON 2 + u8 state; +} __packed; + +/** + * struct ti_sci_msg_req_get_device_state - Request to get device. + * @hdr: Generic header + * @id: Device Identifier + * + * Request type is TI_SCI_MSG_GET_DEVICE_STATE, responded device state + * information + */ +struct ti_sci_msg_req_get_device_state { + struct ti_sci_msg_hdr hdr; + u32 id; +} __packed; + +/** + * struct ti_sci_msg_resp_get_device_state - Response to get device request. + * @hdr: Generic header + * @context_loss_count: Indicates how many times the device has lost context. A + * driver can use this monotonic counter to determine if the device has + * lost context since the last time this message was exchanged. + * @resets: Programmed state of the reset lines. + * @programmed_state: The state as programmed by set_device. + * - Uses the MSG_DEVICE_SW_* macros + * @current_state: The actual state of the hardware. + * + * Response to request TI_SCI_MSG_GET_DEVICE_STATE. + */ +struct ti_sci_msg_resp_get_device_state { + struct ti_sci_msg_hdr hdr; + u32 context_loss_count; + u32 resets; + u8 programmed_state; +#define MSG_DEVICE_HW_STATE_OFF 0 +#define MSG_DEVICE_HW_STATE_ON 1 +#define MSG_DEVICE_HW_STATE_TRANS 2 + u8 current_state; +} __packed; + +/** + * struct ti_sci_msg_req_set_device_resets - Set the desired resets + * configuration of the device + * @hdr: Generic header + * @id: Indicates which device to modify + * @resets: A bit field of resets for the device. The meaning, behavior, + * and usage of the reset flags are device specific. 
0 for a bit + * indicates releasing the reset represented by that bit while 1 + * indicates keeping it held. + * + * Request type is TI_SCI_MSG_SET_DEVICE_RESETS, responded with a generic + * ACK/NACK message. + */ +struct ti_sci_msg_req_set_device_resets { + struct ti_sci_msg_hdr hdr; + u32 id; + u32 resets; +} __packed; + #endif /* __TI_SCI_H */ diff --git a/include/linux/soc/ti/ti_sci_protocol.h b/include/linux/soc/ti/ti_sci_protocol.h index e73483fd5327..87fa73851471 100644 --- a/include/linux/soc/ti/ti_sci_protocol.h +++ b/include/linux/soc/ti/ti_sci_protocol.h @@ -33,12 +33,103 @@ struct ti_sci_version_info { char firmware_description[32]; }; +struct ti_sci_handle; + +/** + * struct ti_sci_dev_ops - Device control operations + * @get_device: Command to request for device managed by TISCI + * Returns 0 for successful exclusive request, else returns + * corresponding error message. + * @idle_device: Command to idle a device managed by TISCI + * Returns 0 for successful exclusive request, else returns + * corresponding error message. + * @put_device: Command to release a device managed by TISCI + * Returns 0 for successful release, else returns corresponding + * error message. + * @is_valid: Check if the device ID is a valid ID. + * Returns 0 if the ID is valid, else returns corresponding error. + * @get_context_loss_count: Command to retrieve context loss counter - this + * increments every time the device loses context. Overflow + * is possible. + * - count: pointer to u32 which will retrieve counter + * Returns 0 for successful information request and count has + * proper data, else returns corresponding error message. + * @is_idle: Reports back about device idle state + * - req_state: Returns requested idle state + * Returns 0 for successful information request and req_state + * has proper data, else returns corresponding error + * message. 
+ * @is_stop: Reports back about device stop state + * - req_state: Returns requested stop state + * - current_state: Returns current stop state + * Returns 0 for successful information request and req_state and + * current_state has proper data, else returns corresponding error + * message. + * @is_on: Reports back about device ON(or active) state + * - req_state: Returns requested ON state + * - current_state: Returns current ON state + * Returns 0 for successful information request and req_state and + * current_state has proper data, else returns corresponding error + * message. + * @is_transitioning: Reports back if the device is in the middle of transition + * of state. + * -current_state: Returns 'true' if currently transitioning. + * @set_device_resets: Command to configure resets for device managed by TISCI. + * -reset_state: Device specific reset bit field + * Returns 0 for successful request, else returns + * corresponding error message. + * @get_device_resets: Command to read state of resets for device managed + * by TISCI. + * -reset_state: pointer to u32 which will retrieve resets + * Returns 0 for successful request, else returns + * corresponding error message. + * + * NOTE: for all these functions, the following parameters are generic in + * nature: + * -handle: Pointer to TISCI handle as retrieved by *ti_sci_get_handle + * -id: Device Identifier + * + * Request for the device - NOTE: the client MUST maintain integrity of + * usage count by balancing get_device with put_device. No refcounting is + * managed by driver for that purpose. 
+ */ +struct ti_sci_dev_ops { + int (*get_device)(const struct ti_sci_handle *handle, u32 id); + int (*idle_device)(const struct ti_sci_handle *handle, u32 id); + int (*put_device)(const struct ti_sci_handle *handle, u32 id); + int (*is_valid)(const struct ti_sci_handle *handle, u32 id); + int (*get_context_loss_count)(const struct ti_sci_handle *handle, + u32 id, u32 *count); + int (*is_idle)(const struct ti_sci_handle *handle, u32 id, + bool *requested_state); + int (*is_stop)(const struct ti_sci_handle *handle, u32 id, + bool *req_state, bool *current_state); + int (*is_on)(const struct ti_sci_handle *handle, u32 id, + bool *req_state, bool *current_state); + int (*is_transitioning)(const struct ti_sci_handle *handle, u32 id, + bool *current_state); + int (*set_device_resets)(const struct ti_sci_handle *handle, u32 id, + u32 reset_state); + int (*get_device_resets)(const struct ti_sci_handle *handle, u32 id, + u32 *reset_state); +}; + +/** + * struct ti_sci_ops - Function support for TI SCI + * @dev_ops: Device specific operations + */ +struct ti_sci_ops { + struct ti_sci_dev_ops dev_ops; +}; + /** * struct ti_sci_handle - Handle returned to TI SCI clients for usage. * @version: structure containing version information + * @ops: operations that are made available to TI SCI clients */ struct ti_sci_handle { struct ti_sci_version_info version; + struct ti_sci_ops ops; }; #if IS_ENABLED(CONFIG_TI_SCI_PROTOCOL) -- cgit v1.2.3 From 9f72322050e4762adde66619f048b7317ad12d77 Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Tue, 18 Oct 2016 18:08:36 -0500 Subject: firmware: ti_sci: Add support for Clock control Texas Instrument's System Control Interface (TI-SCI) Message Protocol is used in Texas Instrument's System on Chip (SoC) such as those in keystone family K2G SoC to communicate between various compute processors with a central system controller entity. TI-SCI message protocol provides support for management of various hardware entities within the SoC. 
Add support driver to allow communication with system controller entity within the SoC using the mailbox client. In general, we expect to function at a device level of abstraction, however, for proper operation of hardware blocks, many clocks directly supplying the hardware block need to be queried or configured. Introduce support for the set of SCI message protocol support that provide us with this capability. Signed-off-by: Nishanth Menon Signed-off-by: Tero Kristo --- drivers/firmware/ti_sci.c | 685 +++++++++++++++++++++++++++++++++ drivers/firmware/ti_sci.h | 289 ++++++++++++++ include/linux/soc/ti/ti_sci_protocol.h | 78 ++++ 3 files changed, 1052 insertions(+) (limited to 'include/linux') diff --git a/drivers/firmware/ti_sci.c b/drivers/firmware/ti_sci.c index c7b25ccf6f07..496a007e5c69 100644 --- a/drivers/firmware/ti_sci.c +++ b/drivers/firmware/ti_sci.c @@ -902,6 +902,675 @@ static int ti_sci_cmd_get_device_resets(const struct ti_sci_handle *handle, NULL); } +/** + * ti_sci_set_clock_state() - Set clock state helper + * @handle: pointer to TI SCI handle + * @dev_id: Device identifier this request is for + * @clk_id: Clock identifier for the device for this request. + * Each device has its own set of clock inputs. This indexes + * which clock input to modify. + * @flags: Header flags as needed + * @state: State to request for the clock. + * + * Return: 0 if all went well, else returns appropriate error value. 
+ */ +static int ti_sci_set_clock_state(const struct ti_sci_handle *handle, + u32 dev_id, u8 clk_id, + u32 flags, u8 state) +{ + struct ti_sci_info *info; + struct ti_sci_msg_req_set_clock_state *req; + struct ti_sci_msg_hdr *resp; + struct ti_sci_xfer *xfer; + struct device *dev; + int ret = 0; + + if (IS_ERR(handle)) + return PTR_ERR(handle); + if (!handle) + return -EINVAL; + + info = handle_to_ti_sci_info(handle); + dev = info->dev; + + xfer = ti_sci_get_one_xfer(info, TI_SCI_MSG_SET_CLOCK_STATE, + flags | TI_SCI_FLAG_REQ_ACK_ON_PROCESSED, + sizeof(*req), sizeof(*resp)); + if (IS_ERR(xfer)) { + ret = PTR_ERR(xfer); + dev_err(dev, "Message alloc failed(%d)\n", ret); + return ret; + } + req = (struct ti_sci_msg_req_set_clock_state *)xfer->xfer_buf; + req->dev_id = dev_id; + req->clk_id = clk_id; + req->request_state = state; + + ret = ti_sci_do_xfer(info, xfer); + if (ret) { + dev_err(dev, "Mbox send fail %d\n", ret); + goto fail; + } + + resp = (struct ti_sci_msg_hdr *)xfer->xfer_buf; + + ret = ti_sci_is_response_ack(resp) ? 0 : -ENODEV; + +fail: + ti_sci_put_one_xfer(&info->minfo, xfer); + + return ret; +} + +/** + * ti_sci_cmd_get_clock_state() - Get clock state helper + * @handle: pointer to TI SCI handle + * @dev_id: Device identifier this request is for + * @clk_id: Clock identifier for the device for this request. + * Each device has its own set of clock inputs. This indexes + * which clock input to modify. + * @programmed_state: State requested for clock to move to + * @current_state: State that the clock is currently in + * + * Return: 0 if all went well, else returns appropriate error value. 
+ */ +static int ti_sci_cmd_get_clock_state(const struct ti_sci_handle *handle, + u32 dev_id, u8 clk_id, + u8 *programmed_state, u8 *current_state) +{ + struct ti_sci_info *info; + struct ti_sci_msg_req_get_clock_state *req; + struct ti_sci_msg_resp_get_clock_state *resp; + struct ti_sci_xfer *xfer; + struct device *dev; + int ret = 0; + + if (IS_ERR(handle)) + return PTR_ERR(handle); + if (!handle) + return -EINVAL; + + if (!programmed_state && !current_state) + return -EINVAL; + + info = handle_to_ti_sci_info(handle); + dev = info->dev; + + xfer = ti_sci_get_one_xfer(info, TI_SCI_MSG_GET_CLOCK_STATE, + TI_SCI_FLAG_REQ_ACK_ON_PROCESSED, + sizeof(*req), sizeof(*resp)); + if (IS_ERR(xfer)) { + ret = PTR_ERR(xfer); + dev_err(dev, "Message alloc failed(%d)\n", ret); + return ret; + } + req = (struct ti_sci_msg_req_get_clock_state *)xfer->xfer_buf; + req->dev_id = dev_id; + req->clk_id = clk_id; + + ret = ti_sci_do_xfer(info, xfer); + if (ret) { + dev_err(dev, "Mbox send fail %d\n", ret); + goto fail; + } + + resp = (struct ti_sci_msg_resp_get_clock_state *)xfer->xfer_buf; + + if (!ti_sci_is_response_ack(resp)) { + ret = -ENODEV; + goto fail; + } + + if (programmed_state) + *programmed_state = resp->programmed_state; + if (current_state) + *current_state = resp->current_state; + +fail: + ti_sci_put_one_xfer(&info->minfo, xfer); + + return ret; +} + +/** + * ti_sci_cmd_get_clock() - Get control of a clock from TI SCI + * @handle: pointer to TI SCI handle + * @dev_id: Device identifier this request is for + * @clk_id: Clock identifier for the device for this request. + * Each device has its own set of clock inputs. This indexes + * which clock input to modify. + * @needs_ssc: 'true' if Spread Spectrum clock is desired, else 'false' + * @can_change_freq: 'true' if frequency change is desired, else 'false' + * @enable_input_term: 'true' if input termination is desired, else 'false' + * + * Return: 0 if all went well, else returns appropriate error value. 
+ */ +static int ti_sci_cmd_get_clock(const struct ti_sci_handle *handle, u32 dev_id, + u8 clk_id, bool needs_ssc, bool can_change_freq, + bool enable_input_term) +{ + u32 flags = 0; + + flags |= needs_ssc ? MSG_FLAG_CLOCK_ALLOW_SSC : 0; + flags |= can_change_freq ? MSG_FLAG_CLOCK_ALLOW_FREQ_CHANGE : 0; + flags |= enable_input_term ? MSG_FLAG_CLOCK_INPUT_TERM : 0; + + return ti_sci_set_clock_state(handle, dev_id, clk_id, flags, + MSG_CLOCK_SW_STATE_REQ); +} + +/** + * ti_sci_cmd_idle_clock() - Idle a clock which is in our control + * @handle: pointer to TI SCI handle + * @dev_id: Device identifier this request is for + * @clk_id: Clock identifier for the device for this request. + * Each device has it's own set of clock inputs. This indexes + * which clock input to modify. + * + * NOTE: This clock must have been requested by get_clock previously. + * + * Return: 0 if all went well, else returns appropriate error value. + */ +static int ti_sci_cmd_idle_clock(const struct ti_sci_handle *handle, + u32 dev_id, u8 clk_id) +{ + return ti_sci_set_clock_state(handle, dev_id, clk_id, 0, + MSG_CLOCK_SW_STATE_UNREQ); +} + +/** + * ti_sci_cmd_put_clock() - Release a clock from our control back to TISCI + * @handle: pointer to TI SCI handle + * @dev_id: Device identifier this request is for + * @clk_id: Clock identifier for the device for this request. + * Each device has it's own set of clock inputs. This indexes + * which clock input to modify. + * + * NOTE: This clock must have been requested by get_clock previously. + * + * Return: 0 if all went well, else returns appropriate error value. 
+ */ +static int ti_sci_cmd_put_clock(const struct ti_sci_handle *handle, + u32 dev_id, u8 clk_id) +{ + return ti_sci_set_clock_state(handle, dev_id, clk_id, 0, + MSG_CLOCK_SW_STATE_AUTO); +} + +/** + * ti_sci_cmd_clk_is_auto() - Is the clock being auto managed + * @handle: pointer to TI SCI handle + * @dev_id: Device identifier this request is for + * @clk_id: Clock identifier for the device for this request. + * Each device has it's own set of clock inputs. This indexes + * which clock input to modify. + * @req_state: state indicating if the clock is auto managed + * + * Return: 0 if all went well, else returns appropriate error value. + */ +static int ti_sci_cmd_clk_is_auto(const struct ti_sci_handle *handle, + u32 dev_id, u8 clk_id, bool *req_state) +{ + u8 state = 0; + int ret; + + if (!req_state) + return -EINVAL; + + ret = ti_sci_cmd_get_clock_state(handle, dev_id, clk_id, &state, NULL); + if (ret) + return ret; + + *req_state = (state == MSG_CLOCK_SW_STATE_AUTO); + return 0; +} + +/** + * ti_sci_cmd_clk_is_on() - Is the clock ON + * @handle: pointer to TI SCI handle + * @dev_id: Device identifier this request is for + * @clk_id: Clock identifier for the device for this request. + * Each device has it's own set of clock inputs. This indexes + * which clock input to modify. + * @req_state: state indicating if the clock is managed by us and enabled + * @curr_state: state indicating if the clock is ready for operation + * + * Return: 0 if all went well, else returns appropriate error value. 
+ */ +static int ti_sci_cmd_clk_is_on(const struct ti_sci_handle *handle, u32 dev_id, + u8 clk_id, bool *req_state, bool *curr_state) +{ + u8 c_state = 0, r_state = 0; + int ret; + + if (!req_state && !curr_state) + return -EINVAL; + + ret = ti_sci_cmd_get_clock_state(handle, dev_id, clk_id, + &r_state, &c_state); + if (ret) + return ret; + + if (req_state) + *req_state = (r_state == MSG_CLOCK_SW_STATE_REQ); + if (curr_state) + *curr_state = (c_state == MSG_CLOCK_HW_STATE_READY); + return 0; +} + +/** + * ti_sci_cmd_clk_is_off() - Is the clock OFF + * @handle: pointer to TI SCI handle + * @dev_id: Device identifier this request is for + * @clk_id: Clock identifier for the device for this request. + * Each device has it's own set of clock inputs. This indexes + * which clock input to modify. + * @req_state: state indicating if the clock is managed by us and disabled + * @curr_state: state indicating if the clock is NOT ready for operation + * + * Return: 0 if all went well, else returns appropriate error value. + */ +static int ti_sci_cmd_clk_is_off(const struct ti_sci_handle *handle, u32 dev_id, + u8 clk_id, bool *req_state, bool *curr_state) +{ + u8 c_state = 0, r_state = 0; + int ret; + + if (!req_state && !curr_state) + return -EINVAL; + + ret = ti_sci_cmd_get_clock_state(handle, dev_id, clk_id, + &r_state, &c_state); + if (ret) + return ret; + + if (req_state) + *req_state = (r_state == MSG_CLOCK_SW_STATE_UNREQ); + if (curr_state) + *curr_state = (c_state == MSG_CLOCK_HW_STATE_NOT_READY); + return 0; +} + +/** + * ti_sci_cmd_clk_set_parent() - Set the clock source of a specific device clock + * @handle: pointer to TI SCI handle + * @dev_id: Device identifier this request is for + * @clk_id: Clock identifier for the device for this request. + * Each device has it's own set of clock inputs. This indexes + * which clock input to modify. + * @parent_id: Parent clock identifier to set + * + * Return: 0 if all went well, else returns appropriate error value. 
+ */ +static int ti_sci_cmd_clk_set_parent(const struct ti_sci_handle *handle, + u32 dev_id, u8 clk_id, u8 parent_id) +{ + struct ti_sci_info *info; + struct ti_sci_msg_req_set_clock_parent *req; + struct ti_sci_msg_hdr *resp; + struct ti_sci_xfer *xfer; + struct device *dev; + int ret = 0; + + if (IS_ERR(handle)) + return PTR_ERR(handle); + if (!handle) + return -EINVAL; + + info = handle_to_ti_sci_info(handle); + dev = info->dev; + + xfer = ti_sci_get_one_xfer(info, TI_SCI_MSG_SET_CLOCK_PARENT, + TI_SCI_FLAG_REQ_ACK_ON_PROCESSED, + sizeof(*req), sizeof(*resp)); + if (IS_ERR(xfer)) { + ret = PTR_ERR(xfer); + dev_err(dev, "Message alloc failed(%d)\n", ret); + return ret; + } + req = (struct ti_sci_msg_req_set_clock_parent *)xfer->xfer_buf; + req->dev_id = dev_id; + req->clk_id = clk_id; + req->parent_id = parent_id; + + ret = ti_sci_do_xfer(info, xfer); + if (ret) { + dev_err(dev, "Mbox send fail %d\n", ret); + goto fail; + } + + resp = (struct ti_sci_msg_hdr *)xfer->xfer_buf; + + ret = ti_sci_is_response_ack(resp) ? 0 : -ENODEV; + +fail: + ti_sci_put_one_xfer(&info->minfo, xfer); + + return ret; +} + +/** + * ti_sci_cmd_clk_get_parent() - Get current parent clock source + * @handle: pointer to TI SCI handle + * @dev_id: Device identifier this request is for + * @clk_id: Clock identifier for the device for this request. + * Each device has it's own set of clock inputs. This indexes + * which clock input to modify. + * @parent_id: Current clock parent + * + * Return: 0 if all went well, else returns appropriate error value. 
+ */ +static int ti_sci_cmd_clk_get_parent(const struct ti_sci_handle *handle, + u32 dev_id, u8 clk_id, u8 *parent_id) +{ + struct ti_sci_info *info; + struct ti_sci_msg_req_get_clock_parent *req; + struct ti_sci_msg_resp_get_clock_parent *resp; + struct ti_sci_xfer *xfer; + struct device *dev; + int ret = 0; + + if (IS_ERR(handle)) + return PTR_ERR(handle); + if (!handle || !parent_id) + return -EINVAL; + + info = handle_to_ti_sci_info(handle); + dev = info->dev; + + xfer = ti_sci_get_one_xfer(info, TI_SCI_MSG_GET_CLOCK_PARENT, + TI_SCI_FLAG_REQ_ACK_ON_PROCESSED, + sizeof(*req), sizeof(*resp)); + if (IS_ERR(xfer)) { + ret = PTR_ERR(xfer); + dev_err(dev, "Message alloc failed(%d)\n", ret); + return ret; + } + req = (struct ti_sci_msg_req_get_clock_parent *)xfer->xfer_buf; + req->dev_id = dev_id; + req->clk_id = clk_id; + + ret = ti_sci_do_xfer(info, xfer); + if (ret) { + dev_err(dev, "Mbox send fail %d\n", ret); + goto fail; + } + + resp = (struct ti_sci_msg_resp_get_clock_parent *)xfer->xfer_buf; + + if (!ti_sci_is_response_ack(resp)) + ret = -ENODEV; + else + *parent_id = resp->parent_id; + +fail: + ti_sci_put_one_xfer(&info->minfo, xfer); + + return ret; +} + +/** + * ti_sci_cmd_clk_get_num_parents() - Get num parents of the current clk source + * @handle: pointer to TI SCI handle + * @dev_id: Device identifier this request is for + * @clk_id: Clock identifier for the device for this request. + * Each device has its own set of clock inputs. This indexes + * which clock input to modify. + * @num_parents: Returns the number of parents to the current clock. + * + * Return: 0 if all went well, else returns appropriate error value. 
+ */ +static int ti_sci_cmd_clk_get_num_parents(const struct ti_sci_handle *handle, + u32 dev_id, u8 clk_id, + u8 *num_parents) +{ + struct ti_sci_info *info; + struct ti_sci_msg_req_get_clock_num_parents *req; + struct ti_sci_msg_resp_get_clock_num_parents *resp; + struct ti_sci_xfer *xfer; + struct device *dev; + int ret = 0; + + if (IS_ERR(handle)) + return PTR_ERR(handle); + if (!handle || !num_parents) + return -EINVAL; + + info = handle_to_ti_sci_info(handle); + dev = info->dev; + + xfer = ti_sci_get_one_xfer(info, TI_SCI_MSG_GET_NUM_CLOCK_PARENTS, + TI_SCI_FLAG_REQ_ACK_ON_PROCESSED, + sizeof(*req), sizeof(*resp)); + if (IS_ERR(xfer)) { + ret = PTR_ERR(xfer); + dev_err(dev, "Message alloc failed(%d)\n", ret); + return ret; + } + req = (struct ti_sci_msg_req_get_clock_num_parents *)xfer->xfer_buf; + req->dev_id = dev_id; + req->clk_id = clk_id; + + ret = ti_sci_do_xfer(info, xfer); + if (ret) { + dev_err(dev, "Mbox send fail %d\n", ret); + goto fail; + } + + resp = (struct ti_sci_msg_resp_get_clock_num_parents *)xfer->xfer_buf; + + if (!ti_sci_is_response_ack(resp)) + ret = -ENODEV; + else + *num_parents = resp->num_parents; + +fail: + ti_sci_put_one_xfer(&info->minfo, xfer); + + return ret; +} + +/** + * ti_sci_cmd_clk_get_match_freq() - Find a good match for frequency + * @handle: pointer to TI SCI handle + * @dev_id: Device identifier this request is for + * @clk_id: Clock identifier for the device for this request. + * Each device has it's own set of clock inputs. This indexes + * which clock input to modify. + * @min_freq: The minimum allowable frequency in Hz. This is the minimum + * allowable programmed frequency and does not account for clock + * tolerances and jitter. + * @target_freq: The target clock frequency in Hz. A frequency will be + * processed as close to this target frequency as possible. + * @max_freq: The maximum allowable frequency in Hz. 
This is the maximum + * allowable programmed frequency and does not account for clock + * tolerances and jitter. + * @match_freq: Frequency match in Hz response. + * + * Return: 0 if all went well, else returns appropriate error value. + */ +static int ti_sci_cmd_clk_get_match_freq(const struct ti_sci_handle *handle, + u32 dev_id, u8 clk_id, u64 min_freq, + u64 target_freq, u64 max_freq, + u64 *match_freq) +{ + struct ti_sci_info *info; + struct ti_sci_msg_req_query_clock_freq *req; + struct ti_sci_msg_resp_query_clock_freq *resp; + struct ti_sci_xfer *xfer; + struct device *dev; + int ret = 0; + + if (IS_ERR(handle)) + return PTR_ERR(handle); + if (!handle || !match_freq) + return -EINVAL; + + info = handle_to_ti_sci_info(handle); + dev = info->dev; + + xfer = ti_sci_get_one_xfer(info, TI_SCI_MSG_QUERY_CLOCK_FREQ, + TI_SCI_FLAG_REQ_ACK_ON_PROCESSED, + sizeof(*req), sizeof(*resp)); + if (IS_ERR(xfer)) { + ret = PTR_ERR(xfer); + dev_err(dev, "Message alloc failed(%d)\n", ret); + return ret; + } + req = (struct ti_sci_msg_req_query_clock_freq *)xfer->xfer_buf; + req->dev_id = dev_id; + req->clk_id = clk_id; + req->min_freq_hz = min_freq; + req->target_freq_hz = target_freq; + req->max_freq_hz = max_freq; + + ret = ti_sci_do_xfer(info, xfer); + if (ret) { + dev_err(dev, "Mbox send fail %d\n", ret); + goto fail; + } + + resp = (struct ti_sci_msg_resp_query_clock_freq *)xfer->xfer_buf; + + if (!ti_sci_is_response_ack(resp)) + ret = -ENODEV; + else + *match_freq = resp->freq_hz; + +fail: + ti_sci_put_one_xfer(&info->minfo, xfer); + + return ret; +} + +/** + * ti_sci_cmd_clk_set_freq() - Set a frequency for clock + * @handle: pointer to TI SCI handle + * @dev_id: Device identifier this request is for + * @clk_id: Clock identifier for the device for this request. + * Each device has it's own set of clock inputs. This indexes + * which clock input to modify. + * @min_freq: The minimum allowable frequency in Hz. 
This is the minimum + * allowable programmed frequency and does not account for clock + * tolerances and jitter. + * @target_freq: The target clock frequency in Hz. A frequency will be + * processed as close to this target frequency as possible. + * @max_freq: The maximum allowable frequency in Hz. This is the maximum + * allowable programmed frequency and does not account for clock + * tolerances and jitter. + * + * Return: 0 if all went well, else returns appropriate error value. + */ +static int ti_sci_cmd_clk_set_freq(const struct ti_sci_handle *handle, + u32 dev_id, u8 clk_id, u64 min_freq, + u64 target_freq, u64 max_freq) +{ + struct ti_sci_info *info; + struct ti_sci_msg_req_set_clock_freq *req; + struct ti_sci_msg_hdr *resp; + struct ti_sci_xfer *xfer; + struct device *dev; + int ret = 0; + + if (IS_ERR(handle)) + return PTR_ERR(handle); + if (!handle) + return -EINVAL; + + info = handle_to_ti_sci_info(handle); + dev = info->dev; + + xfer = ti_sci_get_one_xfer(info, TI_SCI_MSG_SET_CLOCK_FREQ, + TI_SCI_FLAG_REQ_ACK_ON_PROCESSED, + sizeof(*req), sizeof(*resp)); + if (IS_ERR(xfer)) { + ret = PTR_ERR(xfer); + dev_err(dev, "Message alloc failed(%d)\n", ret); + return ret; + } + req = (struct ti_sci_msg_req_set_clock_freq *)xfer->xfer_buf; + req->dev_id = dev_id; + req->clk_id = clk_id; + req->min_freq_hz = min_freq; + req->target_freq_hz = target_freq; + req->max_freq_hz = max_freq; + + ret = ti_sci_do_xfer(info, xfer); + if (ret) { + dev_err(dev, "Mbox send fail %d\n", ret); + goto fail; + } + + resp = (struct ti_sci_msg_hdr *)xfer->xfer_buf; + + ret = ti_sci_is_response_ack(resp) ? 0 : -ENODEV; + +fail: + ti_sci_put_one_xfer(&info->minfo, xfer); + + return ret; +} + +/** + * ti_sci_cmd_clk_get_freq() - Get current frequency + * @handle: pointer to TI SCI handle + * @dev_id: Device identifier this request is for + * @clk_id: Clock identifier for the device for this request. + * Each device has it's own set of clock inputs. 
This indexes + * which clock input to modify. + * @freq: Current frequency in Hz + * + * Return: 0 if all went well, else returns appropriate error value. + */ +static int ti_sci_cmd_clk_get_freq(const struct ti_sci_handle *handle, + u32 dev_id, u8 clk_id, u64 *freq) +{ + struct ti_sci_info *info; + struct ti_sci_msg_req_get_clock_freq *req; + struct ti_sci_msg_resp_get_clock_freq *resp; + struct ti_sci_xfer *xfer; + struct device *dev; + int ret = 0; + + if (IS_ERR(handle)) + return PTR_ERR(handle); + if (!handle || !freq) + return -EINVAL; + + info = handle_to_ti_sci_info(handle); + dev = info->dev; + + xfer = ti_sci_get_one_xfer(info, TI_SCI_MSG_GET_CLOCK_FREQ, + TI_SCI_FLAG_REQ_ACK_ON_PROCESSED, + sizeof(*req), sizeof(*resp)); + if (IS_ERR(xfer)) { + ret = PTR_ERR(xfer); + dev_err(dev, "Message alloc failed(%d)\n", ret); + return ret; + } + req = (struct ti_sci_msg_req_get_clock_freq *)xfer->xfer_buf; + req->dev_id = dev_id; + req->clk_id = clk_id; + + ret = ti_sci_do_xfer(info, xfer); + if (ret) { + dev_err(dev, "Mbox send fail %d\n", ret); + goto fail; + } + + resp = (struct ti_sci_msg_resp_get_clock_freq *)xfer->xfer_buf; + + if (!ti_sci_is_response_ack(resp)) + ret = -ENODEV; + else + *freq = resp->freq_hz; + +fail: + ti_sci_put_one_xfer(&info->minfo, xfer); + + return ret; +} + /* * ti_sci_setup_ops() - Setup the operations structures * @info: pointer to TISCI pointer @@ -910,6 +1579,7 @@ static void ti_sci_setup_ops(struct ti_sci_info *info) { struct ti_sci_ops *ops = &info->handle.ops; struct ti_sci_dev_ops *dops = &ops->dev_ops; + struct ti_sci_clk_ops *cops = &ops->clk_ops; dops->get_device = ti_sci_cmd_get_device; dops->idle_device = ti_sci_cmd_idle_device; @@ -923,6 +1593,21 @@ static void ti_sci_setup_ops(struct ti_sci_info *info) dops->is_transitioning = ti_sci_cmd_dev_is_trans; dops->set_device_resets = ti_sci_cmd_set_device_resets; dops->get_device_resets = ti_sci_cmd_get_device_resets; + + cops->get_clock = ti_sci_cmd_get_clock; + 
cops->idle_clock = ti_sci_cmd_idle_clock; + cops->put_clock = ti_sci_cmd_put_clock; + cops->is_auto = ti_sci_cmd_clk_is_auto; + cops->is_on = ti_sci_cmd_clk_is_on; + cops->is_off = ti_sci_cmd_clk_is_off; + + cops->set_parent = ti_sci_cmd_clk_set_parent; + cops->get_parent = ti_sci_cmd_clk_get_parent; + cops->get_num_parents = ti_sci_cmd_clk_get_num_parents; + + cops->get_best_match_freq = ti_sci_cmd_clk_get_match_freq; + cops->set_freq = ti_sci_cmd_clk_set_freq; + cops->get_freq = ti_sci_cmd_clk_get_freq; } /** diff --git a/drivers/firmware/ti_sci.h b/drivers/firmware/ti_sci.h index 29ce0532a7ca..f69907cfc128 100644 --- a/drivers/firmware/ti_sci.h +++ b/drivers/firmware/ti_sci.h @@ -52,6 +52,16 @@ #define TI_SCI_MSG_GET_DEVICE_STATE 0x0201 #define TI_SCI_MSG_SET_DEVICE_RESETS 0x0202 +/* Clock requests */ +#define TI_SCI_MSG_SET_CLOCK_STATE 0x0100 +#define TI_SCI_MSG_GET_CLOCK_STATE 0x0101 +#define TI_SCI_MSG_SET_CLOCK_PARENT 0x0102 +#define TI_SCI_MSG_GET_CLOCK_PARENT 0x0103 +#define TI_SCI_MSG_GET_NUM_CLOCK_PARENTS 0x0104 +#define TI_SCI_MSG_SET_CLOCK_FREQ 0x010c +#define TI_SCI_MSG_QUERY_CLOCK_FREQ 0x010d +#define TI_SCI_MSG_GET_CLOCK_FREQ 0x010e + /** * struct ti_sci_msg_hdr - Generic Message Header for All messages and responses * @type: Type of messages: One of TI_SCI_MSG* values @@ -188,4 +198,283 @@ struct ti_sci_msg_req_set_device_resets { u32 resets; } __packed; +/** + * struct ti_sci_msg_req_set_clock_state - Request to setup a Clock state + * @hdr: Generic Header, Certain flags can be set specific to the clocks: + * MSG_FLAG_CLOCK_ALLOW_SSC: Allow this clock to be modified + * via spread spectrum clocking. + * MSG_FLAG_CLOCK_ALLOW_FREQ_CHANGE: Allow this clock's + * frequency to be changed while it is running so long as it + * is within the min/max limits. + * MSG_FLAG_CLOCK_INPUT_TERM: Enable input termination, this + * is only applicable to clock inputs on the SoC pseudo-device. 
+ * @dev_id: Device identifier this request is for + * @clk_id: Clock identifier for the device for this request. + * Each device has it's own set of clock inputs. This indexes + * which clock input to modify. + * @request_state: Request the state for the clock to be set to. + * MSG_CLOCK_SW_STATE_UNREQ: The IP does not require this clock, + * it can be disabled, regardless of the state of the device + * MSG_CLOCK_SW_STATE_AUTO: Allow the System Controller to + * automatically manage the state of this clock. If the device + * is enabled, then the clock is enabled. If the device is set + * to off or retention, then the clock is internally set as not + * being required by the device.(default) + * MSG_CLOCK_SW_STATE_REQ: Configure the clock to be enabled, + * regardless of the state of the device. + * + * Normally, all required clocks are managed by TISCI entity, this is used + * only for specific control *IF* required. Auto managed state is + * MSG_CLOCK_SW_STATE_AUTO, in other states, TISCI entity assume remote + * will explicitly control. + * + * Request type is TI_SCI_MSG_SET_CLOCK_STATE, response is a generic + * ACK or NACK message. + */ +struct ti_sci_msg_req_set_clock_state { + /* Additional hdr->flags options */ +#define MSG_FLAG_CLOCK_ALLOW_SSC TI_SCI_MSG_FLAG(8) +#define MSG_FLAG_CLOCK_ALLOW_FREQ_CHANGE TI_SCI_MSG_FLAG(9) +#define MSG_FLAG_CLOCK_INPUT_TERM TI_SCI_MSG_FLAG(10) + struct ti_sci_msg_hdr hdr; + u32 dev_id; + u8 clk_id; +#define MSG_CLOCK_SW_STATE_UNREQ 0 +#define MSG_CLOCK_SW_STATE_AUTO 1 +#define MSG_CLOCK_SW_STATE_REQ 2 + u8 request_state; +} __packed; + +/** + * struct ti_sci_msg_req_get_clock_state - Request for clock state + * @hdr: Generic Header + * @dev_id: Device identifier this request is for + * @clk_id: Clock identifier for the device for this request. + * Each device has it's own set of clock inputs. This indexes + * which clock input to get state of. 
+ * + * Request type is TI_SCI_MSG_GET_CLOCK_STATE, response is state + * of the clock + */ +struct ti_sci_msg_req_get_clock_state { + struct ti_sci_msg_hdr hdr; + u32 dev_id; + u8 clk_id; +} __packed; + +/** + * struct ti_sci_msg_resp_get_clock_state - Response to get clock state + * @hdr: Generic Header + * @programmed_state: Any programmed state of the clock. This is one of + * MSG_CLOCK_SW_STATE* values. + * @current_state: Current state of the clock. This is one of: + * MSG_CLOCK_HW_STATE_NOT_READY: Clock is not ready + * MSG_CLOCK_HW_STATE_READY: Clock is ready + * + * Response to TI_SCI_MSG_GET_CLOCK_STATE. + */ +struct ti_sci_msg_resp_get_clock_state { + struct ti_sci_msg_hdr hdr; + u8 programmed_state; +#define MSG_CLOCK_HW_STATE_NOT_READY 0 +#define MSG_CLOCK_HW_STATE_READY 1 + u8 current_state; +} __packed; + +/** + * struct ti_sci_msg_req_set_clock_parent - Set the clock parent + * @hdr: Generic Header + * @dev_id: Device identifier this request is for + * @clk_id: Clock identifier for the device for this request. + * Each device has it's own set of clock inputs. This indexes + * which clock input to modify. + * @parent_id: The new clock parent is selectable by an index via this + * parameter. + * + * Request type is TI_SCI_MSG_SET_CLOCK_PARENT, response is generic + * ACK / NACK message. + */ +struct ti_sci_msg_req_set_clock_parent { + struct ti_sci_msg_hdr hdr; + u32 dev_id; + u8 clk_id; + u8 parent_id; +} __packed; + +/** + * struct ti_sci_msg_req_get_clock_parent - Get the clock parent + * @hdr: Generic Header + * @dev_id: Device identifier this request is for + * @clk_id: Clock identifier for the device for this request. + * Each device has it's own set of clock inputs. This indexes + * which clock input to get the parent for. 
+ * + * Request type is TI_SCI_MSG_GET_CLOCK_PARENT, response is parent information + */ +struct ti_sci_msg_req_get_clock_parent { + struct ti_sci_msg_hdr hdr; + u32 dev_id; + u8 clk_id; +} __packed; + +/** + * struct ti_sci_msg_resp_get_clock_parent - Response with clock parent + * @hdr: Generic Header + * @parent_id: The current clock parent + * + * Response to TI_SCI_MSG_GET_CLOCK_PARENT. + */ +struct ti_sci_msg_resp_get_clock_parent { + struct ti_sci_msg_hdr hdr; + u8 parent_id; +} __packed; + +/** + * struct ti_sci_msg_req_get_clock_num_parents - Request to get clock parents + * @hdr: Generic header + * @dev_id: Device identifier this request is for + * @clk_id: Clock identifier for the device for this request. + * + * This request provides information about how many clock parent options + * are available for a given clock to a device. This is typically used + * for input clocks. + * + * Request type is TI_SCI_MSG_GET_NUM_CLOCK_PARENTS, response is appropriate + * message, or NACK in case of inability to satisfy request. + */ +struct ti_sci_msg_req_get_clock_num_parents { + struct ti_sci_msg_hdr hdr; + u32 dev_id; + u8 clk_id; +} __packed; + +/** + * struct ti_sci_msg_resp_get_clock_num_parents - Response for get clk parents + * @hdr: Generic header + * @num_parents: Number of clock parents + * + * Response to TI_SCI_MSG_GET_NUM_CLOCK_PARENTS + */ +struct ti_sci_msg_resp_get_clock_num_parents { + struct ti_sci_msg_hdr hdr; + u8 num_parents; +} __packed; + +/** + * struct ti_sci_msg_req_query_clock_freq - Request to query a frequency + * @hdr: Generic Header + * @dev_id: Device identifier this request is for + * @min_freq_hz: The minimum allowable frequency in Hz. This is the minimum + * allowable programmed frequency and does not account for clock + * tolerances and jitter. + * @target_freq_hz: The target clock frequency. A frequency will be found + * as close to this target frequency as possible. + * @max_freq_hz: The maximum allowable frequency in Hz. 
This is the maximum + * allowable programmed frequency and does not account for clock + * tolerances and jitter. + * @clk_id: Clock identifier for the device for this request. + * + * NOTE: Normally clock frequency management is automatically done by TISCI + * entity. In case of specific requests, TISCI evaluates capability to achieve + * requested frequency within provided range and responds with + * result message. + * + * Request type is TI_SCI_MSG_QUERY_CLOCK_FREQ, response is appropriate message, + * or NACK in case of inability to satisfy request. + */ +struct ti_sci_msg_req_query_clock_freq { + struct ti_sci_msg_hdr hdr; + u32 dev_id; + u64 min_freq_hz; + u64 target_freq_hz; + u64 max_freq_hz; + u8 clk_id; +} __packed; + +/** + * struct ti_sci_msg_resp_query_clock_freq - Response to a clock frequency query + * @hdr: Generic Header + * @freq_hz: Frequency that is the best match in Hz. + * + * Response to request type TI_SCI_MSG_QUERY_CLOCK_FREQ. NOTE: if the request + * cannot be satisfied, the message will be of type NACK. + */ +struct ti_sci_msg_resp_query_clock_freq { + struct ti_sci_msg_hdr hdr; + u64 freq_hz; +} __packed; + +/** + * struct ti_sci_msg_req_set_clock_freq - Request to setup a clock frequency + * @hdr: Generic Header + * @dev_id: Device identifier this request is for + * @min_freq_hz: The minimum allowable frequency in Hz. This is the minimum + * allowable programmed frequency and does not account for clock + * tolerances and jitter. + * @target_freq_hz: The target clock frequency. The clock will be programmed + * at a rate as close to this target frequency as possible. + * @max_freq_hz: The maximum allowable frequency in Hz. This is the maximum + * allowable programmed frequency and does not account for clock + * tolerances and jitter. + * @clk_id: Clock identifier for the device for this request. + * + * NOTE: Normally clock frequency management is automatically done by TISCI + * entity. 
In case of specific requests, TISCI evaluates capability to achieve + * requested range and responds with success/failure message. + * + * This sets the desired frequency for a clock within an allowable + * range. This message will fail on an enabled clock unless + * MSG_FLAG_CLOCK_ALLOW_FREQ_CHANGE is set for the clock. Additionally, + * if other clocks have their frequency modified due to this message, + * they also must have the MSG_FLAG_CLOCK_ALLOW_FREQ_CHANGE or be disabled. + * + * Calling set frequency on a clock input to the SoC pseudo-device will + * inform the PMMC of that clock's frequency. Setting a frequency of + * zero will indicate the clock is disabled. + * + * Calling set frequency on clock outputs from the SoC pseudo-device will + * function similarly to setting the clock frequency on a device. + * + * Request type is TI_SCI_MSG_SET_CLOCK_FREQ, response is a generic ACK/NACK + * message. + */ +struct ti_sci_msg_req_set_clock_freq { + struct ti_sci_msg_hdr hdr; + u32 dev_id; + u64 min_freq_hz; + u64 target_freq_hz; + u64 max_freq_hz; + u8 clk_id; +} __packed; + +/** + * struct ti_sci_msg_req_get_clock_freq - Request to get the clock frequency + * @hdr: Generic Header + * @dev_id: Device identifier this request is for + * @clk_id: Clock identifier for the device for this request. + * + * NOTE: Normally clock frequency management is automatically done by TISCI + * entity. In some cases, clock frequencies are configured by host. + * + * Request type is TI_SCI_MSG_GET_CLOCK_FREQ, responded with clock frequency + * that the clock is currently at. + */ +struct ti_sci_msg_req_get_clock_freq { + struct ti_sci_msg_hdr hdr; + u32 dev_id; + u8 clk_id; +} __packed; + +/** + * struct ti_sci_msg_resp_get_clock_freq - Response of clock frequency request + * @hdr: Generic Header + * @freq_hz: Frequency that the clock is currently on, in Hz. + * + * Response to request type TI_SCI_MSG_GET_CLOCK_FREQ. 
+ */ +struct ti_sci_msg_resp_get_clock_freq { + struct ti_sci_msg_hdr hdr; + u64 freq_hz; +} __packed; + #endif /* __TI_SCI_H */ diff --git a/include/linux/soc/ti/ti_sci_protocol.h b/include/linux/soc/ti/ti_sci_protocol.h index 87fa73851471..76378fddf609 100644 --- a/include/linux/soc/ti/ti_sci_protocol.h +++ b/include/linux/soc/ti/ti_sci_protocol.h @@ -114,12 +114,90 @@ struct ti_sci_dev_ops { u32 *reset_state); }; +/** + * struct ti_sci_clk_ops - Clock control operations + * @get_clock: Request for activation of clock and manage by processor + * - needs_ssc: 'true' if Spread Spectrum clock is desired. + * - can_change_freq: 'true' if frequency change is desired. + * - enable_input_term: 'true' if input termination is desired. + * @idle_clock: Request for Idling a clock managed by processor + * @put_clock: Release the clock to be auto managed by TISCI + * @is_auto: Is the clock being auto managed + * - req_state: state indicating if the clock is auto managed + * @is_on: Is the clock ON + * - req_state: if the clock is requested to be forced ON + * - current_state: if the clock is currently ON + * @is_off: Is the clock OFF + * - req_state: if the clock is requested to be forced OFF + * - current_state: if the clock is currently Gated + * @set_parent: Set the clock source of a specific device clock + * - parent_id: Parent clock identifier to set. + * @get_parent: Get the current clock source of a specific device clock + * - parent_id: Parent clock identifier which is the parent. + * @get_num_parents: Get the number of parents of the current clock source + * - num_parents: returns the number of parent clocks. + * @get_best_match_freq: Find a best matching frequency for a frequency + * range. + * - match_freq: Best matching frequency in Hz. + * @set_freq: Set the Clock frequency + * @get_freq: Get the Clock frequency + * - current_freq: Frequency in Hz that the clock is at. 
+ * + * NOTE: for all these functions, the following parameters are generic in + * nature: + * -handle: Pointer to TISCI handle as retrieved by *ti_sci_get_handle + * -did: Device identifier this request is for + * -cid: Clock identifier for the device for this request. + * Each device has it's own set of clock inputs. This indexes + * which clock input to modify. + * -min_freq: The minimum allowable frequency in Hz. This is the minimum + * allowable programmed frequency and does not account for clock + * tolerances and jitter. + * -target_freq: The target clock frequency in Hz. A frequency will be + * processed as close to this target frequency as possible. + * -max_freq: The maximum allowable frequency in Hz. This is the maximum + * allowable programmed frequency and does not account for clock + * tolerances and jitter. + * + * Request for the clock - NOTE: the client MUST maintain integrity of + * usage count by balancing get_clock with put_clock. No refcounting is + * managed by driver for that purpose. 
+ */ +struct ti_sci_clk_ops { + int (*get_clock)(const struct ti_sci_handle *handle, u32 did, u8 cid, + bool needs_ssc, bool can_change_freq, + bool enable_input_term); + int (*idle_clock)(const struct ti_sci_handle *handle, u32 did, u8 cid); + int (*put_clock)(const struct ti_sci_handle *handle, u32 did, u8 cid); + int (*is_auto)(const struct ti_sci_handle *handle, u32 did, u8 cid, + bool *req_state); + int (*is_on)(const struct ti_sci_handle *handle, u32 did, u8 cid, + bool *req_state, bool *current_state); + int (*is_off)(const struct ti_sci_handle *handle, u32 did, u8 cid, + bool *req_state, bool *current_state); + int (*set_parent)(const struct ti_sci_handle *handle, u32 did, u8 cid, + u8 parent_id); + int (*get_parent)(const struct ti_sci_handle *handle, u32 did, u8 cid, + u8 *parent_id); + int (*get_num_parents)(const struct ti_sci_handle *handle, u32 did, + u8 cid, u8 *num_parents); + int (*get_best_match_freq)(const struct ti_sci_handle *handle, u32 did, + u8 cid, u64 min_freq, u64 target_freq, + u64 max_freq, u64 *match_freq); + int (*set_freq)(const struct ti_sci_handle *handle, u32 did, u8 cid, + u64 min_freq, u64 target_freq, u64 max_freq); + int (*get_freq)(const struct ti_sci_handle *handle, u32 did, u8 cid, + u64 *current_freq); +}; + /** * struct ti_sci_ops - Function support for TI SCI * @dev_ops: Device specific operations + * @clk_ops: Clock specific operations */ struct ti_sci_ops { struct ti_sci_dev_ops dev_ops; + struct ti_sci_clk_ops clk_ops; }; /** -- cgit v1.2.3 From 912cffb4ed8612dc99ee0251cc0c9785855162cd Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Tue, 18 Oct 2016 18:08:37 -0500 Subject: firmware: ti_sci: Add support for reboot core service Since system controller now has control over SoC power management, it needs to be explicitly requested to reboot the SoC. Add support for it. In some systems however, SoC needs to toggle a GPIO or send event to an external entity (like a PMIC) for a system reboot to take place. 
To facilitate that, we allow for a DT property to determine if the reboot handler will be registered and further, the service is also made available to other drivers (such as PMIC driver) to sequence the additional operation and trigger the SoC reboot as the last step. Tested-by: Lokesh Vutla Signed-off-by: Nishanth Menon Signed-off-by: Tero Kristo --- drivers/firmware/ti_sci.c | 83 ++++++++++++++++++++++++++++++++++ drivers/firmware/ti_sci.h | 12 +++++ include/linux/soc/ti/ti_sci_protocol.h | 11 +++++ 3 files changed, 106 insertions(+) (limited to 'include/linux') diff --git a/drivers/firmware/ti_sci.c b/drivers/firmware/ti_sci.c index 496a007e5c69..874ff32db366 100644 --- a/drivers/firmware/ti_sci.c +++ b/drivers/firmware/ti_sci.c @@ -28,6 +28,7 @@ #include #include #include +#include #include "ti_sci.h" @@ -90,6 +91,7 @@ struct ti_sci_desc { * struct ti_sci_info - Structure representing a TI SCI instance * @dev: Device pointer * @desc: SoC description for this instance + * @nb: Reboot Notifier block * @d: Debugfs file entry * @debug_region: Memory region where the debug message are available * @debug_region_size: Debug region size @@ -104,6 +106,7 @@ struct ti_sci_desc { */ struct ti_sci_info { struct device *dev; + struct notifier_block nb; const struct ti_sci_desc *desc; struct dentry *d; void __iomem *debug_region; @@ -117,10 +120,12 @@ struct ti_sci_info { struct list_head node; /* protected by ti_sci_list_mutex */ int users; + }; #define cl_to_ti_sci_info(c) container_of(c, struct ti_sci_info, cl) #define handle_to_ti_sci_info(h) container_of(h, struct ti_sci_info, handle) +#define reboot_to_ti_sci_info(n) container_of(n, struct ti_sci_info, nb) #ifdef CONFIG_DEBUG_FS @@ -1571,6 +1576,52 @@ fail: return ret; } +static int ti_sci_cmd_core_reboot(const struct ti_sci_handle *handle) +{ + struct ti_sci_info *info; + struct ti_sci_msg_req_reboot *req; + struct ti_sci_msg_hdr *resp; + struct ti_sci_xfer *xfer; + struct device *dev; + int ret = 0; + + if 
(IS_ERR(handle)) + return PTR_ERR(handle); + if (!handle) + return -EINVAL; + + info = handle_to_ti_sci_info(handle); + dev = info->dev; + + xfer = ti_sci_get_one_xfer(info, TI_SCI_MSG_SYS_RESET, + TI_SCI_FLAG_REQ_ACK_ON_PROCESSED, + sizeof(*req), sizeof(*resp)); + if (IS_ERR(xfer)) { + ret = PTR_ERR(xfer); + dev_err(dev, "Message alloc failed(%d)\n", ret); + return ret; + } + req = (struct ti_sci_msg_req_reboot *)xfer->xfer_buf; + + ret = ti_sci_do_xfer(info, xfer); + if (ret) { + dev_err(dev, "Mbox send fail %d\n", ret); + goto fail; + } + + resp = (struct ti_sci_msg_hdr *)xfer->xfer_buf; + + if (!ti_sci_is_response_ack(resp)) + ret = -ENODEV; + else + ret = 0; + +fail: + ti_sci_put_one_xfer(&info->minfo, xfer); + + return ret; +} + /* * ti_sci_setup_ops() - Setup the operations structures * @info: pointer to TISCI pointer @@ -1578,9 +1629,12 @@ fail: static void ti_sci_setup_ops(struct ti_sci_info *info) { struct ti_sci_ops *ops = &info->handle.ops; + struct ti_sci_core_ops *core_ops = &ops->core_ops; struct ti_sci_dev_ops *dops = &ops->dev_ops; struct ti_sci_clk_ops *cops = &ops->clk_ops; + core_ops->reboot_device = ti_sci_cmd_core_reboot; + dops->get_device = ti_sci_cmd_get_device; dops->idle_device = ti_sci_cmd_idle_device; dops->put_device = ti_sci_cmd_put_device; @@ -1732,6 +1786,18 @@ const struct ti_sci_handle *devm_ti_sci_get_handle(struct device *dev) } EXPORT_SYMBOL_GPL(devm_ti_sci_get_handle); +static int tisci_reboot_handler(struct notifier_block *nb, unsigned long mode, + void *cmd) +{ + struct ti_sci_info *info = reboot_to_ti_sci_info(nb); + const struct ti_sci_handle *handle = &info->handle; + + ti_sci_cmd_core_reboot(handle); + + /* call fail OR pass, we should not be here in the first place */ + return NOTIFY_BAD; +} + /* Description for K2G */ static const struct ti_sci_desc ti_sci_pmmc_k2g_desc = { .host_id = 2, @@ -1759,6 +1825,7 @@ static int ti_sci_probe(struct platform_device *pdev) struct mbox_client *cl; int ret = -EINVAL; int i; + int 
reboot = 0; of_id = of_match_device(ti_sci_of_match, dev); if (!of_id) { @@ -1773,6 +1840,8 @@ static int ti_sci_probe(struct platform_device *pdev) info->dev = dev; info->desc = desc; + reboot = of_property_read_bool(dev->of_node, + "ti,system-reboot-controller"); INIT_LIST_HEAD(&info->node); minfo = &info->minfo; @@ -1845,6 +1914,17 @@ static int ti_sci_probe(struct platform_device *pdev) ti_sci_setup_ops(info); + if (reboot) { + info->nb.notifier_call = tisci_reboot_handler; + info->nb.priority = 128; + + ret = register_restart_handler(&info->nb); + if (ret) { + dev_err(dev, "reboot registration fail(%d)\n", ret); + return ret; + } + } + dev_info(dev, "ABI: %d.%d (firmware rev 0x%04x '%s')\n", info->handle.version.abi_major, info->handle.version.abi_minor, info->handle.version.firmware_revision, @@ -1874,6 +1954,9 @@ static int ti_sci_remove(struct platform_device *pdev) info = platform_get_drvdata(pdev); + if (info->nb.notifier_call) + unregister_restart_handler(&info->nb); + mutex_lock(&ti_sci_list_mutex); if (info->users) ret = -EBUSY; diff --git a/drivers/firmware/ti_sci.h b/drivers/firmware/ti_sci.h index f69907cfc128..9b611e9e6f6d 100644 --- a/drivers/firmware/ti_sci.h +++ b/drivers/firmware/ti_sci.h @@ -46,6 +46,7 @@ #define TI_SCI_MSG_VERSION 0x0002 #define TI_SCI_MSG_WAKE_REASON 0x0003 #define TI_SCI_MSG_GOODBYE 0x0004 +#define TI_SCI_MSG_SYS_RESET 0x0005 /* Device requests */ #define TI_SCI_MSG_SET_DEVICE_STATE 0x0200 @@ -105,6 +106,17 @@ struct ti_sci_msg_resp_version { u8 abi_minor; } __packed; +/** + * struct ti_sci_msg_req_reboot - Reboot the SoC + * @hdr: Generic Header + * + * Request type is TI_SCI_MSG_SYS_RESET, responded with a generic + * ACK/NACK message. 
+ */ +struct ti_sci_msg_req_reboot { + struct ti_sci_msg_hdr hdr; +} __packed; + /** * struct ti_sci_msg_req_set_device_state - Set the desired state of the device * @hdr: Generic header diff --git a/include/linux/soc/ti/ti_sci_protocol.h b/include/linux/soc/ti/ti_sci_protocol.h index 76378fddf609..0ccbc138c26a 100644 --- a/include/linux/soc/ti/ti_sci_protocol.h +++ b/include/linux/soc/ti/ti_sci_protocol.h @@ -35,6 +35,16 @@ struct ti_sci_version_info { struct ti_sci_handle; +/** + * struct ti_sci_core_ops - SoC Core Operations + * @reboot_device: Reboot the SoC + * Returns 0 for successful request(ideally should never return), + * else returns corresponding error value. + */ +struct ti_sci_core_ops { + int (*reboot_device)(const struct ti_sci_handle *handle); +}; + /** * struct ti_sci_dev_ops - Device control operations * @get_device: Command to request for device managed by TISCI @@ -196,6 +206,7 @@ struct ti_sci_clk_ops { * @clk_ops: Clock specific operations */ struct ti_sci_ops { + struct ti_sci_core_ops core_ops; struct ti_sci_dev_ops dev_ops; struct ti_sci_clk_ops clk_ops; }; -- cgit v1.2.3 From 3f817fe718c6cb3ddcc2ab04ba86faecc20ef8fe Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Thu, 27 Oct 2016 00:42:01 +0300 Subject: cfg80211: Define IEEE P802.11ai (FILS) information elements Define the Element IDs and Element ID Extensions from IEEE P802.11ai/D11.0. In addition, add a new cfg80211_find_ext_ie() wrapper to make it easier to find information elements that used the Element ID Extension field. 
Signed-off-by: Jouni Malinen Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 20 ++++++++++++++++++++ include/net/cfg80211.h | 21 +++++++++++++++++++++ 2 files changed, 41 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index a80516fd65c8..d428adf51446 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1960,6 +1960,26 @@ enum ieee80211_eid { WLAN_EID_VENDOR_SPECIFIC = 221, WLAN_EID_QOS_PARAMETER = 222, + WLAN_EID_CAG_NUMBER = 237, + WLAN_EID_AP_CSN = 239, + WLAN_EID_FILS_INDICATION = 240, + WLAN_EID_DILS = 241, + WLAN_EID_FRAGMENT = 242, + WLAN_EID_EXTENSION = 255 +}; + +/* Element ID Extensions for Element ID 255 */ +enum ieee80211_eid_ext { + WLAN_EID_EXT_ASSOC_DELAY_INFO = 1, + WLAN_EID_EXT_FILS_REQ_PARAMS = 2, + WLAN_EID_EXT_FILS_KEY_CONFIRM = 3, + WLAN_EID_EXT_FILS_SESSION = 4, + WLAN_EID_EXT_FILS_HLP_CONTAINER = 5, + WLAN_EID_EXT_FILS_IP_ADDR_ASSIGN = 6, + WLAN_EID_EXT_KEY_DELIVERY = 7, + WLAN_EID_EXT_FILS_WRAPPED_DATA = 8, + WLAN_EID_EXT_FILS_PUBLIC_KEY = 12, + WLAN_EID_EXT_FILS_NONCE = 13, }; /* Action category code */ diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index dffc265a4fd6..8ca2e9f354f7 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -4180,6 +4180,27 @@ static inline const u8 *cfg80211_find_ie(u8 eid, const u8 *ies, int len) return cfg80211_find_ie_match(eid, ies, len, NULL, 0, 0); } +/** + * cfg80211_find_ext_ie - find information element with EID Extension in data + * + * @ext_eid: element ID Extension + * @ies: data consisting of IEs + * @len: length of data + * + * Return: %NULL if the extended element ID could not be found or if + * the element is invalid (claims to be longer than the given + * data), or a pointer to the first byte of the requested + * element, that is the byte containing the element ID. + * + * Note: There are no checks on the element length other than + * having to fit into the given data. 
+ */ +static inline const u8 *cfg80211_find_ext_ie(u8 ext_eid, const u8 *ies, int len) +{ + return cfg80211_find_ie_match(WLAN_EID_EXTENSION, ies, len, + &ext_eid, 1, 2); +} + /** * cfg80211_find_vendor_ie - find vendor specific information element in data * -- cgit v1.2.3 From 631810603a20874554b2f17adf42b72d0f15eda5 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Thu, 27 Oct 2016 00:42:02 +0300 Subject: cfg80211: Add Fast Initial Link Setup (FILS) auth algs This defines authentication algorithms for FILS (IEEE 802.11ai). Signed-off-by: Jouni Malinen Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 3 +++ include/uapi/linux/nl80211.h | 6 ++++++ net/wireless/nl80211.c | 21 +++++++++++++++++++-- 3 files changed, 28 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index d428adf51446..793a0174ba29 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1576,6 +1576,9 @@ struct ieee80211_vht_operation { #define WLAN_AUTH_SHARED_KEY 1 #define WLAN_AUTH_FT 2 #define WLAN_AUTH_SAE 3 +#define WLAN_AUTH_FILS_SK 4 +#define WLAN_AUTH_FILS_SK_PFS 5 +#define WLAN_AUTH_FILS_PK 6 #define WLAN_AUTH_LEAP 128 #define WLAN_AUTH_CHALLENGE_LEN 128 diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 7825fd4db19e..4dc21265cd12 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -3669,6 +3669,9 @@ enum nl80211_bss_status { * @NL80211_AUTHTYPE_FT: Fast BSS Transition (IEEE 802.11r) * @NL80211_AUTHTYPE_NETWORK_EAP: Network EAP (some Cisco APs and mainly LEAP) * @NL80211_AUTHTYPE_SAE: Simultaneous authentication of equals + * @NL80211_AUTHTYPE_FILS_SK: Fast Initial Link Setup shared key + * @NL80211_AUTHTYPE_FILS_SK_PFS: Fast Initial Link Setup shared key with PFS + * @NL80211_AUTHTYPE_FILS_PK: Fast Initial Link Setup public key * @__NL80211_AUTHTYPE_NUM: internal * @NL80211_AUTHTYPE_MAX: maximum valid auth algorithm * 
@NL80211_AUTHTYPE_AUTOMATIC: determine automatically (if necessary by @@ -3681,6 +3684,9 @@ enum nl80211_auth_type { NL80211_AUTHTYPE_FT, NL80211_AUTHTYPE_NETWORK_EAP, NL80211_AUTHTYPE_SAE, + NL80211_AUTHTYPE_FILS_SK, + NL80211_AUTHTYPE_FILS_SK_PFS, + NL80211_AUTHTYPE_FILS_PK, /* keep last */ __NL80211_AUTHTYPE_NUM, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 704851142eed..ff798620e929 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3778,12 +3778,23 @@ static bool nl80211_valid_auth_type(struct cfg80211_registered_device *rdev, if (!(rdev->wiphy.features & NL80211_FEATURE_SAE) && auth_type == NL80211_AUTHTYPE_SAE) return false; + if (!wiphy_ext_feature_isset(&rdev->wiphy, + NL80211_EXT_FEATURE_FILS_STA) && + (auth_type == NL80211_AUTHTYPE_FILS_SK || + auth_type == NL80211_AUTHTYPE_FILS_SK_PFS || + auth_type == NL80211_AUTHTYPE_FILS_PK)) + return false; return true; case NL80211_CMD_CONNECT: case NL80211_CMD_START_AP: /* SAE not supported yet */ if (auth_type == NL80211_AUTHTYPE_SAE) return false; + /* FILS not supported yet */ + if (auth_type == NL80211_AUTHTYPE_FILS_SK || + auth_type == NL80211_AUTHTYPE_FILS_SK_PFS || + auth_type == NL80211_AUTHTYPE_FILS_PK) + return false; return true; default: return false; @@ -7810,12 +7821,18 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info) if (!nl80211_valid_auth_type(rdev, auth_type, NL80211_CMD_AUTHENTICATE)) return -EINVAL; - if (auth_type == NL80211_AUTHTYPE_SAE && + if ((auth_type == NL80211_AUTHTYPE_SAE || + auth_type == NL80211_AUTHTYPE_FILS_SK || + auth_type == NL80211_AUTHTYPE_FILS_SK_PFS || + auth_type == NL80211_AUTHTYPE_FILS_PK) && !info->attrs[NL80211_ATTR_AUTH_DATA]) return -EINVAL; if (info->attrs[NL80211_ATTR_AUTH_DATA]) { - if (auth_type != NL80211_AUTHTYPE_SAE) + if (auth_type != NL80211_AUTHTYPE_SAE && + auth_type != NL80211_AUTHTYPE_FILS_SK && + auth_type != NL80211_AUTHTYPE_FILS_SK_PFS && + auth_type != NL80211_AUTHTYPE_FILS_PK) 
return -EINVAL; auth_data = nla_data(info->attrs[NL80211_ATTR_AUTH_DATA]); auth_data_len = nla_len(info->attrs[NL80211_ATTR_AUTH_DATA]); -- cgit v1.2.3 From 348bd456699801920a309c66e382380809fbdf41 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Thu, 27 Oct 2016 00:42:03 +0300 Subject: cfg80211: Add KEK/nonces for FILS association frames The new nl80211 attributes can be used to provide KEK and nonces to allow the driver to encrypt and decrypt FILS (Re)Association Request/Response frames in station mode. Signed-off-by: Jouni Malinen Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 3 +++ include/net/cfg80211.h | 9 +++++++++ include/uapi/linux/nl80211.h | 8 ++++++++ net/wireless/nl80211.c | 12 ++++++++++++ 4 files changed, 32 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 793a0174ba29..fe849329511a 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -2096,6 +2096,9 @@ enum ieee80211_key_len { #define IEEE80211_GCMP_MIC_LEN 16 #define IEEE80211_GCMP_PN_LEN 6 +#define FILS_NONCE_LEN 16 +#define FILS_MAX_KEK_LEN 64 + /* Public action codes */ enum ieee80211_pub_actioncode { WLAN_PUB_ACTION_EXT_CHANSW_ANN = 4, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 8ca2e9f354f7..738b4d8a4666 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1840,6 +1840,12 @@ enum cfg80211_assoc_req_flags { * @ht_capa_mask: The bits of ht_capa which are to be used. * @vht_capa: VHT capability override * @vht_capa_mask: VHT capability mask indicating which fields to use + * @fils_kek: FILS KEK for protecting (Re)Association Request/Response frame or + * %NULL if FILS is not used. + * @fils_kek_len: Length of fils_kek in octets + * @fils_nonces: FILS nonces (part of AAD) for protecting (Re)Association + * Request/Response frame or %NULL if FILS is not used. This field starts + * with 16 octets of STA Nonce followed by 16 octets of AP Nonce. 
*/ struct cfg80211_assoc_request { struct cfg80211_bss *bss; @@ -1851,6 +1857,9 @@ struct cfg80211_assoc_request { struct ieee80211_ht_cap ht_capa; struct ieee80211_ht_cap ht_capa_mask; struct ieee80211_vht_cap vht_capa, vht_capa_mask; + const u8 *fils_kek; + size_t fils_kek_len; + const u8 *fils_nonces; }; /** diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 4dc21265cd12..a268a009528a 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1944,6 +1944,11 @@ enum nl80211_commands { * attribute. * @NL80211_ATTR_NAN_MATCH: used to report a match. This is a nested attribute. * See &enum nl80211_nan_match_attributes. + * @NL80211_ATTR_FILS_KEK: KEK for FILS (Re)Association Request/Response frame + * protection. + * @NL80211_ATTR_FILS_NONCES: Nonces (part of AAD) for FILS (Re)Association + * Request/Response frame protection. This attribute contains the 16 octet + * STA Nonce followed by 16 octets of AP Nonce. * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined @@ -2344,6 +2349,9 @@ enum nl80211_attrs { NL80211_ATTR_NAN_FUNC, NL80211_ATTR_NAN_MATCH, + NL80211_ATTR_FILS_KEK, + NL80211_ATTR_FILS_NONCES, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index ff798620e929..667d5f719c22 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -414,6 +414,9 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_NAN_MASTER_PREF] = { .type = NLA_U8 }, [NL80211_ATTR_NAN_DUAL] = { .type = NLA_U8 }, [NL80211_ATTR_NAN_FUNC] = { .type = NLA_NESTED }, + [NL80211_ATTR_FILS_KEK] = { .type = NLA_BINARY, + .len = FILS_MAX_KEK_LEN }, + [NL80211_ATTR_FILS_NONCES] = { .len = 2 * FILS_NONCE_LEN }, }; /* policy for the key attributes */ @@ -8033,6 +8036,15 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info 
*info) req.flags |= ASSOC_REQ_USE_RRM; } + if (info->attrs[NL80211_ATTR_FILS_KEK]) { + req.fils_kek = nla_data(info->attrs[NL80211_ATTR_FILS_KEK]); + req.fils_kek_len = nla_len(info->attrs[NL80211_ATTR_FILS_KEK]); + if (!info->attrs[NL80211_ATTR_FILS_NONCES]) + return -EINVAL; + req.fils_nonces = + nla_data(info->attrs[NL80211_ATTR_FILS_NONCES]); + } + err = nl80211_crypto_settings(rdev, info, &req.crypto, 1); if (!err) { wdev_lock(dev->ieee80211_ptr); -- cgit v1.2.3 From d705ff38189fcfbbfa6aa97363d30c23348ad166 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 3 Oct 2016 11:18:33 +0200 Subject: tty: vt, cleanup and document con_scroll Scrolling helpers scrup and scrdown both accept 'top' and 'bottom' as unsigned int. Number of lines 'nr' is accepted as int, but all callers pass down unsigned too. So change the type of 'nr' to unsigned too. Now, promote unsigned int from the helpers up to the con_scroll hook which actually accepted all those as signed int. Next, the 'dir' parameter can have only two values and we define constants for that: SM_UP and SM_DOWN. Switch them to enum and do proper type checking on 'dir' too. Finally, document the behaviour of the hook. Signed-off-by: Jiri Slaby Cc: Thomas Winischhofer Cc: Tomi Valkeinen Cc: "James E.J. 
Bottomley" Cc: Helge Deller Cc: Cc: Cc: Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/vt.c | 6 ++++-- drivers/usb/misc/sisusbvga/sisusb_con.c | 18 ++++++++++-------- drivers/video/console/fbcon.c | 18 ++++++++---------- drivers/video/console/mdacon.c | 7 ++++--- drivers/video/console/newport_con.c | 8 ++++---- drivers/video/console/sticon.c | 7 ++++--- drivers/video/console/vgacon.c | 12 +++++------- include/linux/console.h | 16 +++++++++++----- 8 files changed, 50 insertions(+), 42 deletions(-) (limited to 'include/linux') diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index 06fb39c1d6dd..c4bf96fee32e 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -315,7 +315,8 @@ void schedule_console_callback(void) schedule_work(&console_work); } -static void scrup(struct vc_data *vc, unsigned int t, unsigned int b, int nr) +static void scrup(struct vc_data *vc, unsigned int t, unsigned int b, + unsigned int nr) { unsigned short *d, *s; @@ -332,7 +333,8 @@ static void scrup(struct vc_data *vc, unsigned int t, unsigned int b, int nr) vc->vc_size_row * nr); } -static void scrdown(struct vc_data *vc, unsigned int t, unsigned int b, int nr) +static void scrdown(struct vc_data *vc, unsigned int t, unsigned int b, + unsigned int nr) { unsigned short *s; unsigned int step; diff --git a/drivers/usb/misc/sisusbvga/sisusb_con.c b/drivers/usb/misc/sisusbvga/sisusb_con.c index 460cebf322e3..6331965daa0b 100644 --- a/drivers/usb/misc/sisusbvga/sisusb_con.c +++ b/drivers/usb/misc/sisusbvga/sisusb_con.c @@ -808,9 +808,10 @@ sisusbcon_cursor(struct vc_data *c, int mode) mutex_unlock(&sisusb->lock); } -static int +static bool sisusbcon_scroll_area(struct vc_data *c, struct sisusb_usb_data *sisusb, - int t, int b, int dir, int lines) + unsigned int t, unsigned int b, enum con_scroll dir, + unsigned int lines) { int cols = sisusb->sisusb_num_columns; int length = ((b - t) * cols) * 2; @@ -852,8 +853,9 @@ sisusbcon_scroll_area(struct vc_data *c, struct sisusb_usb_data 
*sisusb, } /* Interface routine */ -static int -sisusbcon_scroll(struct vc_data *c, int t, int b, int dir, int lines) +static bool +sisusbcon_scroll(struct vc_data *c, unsigned int t, unsigned int b, + enum con_scroll dir, unsigned int lines) { struct sisusb_usb_data *sisusb; u16 eattr = c->vc_video_erase_char; @@ -870,17 +872,17 @@ sisusbcon_scroll(struct vc_data *c, int t, int b, int dir, int lines) */ if (!lines) - return 1; + return true; sisusb = sisusb_get_sisusb_lock_and_check(c->vc_num); if (!sisusb) - return 0; + return false; /* sisusb->lock is down */ if (sisusb_is_inactive(c, sisusb)) { mutex_unlock(&sisusb->lock); - return 0; + return false; } /* Special case */ @@ -971,7 +973,7 @@ sisusbcon_scroll(struct vc_data *c, int t, int b, int dir, int lines) mutex_unlock(&sisusb->lock); - return 1; + return true; } /* Interface routine */ diff --git a/drivers/video/console/fbcon.c b/drivers/video/console/fbcon.c index b87f5cfdaea5..a44f5627b82a 100644 --- a/drivers/video/console/fbcon.c +++ b/drivers/video/console/fbcon.c @@ -164,8 +164,6 @@ static void fbcon_putcs(struct vc_data *vc, const unsigned short *s, int count, int ypos, int xpos); static void fbcon_clear_margins(struct vc_data *vc, int bottom_only); static void fbcon_cursor(struct vc_data *vc, int mode); -static int fbcon_scroll(struct vc_data *vc, int t, int b, int dir, - int count); static void fbcon_bmove(struct vc_data *vc, int sy, int sx, int dy, int dx, int height, int width); static int fbcon_switch(struct vc_data *vc); @@ -1795,15 +1793,15 @@ static inline void fbcon_softback_note(struct vc_data *vc, int t, softback_curr = softback_in; } -static int fbcon_scroll(struct vc_data *vc, int t, int b, int dir, - int count) +static bool fbcon_scroll(struct vc_data *vc, unsigned int t, unsigned int b, + enum con_scroll dir, unsigned int count) { struct fb_info *info = registered_fb[con2fb_map[vc->vc_num]]; struct display *p = &fb_display[vc->vc_num]; int scroll_partial = info->flags & 
FBINFO_PARTIAL_PAN_OK; if (fbcon_is_inactive(vc, info)) - return -EINVAL; + return true; fbcon_cursor(vc, CM_ERASE); @@ -1831,7 +1829,7 @@ static int fbcon_scroll(struct vc_data *vc, int t, int b, int dir, (b - count)), vc->vc_video_erase_char, vc->vc_size_row * count); - return 1; + return true; break; case SCROLL_WRAP_MOVE: @@ -1903,7 +1901,7 @@ static int fbcon_scroll(struct vc_data *vc, int t, int b, int dir, (b - count)), vc->vc_video_erase_char, vc->vc_size_row * count); - return 1; + return true; } break; @@ -1922,7 +1920,7 @@ static int fbcon_scroll(struct vc_data *vc, int t, int b, int dir, t), vc->vc_video_erase_char, vc->vc_size_row * count); - return 1; + return true; break; case SCROLL_WRAP_MOVE: @@ -1992,10 +1990,10 @@ static int fbcon_scroll(struct vc_data *vc, int t, int b, int dir, t), vc->vc_video_erase_char, vc->vc_size_row * count); - return 1; + return true; } } - return 0; + return false; } diff --git a/drivers/video/console/mdacon.c b/drivers/video/console/mdacon.c index bacbb044d77c..ec192a1bf297 100644 --- a/drivers/video/console/mdacon.c +++ b/drivers/video/console/mdacon.c @@ -488,12 +488,13 @@ static void mdacon_cursor(struct vc_data *c, int mode) } } -static int mdacon_scroll(struct vc_data *c, int t, int b, int dir, int lines) +static bool mdacon_scroll(struct vc_data *c, unsigned int t, unsigned int b, + enum con_scroll dir, unsigned int lines) { u16 eattr = mda_convert_attr(c->vc_video_erase_char); if (!lines) - return 0; + return false; if (lines > c->vc_rows) /* maximum realistic size */ lines = c->vc_rows; @@ -514,7 +515,7 @@ static int mdacon_scroll(struct vc_data *c, int t, int b, int dir, int lines) break; } - return 0; + return false; } diff --git a/drivers/video/console/newport_con.c b/drivers/video/console/newport_con.c index e3b9521e4ec3..1e11614322fe 100644 --- a/drivers/video/console/newport_con.c +++ b/drivers/video/console/newport_con.c @@ -574,8 +574,8 @@ static int newport_font_set(struct vc_data *vc, struct 
console_font *font, unsig return newport_set_font(vc->vc_num, font); } -static int newport_scroll(struct vc_data *vc, int t, int b, int dir, - int lines) +static bool newport_scroll(struct vc_data *vc, unsigned int t, unsigned int b, + enum con_scroll dir, unsigned int lines) { int count, x, y; unsigned short *s, *d; @@ -595,7 +595,7 @@ static int newport_scroll(struct vc_data *vc, int t, int b, int dir, (vc->vc_color & 0xf0) >> 4); } npregs->cset.topscan = (topscan - 1) & 0x3ff; - return 0; + return false; } count = (b - t - lines) * vc->vc_cols; @@ -670,7 +670,7 @@ static int newport_scroll(struct vc_data *vc, int t, int b, int dir, } } } - return 1; + return true; } static int newport_dummy(struct vc_data *c) diff --git a/drivers/video/console/sticon.c b/drivers/video/console/sticon.c index 3a10ac19598f..79c9bd8d3025 100644 --- a/drivers/video/console/sticon.c +++ b/drivers/video/console/sticon.c @@ -153,12 +153,13 @@ static void sticon_cursor(struct vc_data *conp, int mode) } } -static int sticon_scroll(struct vc_data *conp, int t, int b, int dir, int count) +static bool sticon_scroll(struct vc_data *conp, unsigned int t, + unsigned int b, enum con_scroll dir, unsigned int count) { struct sti_struct *sti = sticon_sti; if (vga_is_gfx) - return 0; + return false; sticon_cursor(conp, CM_ERASE); @@ -174,7 +175,7 @@ static int sticon_scroll(struct vc_data *conp, int t, int b, int dir, int count) break; } - return 0; + return false; } static void sticon_init(struct vc_data *c, int init) diff --git a/drivers/video/console/vgacon.c b/drivers/video/console/vgacon.c index 11576611a974..4c54a873452e 100644 --- a/drivers/video/console/vgacon.c +++ b/drivers/video/console/vgacon.c @@ -83,8 +83,6 @@ static int vgacon_blank(struct vc_data *c, int blank, int mode_switch); static void vgacon_scrolldelta(struct vc_data *c, int lines); static int vgacon_set_origin(struct vc_data *c); static void vgacon_save_screen(struct vc_data *c); -static int vgacon_scroll(struct vc_data *c, 
int t, int b, int dir, - int lines); static void vgacon_invert_region(struct vc_data *c, u16 * p, int count); static struct uni_pagedir *vgacon_uni_pagedir; static int vgacon_refcount; @@ -1350,17 +1348,17 @@ static void vgacon_save_screen(struct vc_data *c) c->vc_screenbuf_size > vga_vram_size ? vga_vram_size : c->vc_screenbuf_size); } -static int vgacon_scroll(struct vc_data *c, int t, int b, int dir, - int lines) +static bool vgacon_scroll(struct vc_data *c, unsigned int t, unsigned int b, + enum con_scroll dir, unsigned int lines) { unsigned long oldo; unsigned int delta; if (t || b != c->vc_rows || vga_is_gfx || c->vc_mode != KD_TEXT) - return 0; + return false; if (!vga_hardscroll_enabled || lines >= c->vc_rows / 2) - return 0; + return false; vgacon_restore_screen(c); oldo = c->vc_origin; @@ -1396,7 +1394,7 @@ static int vgacon_scroll(struct vc_data *c, int t, int b, int dir, c->vc_visible_origin = c->vc_origin; vga_set_mem_top(c); c->vc_pos = (c->vc_pos - oldo) + c->vc_origin; - return 1; + return true; } diff --git a/include/linux/console.h b/include/linux/console.h index 3672809234a7..508b012bd5bd 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -28,9 +28,17 @@ struct tty_struct; #define VT100ID "\033[?1;2c" #define VT102ID "\033[?6c" +enum con_scroll { + SM_UP, + SM_DOWN, +}; + /** * struct consw - callbacks for consoles * + * @con_scroll: move lines from @top to @bottom in direction @dir by @lines. + * Return true if no generic handling should be done. + * Invoked by csi_M and printing to the console. * @con_set_palette: sets the palette of the console to @table (optional) * @con_scrolldelta: the contents of the console should be scrolled by @lines. * Invoked by user. 
(optional) @@ -44,7 +52,9 @@ struct consw { void (*con_putc)(struct vc_data *, int, int, int); void (*con_putcs)(struct vc_data *, const unsigned short *, int, int, int); void (*con_cursor)(struct vc_data *, int); - int (*con_scroll)(struct vc_data *, int, int, int, int); + bool (*con_scroll)(struct vc_data *, unsigned int top, + unsigned int bottom, enum con_scroll dir, + unsigned int lines); int (*con_switch)(struct vc_data *); int (*con_blank)(struct vc_data *, int, int); int (*con_font_set)(struct vc_data *, struct console_font *, unsigned); @@ -99,10 +109,6 @@ static inline int con_debug_leave(void) } #endif -/* scroll */ -#define SM_UP (1) -#define SM_DOWN (2) - /* cursor */ #define CM_DRAW (1) #define CM_ERASE (2) -- cgit v1.2.3 From 35cc56f9a30480c8a0cca809cf341614a2144758 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 3 Oct 2016 11:18:35 +0200 Subject: tty: vgacon+sisusb, move scrolldelta to a common helper The code is mirrored in scrolldelta implementations of both vgacon and sisusb. Let's move the code to a separate helper where we will perform a common cleanup and further changes. While we are moving the code, make it linear and save one indentation level. This is done by returning from the "!lines" then-branch immediately. This allows flushing the else-branch 1 level to the left, obviously. Few more new lines and comments were added too. And do not forget to export the helper function given sisusb can be built as module. 
Signed-off-by: Jiri Slaby Cc: Thomas Winischhofer Cc: Tomi Valkeinen Cc: Cc: Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/vt.c | 38 +++++++++++++++++++++++++++++++++ drivers/usb/misc/sisusbvga/sisusb_con.c | 37 ++------------------------------ drivers/video/console/vgacon.c | 27 ++--------------------- include/linux/vt_kern.h | 2 ++ 4 files changed, 44 insertions(+), 60 deletions(-) (limited to 'include/linux') diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index a0b7576747cd..2eab714aab67 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -4279,6 +4279,44 @@ void vcs_scr_updated(struct vc_data *vc) notify_update(vc); } +void vc_scrolldelta_helper(struct vc_data *c, int lines, + unsigned int rolled_over, void *base, unsigned int size) +{ + unsigned long ubase = (unsigned long)base; + int margin = c->vc_size_row * 4; + int ul, we, p, st; + + /* Turn scrollback off */ + if (!lines) { + c->vc_visible_origin = c->vc_origin; + return; + } + + /* Do we have already enough to allow jumping from 0 to the end? */ + if (rolled_over > (c->vc_scr_end - ubase) + margin) { + ul = c->vc_scr_end - ubase; + we = rolled_over + c->vc_size_row; + } else { + ul = 0; + we = size; + } + + p = (c->vc_visible_origin - ubase - ul + we) % we + + lines * c->vc_size_row; + st = (c->vc_origin - ubase - ul + we) % we; + + /* Only a little piece would be left? Show all incl. the piece! 
*/ + if (st < 2 * margin) + margin = 0; + if (p < margin) + p = 0; + if (p > st - margin) + p = st; + + c->vc_visible_origin = ubase + (p + ul) % we; +} +EXPORT_SYMBOL_GPL(vc_scrolldelta_helper); + /* * Visible symbols for modules */ diff --git a/drivers/usb/misc/sisusbvga/sisusb_con.c b/drivers/usb/misc/sisusbvga/sisusb_con.c index 6331965daa0b..4b5777ec1501 100644 --- a/drivers/usb/misc/sisusbvga/sisusb_con.c +++ b/drivers/usb/misc/sisusbvga/sisusb_con.c @@ -686,8 +686,6 @@ static void sisusbcon_scrolldelta(struct vc_data *c, int lines) { struct sisusb_usb_data *sisusb; - int margin = c->vc_size_row * 4; - int ul, we, p, st; sisusb = sisusb_get_sisusb_lock_and_check(c->vc_num); if (!sisusb) @@ -700,39 +698,8 @@ sisusbcon_scrolldelta(struct vc_data *c, int lines) return; } - if (!lines) /* Turn scrollback off */ - c->vc_visible_origin = c->vc_origin; - else { - - if (sisusb->con_rolled_over > - (c->vc_scr_end - sisusb->scrbuf) + margin) { - - ul = c->vc_scr_end - sisusb->scrbuf; - we = sisusb->con_rolled_over + c->vc_size_row; - - } else { - - ul = 0; - we = sisusb->scrbuf_size; - - } - - p = (c->vc_visible_origin - sisusb->scrbuf - ul + we) % we + - lines * c->vc_size_row; - - st = (c->vc_origin - sisusb->scrbuf - ul + we) % we; - - if (st < 2 * margin) - margin = 0; - - if (p < margin) - p = 0; - - if (p > st - margin) - p = st; - - c->vc_visible_origin = sisusb->scrbuf + (p + ul) % we; - } + vc_scrolldelta_helper(c, lines, sisusb->con_rolled_over, + (void *)sisusb->scrbuf, sisusb->scrbuf_size); sisusbcon_set_start_address(sisusb, c); diff --git a/drivers/video/console/vgacon.c b/drivers/video/console/vgacon.c index 4c54a873452e..ede6a5a85ccd 100644 --- a/drivers/video/console/vgacon.c +++ b/drivers/video/console/vgacon.c @@ -332,31 +332,8 @@ static void vgacon_restore_screen(struct vc_data *c) static void vgacon_scrolldelta(struct vc_data *c, int lines) { - if (!lines) /* Turn scrollback off */ - c->vc_visible_origin = c->vc_origin; - else { - int margin = 
c->vc_size_row * 4; - int ul, we, p, st; - - if (vga_rolled_over > - (c->vc_scr_end - vga_vram_base) + margin) { - ul = c->vc_scr_end - vga_vram_base; - we = vga_rolled_over + c->vc_size_row; - } else { - ul = 0; - we = vga_vram_size; - } - p = (c->vc_visible_origin - vga_vram_base - ul + we) % we + - lines * c->vc_size_row; - st = (c->vc_origin - vga_vram_base - ul + we) % we; - if (st < 2 * margin) - margin = 0; - if (p < margin) - p = 0; - if (p > st - margin) - p = st; - c->vc_visible_origin = vga_vram_base + (p + ul) % we; - } + vc_scrolldelta_helper(c, lines, vga_rolled_over, (void *)vga_vram_base, + vga_vram_size); vga_set_mem_top(c); } #endif /* CONFIG_VGACON_SOFT_SCROLLBACK */ diff --git a/include/linux/vt_kern.h b/include/linux/vt_kern.h index 6abd24f258bc..833fdd4794a0 100644 --- a/include/linux/vt_kern.h +++ b/include/linux/vt_kern.h @@ -191,5 +191,7 @@ extern void vt_set_led_state(int console, int leds); extern void vt_kbd_con_start(int console); extern void vt_kbd_con_stop(int console); +void vc_scrolldelta_helper(struct vc_data *c, int lines, + unsigned int rolled_over, void *_base, unsigned int size); #endif /* _VT_KERN_H */ -- cgit v1.2.3 From a07ea4d9941af5a0c6f0be2a71b51ac9c083c5e5 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 24 Oct 2016 14:40:02 +0200 Subject: genetlink: no longer support using static family IDs Static family IDs have never really been used, the only use case was the workaround I introduced for those users that assumed their family ID was also their multicast group ID. 
Additionally, because static family IDs would never be reserved by the generic netlink code, using a relatively low ID would only work for built-in families that can be registered immediately after generic netlink is started, which is basically only the control family (apart from the workaround code, which I also had to add code for so it would reserve those IDs). Thus, anything other than GENL_ID_GENERATE is flawed and luckily not used except in the cases I mentioned. Move those workarounds into a few lines of code, and then get rid of GENL_ID_GENERATE entirely, making it more robust. Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- drivers/acpi/event.c | 1 - drivers/net/gtp.c | 1 - drivers/net/macsec.c | 1 - drivers/net/team/team.c | 1 - drivers/net/wireless/mac80211_hwsim.c | 1 - drivers/scsi/pmcraid.c | 6 ------ drivers/target/target_core_user.c | 1 - drivers/thermal/thermal_core.c | 1 - fs/dlm/netlink.c | 1 - fs/quota/netlink.c | 7 ------- include/linux/genl_magic_func.h | 1 - include/net/genetlink.h | 7 ++----- include/uapi/linux/genetlink.h | 1 - kernel/taskstats.c | 1 - net/batman-adv/netlink.c | 1 - net/core/devlink.c | 1 - net/core/drop_monitor.c | 1 - net/hsr/hsr_netlink.c | 1 - net/ieee802154/netlink.c | 1 - net/ieee802154/nl802154.c | 1 - net/ipv4/fou.c | 1 - net/ipv4/tcp_metrics.c | 1 - net/ipv6/ila/ila_xlat.c | 1 - net/irda/irnetlink.c | 1 - net/l2tp/l2tp_netlink.c | 1 - net/netfilter/ipvs/ip_vs_ctl.c | 1 - net/netlabel/netlabel_calipso.c | 1 - net/netlabel/netlabel_cipso_v4.c | 1 - net/netlabel/netlabel_mgmt.c | 1 - net/netlabel/netlabel_unlabeled.c | 1 - net/netlink/genetlink.c | 37 +++++++++++++++++++++-------------- net/nfc/netlink.c | 1 - net/openvswitch/datapath.c | 4 ---- net/tipc/netlink.c | 1 - net/tipc/netlink_compat.c | 1 - net/wimax/stack.c | 1 - net/wireless/nl80211.c | 1 - 37 files changed, 24 insertions(+), 69 deletions(-) (limited to 'include/linux') diff --git a/drivers/acpi/event.c b/drivers/acpi/event.c index 
e24ea4e796e4..8dfca3d53131 100644 --- a/drivers/acpi/event.c +++ b/drivers/acpi/event.c @@ -83,7 +83,6 @@ static const struct genl_multicast_group acpi_event_mcgrps[] = { }; static struct genl_family acpi_event_genl_family = { - .id = GENL_ID_GENERATE, .name = ACPI_GENL_FAMILY_NAME, .version = ACPI_GENL_VERSION, .maxattr = ACPI_GENL_ATTR_MAX, diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index 97e0cbca0a08..f66737ba1299 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -1095,7 +1095,6 @@ static int gtp_genl_del_pdp(struct sk_buff *skb, struct genl_info *info) } static struct genl_family gtp_genl_family = { - .id = GENL_ID_GENERATE, .name = "gtp", .version = 0, .hdrsize = 0, diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 1a134cb2d52c..a5309b81a786 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -1422,7 +1422,6 @@ static void clear_tx_sa(struct macsec_tx_sa *tx_sa) } static struct genl_family macsec_fam = { - .id = GENL_ID_GENERATE, .name = MACSEC_GENL_NAME, .hdrsize = 0, .version = MACSEC_GENL_VERSION, diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index a380649bf6b5..0b50205764ff 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -2151,7 +2151,6 @@ static struct rtnl_link_ops team_link_ops __read_mostly = { ***********************************/ static struct genl_family team_nl_family = { - .id = GENL_ID_GENERATE, .name = TEAM_GENL_NAME, .version = TEAM_GENL_VERSION, .maxattr = TEAM_ATTR_MAX, diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index e95b79bccf9b..54b6cd62676e 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -589,7 +589,6 @@ struct hwsim_radiotap_ack_hdr { /* MAC80211_HWSIM netlinf family */ static struct genl_family hwsim_genl_family = { - .id = GENL_ID_GENERATE, .hdrsize = 0, .name = "MAC80211_HWSIM", .version = 1, diff --git a/drivers/scsi/pmcraid.c b/drivers/scsi/pmcraid.c index 
68a5c347fae9..cc50eb87b28a 100644 --- a/drivers/scsi/pmcraid.c +++ b/drivers/scsi/pmcraid.c @@ -1369,12 +1369,6 @@ static struct genl_multicast_group pmcraid_mcgrps[] = { }; static struct genl_family pmcraid_event_family = { - /* - * Due to prior multicast group abuse (the code having assumed that - * the family ID can be used as a multicast group ID) we need to - * statically allocate a family (and thus group) ID. - */ - .id = GENL_ID_PMCRAID, .name = "pmcraid", .version = 1, .maxattr = PMCRAID_AEN_ATTR_MAX, diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 62bf4fe5704a..313a0ef3cda7 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -148,7 +148,6 @@ static const struct genl_multicast_group tcmu_mcgrps[] = { /* Our generic netlink family */ static struct genl_family tcmu_genl_family = { - .id = GENL_ID_GENERATE, .hdrsize = 0, .name = "TCM-USER", .version = 1, diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index 226b0b4aced6..68d7503f6417 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -2164,7 +2164,6 @@ static const struct genl_multicast_group thermal_event_mcgrps[] = { }; static struct genl_family thermal_event_genl_family = { - .id = GENL_ID_GENERATE, .name = THERMAL_GENL_FAMILY_NAME, .version = THERMAL_GENL_VERSION, .maxattr = THERMAL_GENL_ATTR_MAX, diff --git a/fs/dlm/netlink.c b/fs/dlm/netlink.c index 1e6e227134d7..00d226956264 100644 --- a/fs/dlm/netlink.c +++ b/fs/dlm/netlink.c @@ -17,7 +17,6 @@ static uint32_t dlm_nl_seqnum; static uint32_t listener_nlportid; static struct genl_family family = { - .id = GENL_ID_GENERATE, .name = DLM_GENL_NAME, .version = DLM_GENL_VERSION, }; diff --git a/fs/quota/netlink.c b/fs/quota/netlink.c index 8b252673d454..3965a5cdfaa2 100644 --- a/fs/quota/netlink.c +++ b/fs/quota/netlink.c @@ -13,13 +13,6 @@ static const struct genl_multicast_group quota_mcgrps[] = { /* Netlink family 
structure for quota */ static struct genl_family quota_genl_family = { - /* - * Needed due to multicast group ID abuse - old code assumed - * the family ID was also a valid multicast group ID (which - * isn't true) and userspace might thus rely on it. Assign a - * static ID for this group to make dealing with that easier. - */ - .id = GENL_ID_VFS_DQUOT, .hdrsize = 0, .name = "VFS_DQUOT", .version = 1, diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h index 667c31101b8b..7c070c1fe457 100644 --- a/include/linux/genl_magic_func.h +++ b/include/linux/genl_magic_func.h @@ -260,7 +260,6 @@ static struct genl_ops ZZZ_genl_ops[] __read_mostly = { */ #define ZZZ_genl_family CONCAT_(GENL_MAGIC_FAMILY, _genl_family) static struct genl_family ZZZ_genl_family __read_mostly = { - .id = GENL_ID_GENERATE, .name = __stringify(GENL_MAGIC_FAMILY), .version = GENL_MAGIC_VERSION, #ifdef GENL_MAGIC_FAMILY_HDRSZ diff --git a/include/net/genetlink.h b/include/net/genetlink.h index ef9defb3f5bc..43a5c3975a2f 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -20,7 +20,7 @@ struct genl_info; /** * struct genl_family - generic netlink family - * @id: protocol family idenfitier + * @id: protocol family identifier (private) * @hdrsize: length of user specific header in bytes * @name: name of family * @version: protocol version @@ -48,7 +48,7 @@ struct genl_info; * @n_ops: number of operations supported by this family (private) */ struct genl_family { - unsigned int id; + unsigned int id; /* private */ unsigned int hdrsize; char name[GENL_NAMSIZ]; unsigned int version; @@ -149,9 +149,6 @@ static inline int genl_register_family(struct genl_family *family) * Registers the specified family and operations from the specified table. * Only one family may be registered with the same family name or identifier. * - * The family id may equal GENL_ID_GENERATE causing an unique id to - * be automatically generated and assigned. 
- * * Either a doit or dumpit callback must be specified for every registered * operation or the function will fail. Only one operation structure per * command identifier may be registered. diff --git a/include/uapi/linux/genetlink.h b/include/uapi/linux/genetlink.h index 5512c90af7e3..d9b2db4a29c6 100644 --- a/include/uapi/linux/genetlink.h +++ b/include/uapi/linux/genetlink.h @@ -26,7 +26,6 @@ struct genlmsghdr { /* * List of reserved static generic netlink identifiers: */ -#define GENL_ID_GENERATE 0 #define GENL_ID_CTRL NLMSG_MIN_TYPE #define GENL_ID_VFS_DQUOT (NLMSG_MIN_TYPE + 1) #define GENL_ID_PMCRAID (NLMSG_MIN_TYPE + 2) diff --git a/kernel/taskstats.c b/kernel/taskstats.c index b3f05ee20d18..d7a1a9461a10 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -42,7 +42,6 @@ static int family_registered; struct kmem_cache *taskstats_cache; static struct genl_family family = { - .id = GENL_ID_GENERATE, .name = TASKSTATS_GENL_NAME, .version = TASKSTATS_GENL_VERSION, .maxattr = TASKSTATS_CMD_ATTR_MAX, diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c index 64cb6acbe0a6..a03b0ed7e8dd 100644 --- a/net/batman-adv/netlink.c +++ b/net/batman-adv/netlink.c @@ -49,7 +49,6 @@ #include "translation-table.h" struct genl_family batadv_netlink_family = { - .id = GENL_ID_GENERATE, .hdrsize = 0, .name = BATADV_NL_NAME, .version = 1, diff --git a/net/core/devlink.c b/net/core/devlink.c index d2fd736de6a2..3008d9c33875 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -342,7 +342,6 @@ static void devlink_nl_post_doit(const struct genl_ops *ops, } static struct genl_family devlink_nl_family = { - .id = GENL_ID_GENERATE, .name = DEVLINK_GENL_NAME, .version = DEVLINK_GENL_VERSION, .maxattr = DEVLINK_ATTR_MAX, diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index 72cfb0c61125..a5320dfcd978 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -60,7 +60,6 @@ struct dm_hw_stat_delta { }; static struct genl_family 
net_drop_monitor_family = { - .id = GENL_ID_GENERATE, .hdrsize = 0, .name = "NET_DM", .version = 2, diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c index d4d1617f43a8..2ad039492bee 100644 --- a/net/hsr/hsr_netlink.c +++ b/net/hsr/hsr_netlink.c @@ -132,7 +132,6 @@ static const struct nla_policy hsr_genl_policy[HSR_A_MAX + 1] = { }; static struct genl_family hsr_genl_family = { - .id = GENL_ID_GENERATE, .hdrsize = 0, .name = "HSR", .version = 1, diff --git a/net/ieee802154/netlink.c b/net/ieee802154/netlink.c index c8133c07ceee..19144158b696 100644 --- a/net/ieee802154/netlink.c +++ b/net/ieee802154/netlink.c @@ -29,7 +29,6 @@ static unsigned int ieee802154_seq_num; static DEFINE_SPINLOCK(ieee802154_seq_lock); struct genl_family nl802154_family = { - .id = GENL_ID_GENERATE, .hdrsize = 0, .name = IEEE802154_NL_NAME, .version = 1, diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c index 21aabadccd0e..182299858f1d 100644 --- a/net/ieee802154/nl802154.c +++ b/net/ieee802154/nl802154.c @@ -34,7 +34,6 @@ static void nl802154_post_doit(const struct genl_ops *ops, struct sk_buff *skb, /* the netlink family */ static struct genl_family nl802154_fam = { - .id = GENL_ID_GENERATE, /* don't bother with a hardcoded ID */ .name = NL802154_GENL_NAME, /* have users key off the name instead */ .hdrsize = 0, /* no private header */ .version = 1, /* no particular meaning now */ diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index cf50f7e2b012..e3fc527c5d37 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -623,7 +623,6 @@ static int fou_destroy(struct net *net, struct fou_cfg *cfg) } static struct genl_family fou_nl_family = { - .id = GENL_ID_GENERATE, .hdrsize = 0, .name = FOU_GENL_NAME, .version = FOU_GENL_VERSION, diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index bf1f3b2b29d1..3da305127b32 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -743,7 +743,6 @@ void tcp_fastopen_cache_set(struct sock *sk, u16 mss, } static 
struct genl_family tcp_metrics_nl_family = { - .id = GENL_ID_GENERATE, .hdrsize = 0, .name = TCP_METRICS_GENL_NAME, .version = TCP_METRICS_GENL_VERSION, diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c index e604013dd814..0d57e27d1cdd 100644 --- a/net/ipv6/ila/ila_xlat.c +++ b/net/ipv6/ila/ila_xlat.c @@ -119,7 +119,6 @@ static const struct rhashtable_params rht_params = { }; static struct genl_family ila_nl_family = { - .id = GENL_ID_GENERATE, .hdrsize = 0, .name = ILA_GENL_NAME, .version = ILA_GENL_VERSION, diff --git a/net/irda/irnetlink.c b/net/irda/irnetlink.c index e15c40e86660..f23b81aa91fe 100644 --- a/net/irda/irnetlink.c +++ b/net/irda/irnetlink.c @@ -25,7 +25,6 @@ static struct genl_family irda_nl_family = { - .id = GENL_ID_GENERATE, .name = IRDA_NL_NAME, .hdrsize = 0, .version = IRDA_NL_VERSION, diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index bf3117771822..4fbf1f41ac52 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -32,7 +32,6 @@ static struct genl_family l2tp_nl_family = { - .id = GENL_ID_GENERATE, .name = L2TP_GENL_NAME, .version = L2TP_GENL_VERSION, .hdrsize = 0, diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index c3c809b2e712..ceed66cdd03e 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -2841,7 +2841,6 @@ static struct nf_sockopt_ops ip_vs_sockopts = { /* IPVS genetlink family */ static struct genl_family ip_vs_genl_family = { - .id = GENL_ID_GENERATE, .hdrsize = 0, .name = IPVS_GENL_NAME, .version = IPVS_GENL_VERSION, diff --git a/net/netlabel/netlabel_calipso.c b/net/netlabel/netlabel_calipso.c index 2ec93c5e77bb..152e503b8c5d 100644 --- a/net/netlabel/netlabel_calipso.c +++ b/net/netlabel/netlabel_calipso.c @@ -61,7 +61,6 @@ struct netlbl_domhsh_walk_arg { /* NetLabel Generic NETLINK CALIPSO family */ static struct genl_family netlbl_calipso_gnl_family = { - .id = GENL_ID_GENERATE, .hdrsize = 0, .name = 
NETLBL_NLTYPE_CALIPSO_NAME, .version = NETLBL_PROTO_VERSION, diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c index 7fd1104ba900..755b284e7ad4 100644 --- a/net/netlabel/netlabel_cipso_v4.c +++ b/net/netlabel/netlabel_cipso_v4.c @@ -60,7 +60,6 @@ struct netlbl_domhsh_walk_arg { /* NetLabel Generic NETLINK CIPSOv4 family */ static struct genl_family netlbl_cipsov4_gnl_family = { - .id = GENL_ID_GENERATE, .hdrsize = 0, .name = NETLBL_NLTYPE_CIPSOV4_NAME, .version = NETLBL_PROTO_VERSION, diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c index f85d0e07af2d..3b00f2368fcd 100644 --- a/net/netlabel/netlabel_mgmt.c +++ b/net/netlabel/netlabel_mgmt.c @@ -61,7 +61,6 @@ struct netlbl_domhsh_walk_arg { /* NetLabel Generic NETLINK CIPSOv4 family */ static struct genl_family netlbl_mgmt_gnl_family = { - .id = GENL_ID_GENERATE, .hdrsize = 0, .name = NETLBL_NLTYPE_MGMT_NAME, .version = NETLBL_PROTO_VERSION, diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index 4528cff9138b..c2ea8d1f653a 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -124,7 +124,6 @@ static u8 netlabel_unlabel_acceptflg; /* NetLabel Generic NETLINK unlabeled family */ static struct genl_family netlbl_unlabel_gnl_family = { - .id = GENL_ID_GENERATE, .hdrsize = 0, .name = NETLBL_NLTYPE_UNLABELED_NAME, .version = NETLBL_PROTO_VERSION, diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 01291b7a27bb..f19ec969edee 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -349,8 +349,6 @@ static int genl_validate_ops(const struct genl_family *family) * * Registers the specified family after validating it first. Only one * family may be registered with the same family name or identifier. - * The family id may equal GENL_ID_GENERATE causing an unique id to - * be automatically generated and assigned. 
* * The family's ops array must already be assigned, you can use the * genl_register_family_with_ops() helper function. @@ -359,13 +357,7 @@ static int genl_validate_ops(const struct genl_family *family) */ int __genl_register_family(struct genl_family *family) { - int err = -EINVAL, i; - - if (family->id && family->id < GENL_MIN_ID) - goto errout; - - if (family->id > GENL_MAX_ID) - goto errout; + int err, i; err = genl_validate_ops(family); if (err) @@ -378,8 +370,27 @@ int __genl_register_family(struct genl_family *family) goto errout_locked; } - if (family->id == GENL_ID_GENERATE) { - u16 newid = genl_generate_id(); + if (family == &genl_ctrl) { + family->id = GENL_ID_CTRL; + } else { + u16 newid; + + /* this should be left zero in the struct */ + WARN_ON(family->id); + + /* + * Sadly, a few cases need to be special-cased + * due to them having previously abused the API + * and having used their family ID also as their + * multicast group ID, so we use reserved IDs + * for both to be sure we can do that mapping. 
+ */ + if (strcmp(family->name, "pmcraid") == 0) + newid = GENL_ID_PMCRAID; + else if (strcmp(family->name, "VFS_DQUOT") == 0) + newid = GENL_ID_VFS_DQUOT; + else + newid = genl_generate_id(); if (!newid) { err = -ENOMEM; @@ -387,9 +398,6 @@ int __genl_register_family(struct genl_family *family) } family->id = newid; - } else if (genl_family_find_byid(family->id)) { - err = -EEXIST; - goto errout_locked; } if (family->maxattr && !family->parallel_ops) { @@ -419,7 +427,6 @@ int __genl_register_family(struct genl_family *family) errout_locked: genl_unlock_all(); -errout: return err; } EXPORT_SYMBOL(__genl_register_family); diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index 79786bf62b88..c230403e066c 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -39,7 +39,6 @@ static const struct genl_multicast_group nfc_genl_mcgrps[] = { }; static struct genl_family nfc_genl_family = { - .id = GENL_ID_GENERATE, .hdrsize = 0, .name = NFC_GENL_NAME, .version = NFC_GENL_VERSION, diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 194435aa1165..f9fef7dfba15 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -671,7 +671,6 @@ static const struct genl_ops dp_packet_genl_ops[] = { }; static struct genl_family dp_packet_genl_family = { - .id = GENL_ID_GENERATE, .hdrsize = sizeof(struct ovs_header), .name = OVS_PACKET_FAMILY, .version = OVS_PACKET_VERSION, @@ -1436,7 +1435,6 @@ static const struct genl_ops dp_flow_genl_ops[] = { }; static struct genl_family dp_flow_genl_family = { - .id = GENL_ID_GENERATE, .hdrsize = sizeof(struct ovs_header), .name = OVS_FLOW_FAMILY, .version = OVS_FLOW_VERSION, @@ -1822,7 +1820,6 @@ static const struct genl_ops dp_datapath_genl_ops[] = { }; static struct genl_family dp_datapath_genl_family = { - .id = GENL_ID_GENERATE, .hdrsize = sizeof(struct ovs_header), .name = OVS_DATAPATH_FAMILY, .version = OVS_DATAPATH_VERSION, @@ -2244,7 +2241,6 @@ static const struct genl_ops dp_vport_genl_ops[] = { }; 
struct genl_family dp_vport_genl_family = { - .id = GENL_ID_GENERATE, .hdrsize = sizeof(struct ovs_header), .name = OVS_VPORT_FAMILY, .version = OVS_VPORT_VERSION, diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index 4b94f3cfe3af..383b8fedabc7 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -136,7 +136,6 @@ const struct nla_policy tipc_nl_udp_policy[TIPC_NLA_UDP_MAX + 1] = { * so we have a separate genl handling for the new API. */ struct genl_family tipc_genl_family = { - .id = GENL_ID_GENERATE, .name = TIPC_GENL_V2_NAME, .version = TIPC_GENL_V2_VERSION, .hdrsize = 0, diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index 1fd464764765..f04428e4c8e5 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -1216,7 +1216,6 @@ send: } static struct genl_family tipc_genl_compat_family = { - .id = GENL_ID_GENERATE, .name = TIPC_GENL_NAME, .version = TIPC_GENL_VERSION, .hdrsize = TIPC_GENL_HDRLEN, diff --git a/net/wimax/stack.c b/net/wimax/stack.c index 3f816e2971ee..8ac83a41585f 100644 --- a/net/wimax/stack.c +++ b/net/wimax/stack.c @@ -573,7 +573,6 @@ size_t D_LEVEL_SIZE = ARRAY_SIZE(D_LEVEL); struct genl_family wimax_gnl_family = { - .id = GENL_ID_GENERATE, .name = "WiMAX", .version = WIMAX_GNL_VERSION, .hdrsize = 0, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 7d8cb3330c86..714beafe05e0 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -39,7 +39,6 @@ static void nl80211_post_doit(const struct genl_ops *ops, struct sk_buff *skb, /* the netlink family */ static struct genl_family nl80211_fam = { - .id = GENL_ID_GENERATE, /* don't bother with a hardcoded ID */ .name = NL80211_GENL_NAME, /* have users key off the name instead */ .hdrsize = 0, /* no private header */ .version = 1, /* no particular meaning now */ -- cgit v1.2.3 From 489111e5c25b93be80340c3113d71903d7c82136 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 24 Oct 2016 14:40:03 +0200 Subject: genetlink: 
statically initialize families Instead of providing macros/inline functions to initialize the families, make all users initialize them statically and get rid of the macros. This reduces the kernel code size by about 1.6k on x86-64 (with allyesconfig). Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- drivers/acpi/event.c | 1 + drivers/net/gtp.c | 21 +++++++---- drivers/net/macsec.c | 21 +++++++---- drivers/net/team/team.c | 22 +++++++---- drivers/net/wireless/mac80211_hwsim.c | 26 +++++++------ drivers/scsi/pmcraid.c | 1 + drivers/target/target_core_user.c | 1 + drivers/thermal/thermal_core.c | 1 + fs/dlm/netlink.c | 15 +++++--- fs/quota/netlink.c | 1 + include/linux/drbd_genl.h | 2 +- include/linux/genl_magic_func.h | 28 ++++++++------ include/net/genetlink.h | 71 ++++++----------------------------- kernel/taskstats.c | 17 ++++++--- net/batman-adv/netlink.c | 25 +++++++----- net/core/devlink.c | 27 +++++++------ net/core/drop_monitor.c | 20 ++++++---- net/hsr/hsr_netlink.c | 22 +++++++---- net/ieee802154/netlink.c | 23 +++++++----- net/ieee802154/nl802154.c | 34 ++++++++--------- net/ipv4/fou.c | 22 ++++++----- net/ipv4/tcp_metrics.c | 22 ++++++----- net/ipv6/ila/ila_xlat.c | 24 +++++++----- net/irda/irnetlink.c | 19 ++++++---- net/l2tp/l2tp_netlink.c | 25 +++++++----- net/netfilter/ipvs/ip_vs_ctl.c | 22 ++++++----- net/netlabel/netlabel_calipso.c | 20 ++++++---- net/netlabel/netlabel_cipso_v4.c | 21 ++++++----- net/netlabel/netlabel_mgmt.c | 20 ++++++---- net/netlabel/netlabel_unlabeled.c | 20 ++++++---- net/netlink/genetlink.c | 35 +++++++++-------- net/nfc/netlink.c | 24 +++++++----- net/openvswitch/datapath.c | 4 ++ net/tipc/netlink.c | 22 ++++++----- net/tipc/netlink_compat.c | 20 +++++----- net/wimax/stack.c | 19 +++++----- net/wireless/nl80211.c | 33 ++++++++-------- 37 files changed, 414 insertions(+), 337 deletions(-) (limited to 'include/linux') diff --git a/drivers/acpi/event.c b/drivers/acpi/event.c index 8dfca3d53131..1ab12ad7d5ba 100644 
--- a/drivers/acpi/event.c +++ b/drivers/acpi/event.c @@ -83,6 +83,7 @@ static const struct genl_multicast_group acpi_event_mcgrps[] = { }; static struct genl_family acpi_event_genl_family = { + .module = THIS_MODULE, .name = ACPI_GENL_FAMILY_NAME, .version = ACPI_GENL_VERSION, .maxattr = ACPI_GENL_ATTR_MAX, diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index f66737ba1299..0604fd78f826 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -1094,13 +1094,7 @@ static int gtp_genl_del_pdp(struct sk_buff *skb, struct genl_info *info) return 0; } -static struct genl_family gtp_genl_family = { - .name = "gtp", - .version = 0, - .hdrsize = 0, - .maxattr = GTPA_MAX, - .netnsok = true, -}; +static struct genl_family gtp_genl_family; static int gtp_genl_fill_info(struct sk_buff *skb, u32 snd_portid, u32 snd_seq, u32 type, struct pdp_ctx *pctx) @@ -1296,6 +1290,17 @@ static const struct genl_ops gtp_genl_ops[] = { }, }; +static struct genl_family gtp_genl_family = { + .name = "gtp", + .version = 0, + .hdrsize = 0, + .maxattr = GTPA_MAX, + .netnsok = true, + .module = THIS_MODULE, + .ops = gtp_genl_ops, + .n_ops = ARRAY_SIZE(gtp_genl_ops), +}; + static int __net_init gtp_net_init(struct net *net) { struct gtp_net *gn = net_generic(net, gtp_net_id); @@ -1335,7 +1340,7 @@ static int __init gtp_init(void) if (err < 0) goto error_out; - err = genl_register_family_with_ops(&gtp_genl_family, gtp_genl_ops); + err = genl_register_family(&gtp_genl_family); if (err < 0) goto unreg_rtnl_link; diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index a5309b81a786..63ca7a3c77cf 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -1421,13 +1421,7 @@ static void clear_tx_sa(struct macsec_tx_sa *tx_sa) macsec_txsa_put(tx_sa); } -static struct genl_family macsec_fam = { - .name = MACSEC_GENL_NAME, - .hdrsize = 0, - .version = MACSEC_GENL_VERSION, - .maxattr = MACSEC_ATTR_MAX, - .netnsok = true, -}; +static struct genl_family macsec_fam; static struct net_device 
*get_dev_from_nl(struct net *net, struct nlattr **attrs) @@ -2654,6 +2648,17 @@ static const struct genl_ops macsec_genl_ops[] = { }, }; +static struct genl_family macsec_fam = { + .name = MACSEC_GENL_NAME, + .hdrsize = 0, + .version = MACSEC_GENL_VERSION, + .maxattr = MACSEC_ATTR_MAX, + .netnsok = true, + .module = THIS_MODULE, + .ops = macsec_genl_ops, + .n_ops = ARRAY_SIZE(macsec_genl_ops), +}; + static netdev_tx_t macsec_start_xmit(struct sk_buff *skb, struct net_device *dev) { @@ -3461,7 +3466,7 @@ static int __init macsec_init(void) if (err) goto notifier; - err = genl_register_family_with_ops(&macsec_fam, macsec_genl_ops); + err = genl_register_family(&macsec_fam); if (err) goto rtnl; diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 0b50205764ff..46bf7c1216c0 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -2150,12 +2150,7 @@ static struct rtnl_link_ops team_link_ops __read_mostly = { * Generic netlink custom interface ***********************************/ -static struct genl_family team_nl_family = { - .name = TEAM_GENL_NAME, - .version = TEAM_GENL_VERSION, - .maxattr = TEAM_ATTR_MAX, - .netnsok = true, -}; +static struct genl_family team_nl_family; static const struct nla_policy team_nl_policy[TEAM_ATTR_MAX + 1] = { [TEAM_ATTR_UNSPEC] = { .type = NLA_UNSPEC, }, @@ -2745,6 +2740,18 @@ static const struct genl_multicast_group team_nl_mcgrps[] = { { .name = TEAM_GENL_CHANGE_EVENT_MC_GRP_NAME, }, }; +static struct genl_family team_nl_family = { + .name = TEAM_GENL_NAME, + .version = TEAM_GENL_VERSION, + .maxattr = TEAM_ATTR_MAX, + .netnsok = true, + .module = THIS_MODULE, + .ops = team_nl_ops, + .n_ops = ARRAY_SIZE(team_nl_ops), + .mcgrps = team_nl_mcgrps, + .n_mcgrps = ARRAY_SIZE(team_nl_mcgrps), +}; + static int team_nl_send_multicast(struct sk_buff *skb, struct team *team, u32 portid) { @@ -2768,8 +2775,7 @@ static int team_nl_send_event_port_get(struct team *team, static int team_nl_init(void) { - return 
genl_register_family_with_ops_groups(&team_nl_family, team_nl_ops, - team_nl_mcgrps); + return genl_register_family(&team_nl_family); } static void team_nl_fini(void) diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 54b6cd62676e..5d4637e586e8 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -587,14 +587,8 @@ struct hwsim_radiotap_ack_hdr { __le16 rt_chbitmask; } __packed; -/* MAC80211_HWSIM netlinf family */ -static struct genl_family hwsim_genl_family = { - .hdrsize = 0, - .name = "MAC80211_HWSIM", - .version = 1, - .maxattr = HWSIM_ATTR_MAX, - .netnsok = true, -}; +/* MAC80211_HWSIM netlink family */ +static struct genl_family hwsim_genl_family; enum hwsim_multicast_groups { HWSIM_MCGRP_CONFIG, @@ -3234,6 +3228,18 @@ static const struct genl_ops hwsim_ops[] = { }, }; +static struct genl_family hwsim_genl_family = { + .name = "MAC80211_HWSIM", + .version = 1, + .maxattr = HWSIM_ATTR_MAX, + .netnsok = true, + .module = THIS_MODULE, + .ops = hwsim_ops, + .n_ops = ARRAY_SIZE(hwsim_ops), + .mcgrps = hwsim_mcgrps, + .n_mcgrps = ARRAY_SIZE(hwsim_mcgrps), +}; + static void destroy_radio(struct work_struct *work) { struct mac80211_hwsim_data *data = @@ -3287,9 +3293,7 @@ static int hwsim_init_netlink(void) printk(KERN_INFO "mac80211_hwsim: initializing netlink\n"); - rc = genl_register_family_with_ops_groups(&hwsim_genl_family, - hwsim_ops, - hwsim_mcgrps); + rc = genl_register_family(&hwsim_genl_family); if (rc) goto failure; diff --git a/drivers/scsi/pmcraid.c b/drivers/scsi/pmcraid.c index cc50eb87b28a..c0ab7bb8c3ce 100644 --- a/drivers/scsi/pmcraid.c +++ b/drivers/scsi/pmcraid.c @@ -1369,6 +1369,7 @@ static struct genl_multicast_group pmcraid_mcgrps[] = { }; static struct genl_family pmcraid_event_family = { + .module = THIS_MODULE, .name = "pmcraid", .version = 1, .maxattr = PMCRAID_AEN_ATTR_MAX, diff --git a/drivers/target/target_core_user.c 
b/drivers/target/target_core_user.c index 313a0ef3cda7..3483372f5562 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -148,6 +148,7 @@ static const struct genl_multicast_group tcmu_mcgrps[] = { /* Our generic netlink family */ static struct genl_family tcmu_genl_family = { + .module = THIS_MODULE, .hdrsize = 0, .name = "TCM-USER", .version = 1, diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index 68d7503f6417..93b6caab2d9f 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -2164,6 +2164,7 @@ static const struct genl_multicast_group thermal_event_mcgrps[] = { }; static struct genl_family thermal_event_genl_family = { + .module = THIS_MODULE, .name = THERMAL_GENL_FAMILY_NAME, .version = THERMAL_GENL_VERSION, .maxattr = THERMAL_GENL_ATTR_MAX, diff --git a/fs/dlm/netlink.c b/fs/dlm/netlink.c index 00d226956264..04042d69573c 100644 --- a/fs/dlm/netlink.c +++ b/fs/dlm/netlink.c @@ -16,10 +16,7 @@ static uint32_t dlm_nl_seqnum; static uint32_t listener_nlportid; -static struct genl_family family = { - .name = DLM_GENL_NAME, - .version = DLM_GENL_VERSION, -}; +static struct genl_family family; static int prepare_data(u8 cmd, struct sk_buff **skbp, size_t size) { @@ -75,9 +72,17 @@ static struct genl_ops dlm_nl_ops[] = { }, }; +static struct genl_family family = { + .name = DLM_GENL_NAME, + .version = DLM_GENL_VERSION, + .ops = dlm_nl_ops, + .n_ops = ARRAY_SIZE(dlm_nl_ops), + .module = THIS_MODULE, +}; + int __init dlm_netlink_init(void) { - return genl_register_family_with_ops(&family, dlm_nl_ops); + return genl_register_family(&family); } void dlm_netlink_exit(void) diff --git a/fs/quota/netlink.c b/fs/quota/netlink.c index 3965a5cdfaa2..9457c7b0dfa2 100644 --- a/fs/quota/netlink.c +++ b/fs/quota/netlink.c @@ -13,6 +13,7 @@ static const struct genl_multicast_group quota_mcgrps[] = { /* Netlink family structure for quota */ static struct genl_family quota_genl_family = { 
+ .module = THIS_MODULE, .hdrsize = 0, .name = "VFS_DQUOT", .version = 1, diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h index c934d3a96b5e..2896f93808ae 100644 --- a/include/linux/drbd_genl.h +++ b/include/linux/drbd_genl.h @@ -67,7 +67,7 @@ * genl_magic_func.h * generates an entry in the static genl_ops array, * and static register/unregister functions to - * genl_register_family_with_ops(). + * genl_register_family(). * * flags and handler: * GENL_op_init( .doit = x, .dumpit = y, .flags = something) diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h index 7c070c1fe457..40c2e39362c8 100644 --- a/include/linux/genl_magic_func.h +++ b/include/linux/genl_magic_func.h @@ -259,15 +259,7 @@ static struct genl_ops ZZZ_genl_ops[] __read_mostly = { * {{{2 */ #define ZZZ_genl_family CONCAT_(GENL_MAGIC_FAMILY, _genl_family) -static struct genl_family ZZZ_genl_family __read_mostly = { - .name = __stringify(GENL_MAGIC_FAMILY), - .version = GENL_MAGIC_VERSION, -#ifdef GENL_MAGIC_FAMILY_HDRSZ - .hdrsize = NLA_ALIGN(GENL_MAGIC_FAMILY_HDRSZ), -#endif - .maxattr = ARRAY_SIZE(drbd_tla_nl_policy)-1, -}; - +static struct genl_family ZZZ_genl_family; /* * Magic: define multicast groups * Magic: define multicast group registration helper @@ -301,11 +293,23 @@ static int CONCAT_(GENL_MAGIC_FAMILY, _genl_multicast_ ## group)( \ #undef GENL_mc_group #define GENL_mc_group(group) +static struct genl_family ZZZ_genl_family __read_mostly = { + .name = __stringify(GENL_MAGIC_FAMILY), + .version = GENL_MAGIC_VERSION, +#ifdef GENL_MAGIC_FAMILY_HDRSZ + .hdrsize = NLA_ALIGN(GENL_MAGIC_FAMILY_HDRSZ), +#endif + .maxattr = ARRAY_SIZE(drbd_tla_nl_policy)-1, + .ops = ZZZ_genl_ops, + .n_ops = ARRAY_SIZE(ZZZ_genl_ops), + .mcgrps = ZZZ_genl_mcgrps, + .n_mcgrps = ARRAY_SIZE(ZZZ_genl_mcgrps), + .module = THIS_MODULE, +}; + int CONCAT_(GENL_MAGIC_FAMILY, _genl_register)(void) { - return genl_register_family_with_ops_groups(&ZZZ_genl_family, \ - ZZZ_genl_ops, \ 
- ZZZ_genl_mcgrps); + return genl_register_family(&ZZZ_genl_family); } void CONCAT_(GENL_MAGIC_FAMILY, _genl_unregister)(void) diff --git a/include/net/genetlink.h b/include/net/genetlink.h index 43a5c3975a2f..2298b50cee34 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -39,13 +39,14 @@ struct genl_info; * Note that unbind() will not be called symmetrically if the * generic netlink family is removed while there are still open * sockets. - * @attrbuf: buffer to store parsed attributes - * @family_list: family list - * @mcgrps: multicast groups used by this family (private) - * @n_mcgrps: number of multicast groups (private) + * @attrbuf: buffer to store parsed attributes (private) + * @family_list: family list (private) + * @mcgrps: multicast groups used by this family + * @n_mcgrps: number of multicast groups * @mcgrp_offset: starting number of multicast group IDs in this family - * @ops: the operations supported by this family (private) - * @n_ops: number of operations supported by this family (private) + * (private) + * @ops: the operations supported by this family + * @n_ops: number of operations supported by this family */ struct genl_family { unsigned int id; /* private */ @@ -64,10 +65,10 @@ struct genl_family { int (*mcast_bind)(struct net *net, int group); void (*mcast_unbind)(struct net *net, int group); struct nlattr ** attrbuf; /* private */ - const struct genl_ops * ops; /* private */ - const struct genl_multicast_group *mcgrps; /* private */ - unsigned int n_ops; /* private */ - unsigned int n_mcgrps; /* private */ + const struct genl_ops * ops; + const struct genl_multicast_group *mcgrps; + unsigned int n_ops; + unsigned int n_mcgrps; unsigned int mcgrp_offset; /* private */ struct list_head family_list; /* private */ struct module *module; @@ -132,55 +133,7 @@ struct genl_ops { u8 flags; }; -int __genl_register_family(struct genl_family *family); - -static inline int genl_register_family(struct genl_family *family) -{ - 
family->module = THIS_MODULE; - return __genl_register_family(family); -} - -/** - * genl_register_family_with_ops - register a generic netlink family with ops - * @family: generic netlink family - * @ops: operations to be registered - * @n_ops: number of elements to register - * - * Registers the specified family and operations from the specified table. - * Only one family may be registered with the same family name or identifier. - * - * Either a doit or dumpit callback must be specified for every registered - * operation or the function will fail. Only one operation structure per - * command identifier may be registered. - * - * See include/net/genetlink.h for more documenation on the operations - * structure. - * - * Return 0 on success or a negative error code. - */ -static inline int -_genl_register_family_with_ops_grps(struct genl_family *family, - const struct genl_ops *ops, size_t n_ops, - const struct genl_multicast_group *mcgrps, - size_t n_mcgrps) -{ - family->module = THIS_MODULE; - family->ops = ops; - family->n_ops = n_ops; - family->mcgrps = mcgrps; - family->n_mcgrps = n_mcgrps; - return __genl_register_family(family); -} - -#define genl_register_family_with_ops(family, ops) \ - _genl_register_family_with_ops_grps((family), \ - (ops), ARRAY_SIZE(ops), \ - NULL, 0) -#define genl_register_family_with_ops_groups(family, ops, grps) \ - _genl_register_family_with_ops_grps((family), \ - (ops), ARRAY_SIZE(ops), \ - (grps), ARRAY_SIZE(grps)) - +int genl_register_family(struct genl_family *family); int genl_unregister_family(struct genl_family *family); void genl_notify(struct genl_family *family, struct sk_buff *skb, struct genl_info *info, u32 group, gfp_t flags); diff --git a/kernel/taskstats.c b/kernel/taskstats.c index d7a1a9461a10..4075ece592f2 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -41,11 +41,7 @@ static DEFINE_PER_CPU(__u32, taskstats_seqnum); static int family_registered; struct kmem_cache *taskstats_cache; -static struct 
genl_family family = { - .name = TASKSTATS_GENL_NAME, - .version = TASKSTATS_GENL_VERSION, - .maxattr = TASKSTATS_CMD_ATTR_MAX, -}; +static struct genl_family family; static const struct nla_policy taskstats_cmd_get_policy[TASKSTATS_CMD_ATTR_MAX+1] = { [TASKSTATS_CMD_ATTR_PID] = { .type = NLA_U32 }, @@ -650,6 +646,15 @@ static const struct genl_ops taskstats_ops[] = { }, }; +static struct genl_family family = { + .name = TASKSTATS_GENL_NAME, + .version = TASKSTATS_GENL_VERSION, + .maxattr = TASKSTATS_CMD_ATTR_MAX, + .module = THIS_MODULE, + .ops = taskstats_ops, + .n_ops = ARRAY_SIZE(taskstats_ops), +}; + /* Needed early in initialization */ void __init taskstats_init_early(void) { @@ -666,7 +671,7 @@ static int __init taskstats_init(void) { int rc; - rc = genl_register_family_with_ops(&family, taskstats_ops); + rc = genl_register_family(&family); if (rc) return rc; diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c index a03b0ed7e8dd..e28cec34a016 100644 --- a/net/batman-adv/netlink.c +++ b/net/batman-adv/netlink.c @@ -48,13 +48,7 @@ #include "tp_meter.h" #include "translation-table.h" -struct genl_family batadv_netlink_family = { - .hdrsize = 0, - .name = BATADV_NL_NAME, - .version = 1, - .maxattr = BATADV_ATTR_MAX, - .netnsok = true, -}; +struct genl_family batadv_netlink_family; /* multicast groups */ enum batadv_netlink_multicast_groups { @@ -609,6 +603,19 @@ static struct genl_ops batadv_netlink_ops[] = { }; +struct genl_family batadv_netlink_family = { + .hdrsize = 0, + .name = BATADV_NL_NAME, + .version = 1, + .maxattr = BATADV_ATTR_MAX, + .netnsok = true, + .module = THIS_MODULE, + .ops = batadv_netlink_ops, + .n_ops = ARRAY_SIZE(batadv_netlink_ops), + .mcgrps = batadv_netlink_mcgrps, + .n_mcgrps = ARRAY_SIZE(batadv_netlink_mcgrps), +}; + /** * batadv_netlink_register - register batadv genl netlink family */ @@ -616,9 +623,7 @@ void __init batadv_netlink_register(void) { int ret; - ret = 
genl_register_family_with_ops_groups(&batadv_netlink_family, - batadv_netlink_ops, - batadv_netlink_mcgrps); + ret = genl_register_family(&batadv_netlink_family); if (ret) pr_warn("unable to register netlink family"); } diff --git a/net/core/devlink.c b/net/core/devlink.c index 3008d9c33875..063da8091aef 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -341,14 +341,7 @@ static void devlink_nl_post_doit(const struct genl_ops *ops, mutex_unlock(&devlink_mutex); } -static struct genl_family devlink_nl_family = { - .name = DEVLINK_GENL_NAME, - .version = DEVLINK_GENL_VERSION, - .maxattr = DEVLINK_ATTR_MAX, - .netnsok = true, - .pre_doit = devlink_nl_pre_doit, - .post_doit = devlink_nl_post_doit, -}; +static struct genl_family devlink_nl_family; enum devlink_multicast_groups { DEVLINK_MCGRP_CONFIG, @@ -1619,6 +1612,20 @@ static const struct genl_ops devlink_nl_ops[] = { }, }; +static struct genl_family devlink_nl_family = { + .name = DEVLINK_GENL_NAME, + .version = DEVLINK_GENL_VERSION, + .maxattr = DEVLINK_ATTR_MAX, + .netnsok = true, + .pre_doit = devlink_nl_pre_doit, + .post_doit = devlink_nl_post_doit, + .module = THIS_MODULE, + .ops = devlink_nl_ops, + .n_ops = ARRAY_SIZE(devlink_nl_ops), + .mcgrps = devlink_nl_mcgrps, + .n_mcgrps = ARRAY_SIZE(devlink_nl_mcgrps), +}; + /** * devlink_alloc - Allocate new devlink instance resources * @@ -1841,9 +1848,7 @@ EXPORT_SYMBOL_GPL(devlink_sb_unregister); static int __init devlink_module_init(void) { - return genl_register_family_with_ops_groups(&devlink_nl_family, - devlink_nl_ops, - devlink_nl_mcgrps); + return genl_register_family(&devlink_nl_family); } static void __exit devlink_module_exit(void) diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index a5320dfcd978..80c002794ff6 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -59,11 +59,7 @@ struct dm_hw_stat_delta { unsigned long last_drop_val; }; -static struct genl_family net_drop_monitor_family = { - .hdrsize = 0, - .name = 
"NET_DM", - .version = 2, -}; +static struct genl_family net_drop_monitor_family; static DEFINE_PER_CPU(struct per_cpu_dm_data, dm_cpu_data); @@ -350,6 +346,17 @@ static const struct genl_ops dropmon_ops[] = { }, }; +static struct genl_family net_drop_monitor_family = { + .hdrsize = 0, + .name = "NET_DM", + .version = 2, + .module = THIS_MODULE, + .ops = dropmon_ops, + .n_ops = ARRAY_SIZE(dropmon_ops), + .mcgrps = dropmon_mcgrps, + .n_mcgrps = ARRAY_SIZE(dropmon_mcgrps), +}; + static struct notifier_block dropmon_net_notifier = { .notifier_call = dropmon_net_event }; @@ -366,8 +373,7 @@ static int __init init_net_drop_monitor(void) return -ENOSPC; } - rc = genl_register_family_with_ops_groups(&net_drop_monitor_family, - dropmon_ops, dropmon_mcgrps); + rc = genl_register_family(&net_drop_monitor_family); if (rc) { pr_err("Could not create drop monitor netlink family\n"); return rc; diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c index 2ad039492bee..aab34c7f6f89 100644 --- a/net/hsr/hsr_netlink.c +++ b/net/hsr/hsr_netlink.c @@ -131,12 +131,7 @@ static const struct nla_policy hsr_genl_policy[HSR_A_MAX + 1] = { [HSR_A_IF2_SEQ] = { .type = NLA_U16 }, }; -static struct genl_family hsr_genl_family = { - .hdrsize = 0, - .name = "HSR", - .version = 1, - .maxattr = HSR_A_MAX, -}; +static struct genl_family hsr_genl_family; static const struct genl_multicast_group hsr_mcgrps[] = { { .name = "hsr-network", }, @@ -466,6 +461,18 @@ static const struct genl_ops hsr_ops[] = { }, }; +static struct genl_family hsr_genl_family = { + .hdrsize = 0, + .name = "HSR", + .version = 1, + .maxattr = HSR_A_MAX, + .module = THIS_MODULE, + .ops = hsr_ops, + .n_ops = ARRAY_SIZE(hsr_ops), + .mcgrps = hsr_mcgrps, + .n_mcgrps = ARRAY_SIZE(hsr_mcgrps), +}; + int __init hsr_netlink_init(void) { int rc; @@ -474,8 +481,7 @@ int __init hsr_netlink_init(void) if (rc) goto fail_rtnl_link_register; - rc = genl_register_family_with_ops_groups(&hsr_genl_family, hsr_ops, - hsr_mcgrps); + rc = 
genl_register_family(&hsr_genl_family); if (rc) goto fail_genl_register_family; diff --git a/net/ieee802154/netlink.c b/net/ieee802154/netlink.c index 19144158b696..08e62470bac2 100644 --- a/net/ieee802154/netlink.c +++ b/net/ieee802154/netlink.c @@ -28,13 +28,6 @@ static unsigned int ieee802154_seq_num; static DEFINE_SPINLOCK(ieee802154_seq_lock); -struct genl_family nl802154_family = { - .hdrsize = 0, - .name = IEEE802154_NL_NAME, - .version = 1, - .maxattr = IEEE802154_ATTR_MAX, -}; - /* Requests to userspace */ struct sk_buff *ieee802154_nl_create(int flags, u8 req) { @@ -138,11 +131,21 @@ static const struct genl_multicast_group ieee802154_mcgrps[] = { [IEEE802154_BEACON_MCGRP] = { .name = IEEE802154_MCAST_BEACON_NAME, }, }; +struct genl_family nl802154_family = { + .hdrsize = 0, + .name = IEEE802154_NL_NAME, + .version = 1, + .maxattr = IEEE802154_ATTR_MAX, + .module = THIS_MODULE, + .ops = ieee8021154_ops, + .n_ops = ARRAY_SIZE(ieee8021154_ops), + .mcgrps = ieee802154_mcgrps, + .n_mcgrps = ARRAY_SIZE(ieee802154_mcgrps), +}; + int __init ieee802154_nl_init(void) { - return genl_register_family_with_ops_groups(&nl802154_family, - ieee8021154_ops, - ieee802154_mcgrps); + return genl_register_family(&nl802154_family); } void ieee802154_nl_exit(void) diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c index 182299858f1d..f7e75578aedd 100644 --- a/net/ieee802154/nl802154.c +++ b/net/ieee802154/nl802154.c @@ -26,22 +26,8 @@ #include "rdev-ops.h" #include "core.h" -static int nl802154_pre_doit(const struct genl_ops *ops, struct sk_buff *skb, - struct genl_info *info); - -static void nl802154_post_doit(const struct genl_ops *ops, struct sk_buff *skb, - struct genl_info *info); - /* the netlink family */ -static struct genl_family nl802154_fam = { - .name = NL802154_GENL_NAME, /* have users key off the name instead */ - .hdrsize = 0, /* no private header */ - .version = 1, /* no particular meaning now */ - .maxattr = NL802154_ATTR_MAX, - .netnsok = 
true, - .pre_doit = nl802154_pre_doit, - .post_doit = nl802154_post_doit, -}; +static struct genl_family nl802154_fam; /* multicast groups */ enum nl802154_multicast_groups { @@ -2476,11 +2462,25 @@ static const struct genl_ops nl802154_ops[] = { #endif /* CONFIG_IEEE802154_NL802154_EXPERIMENTAL */ }; +static struct genl_family nl802154_fam = { + .name = NL802154_GENL_NAME, /* have users key off the name instead */ + .hdrsize = 0, /* no private header */ + .version = 1, /* no particular meaning now */ + .maxattr = NL802154_ATTR_MAX, + .netnsok = true, + .pre_doit = nl802154_pre_doit, + .post_doit = nl802154_post_doit, + .module = THIS_MODULE, + .ops = nl802154_ops, + .n_ops = ARRAY_SIZE(nl802154_ops), + .mcgrps = nl802154_mcgrps, + .n_mcgrps = ARRAY_SIZE(nl802154_mcgrps), +}; + /* initialisation/exit functions */ int nl802154_init(void) { - return genl_register_family_with_ops_groups(&nl802154_fam, nl802154_ops, - nl802154_mcgrps); + return genl_register_family(&nl802154_fam); } void nl802154_exit(void) diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index e3fc527c5d37..5b5226a2434f 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -622,13 +622,7 @@ static int fou_destroy(struct net *net, struct fou_cfg *cfg) return err; } -static struct genl_family fou_nl_family = { - .hdrsize = 0, - .name = FOU_GENL_NAME, - .version = FOU_GENL_VERSION, - .maxattr = FOU_ATTR_MAX, - .netnsok = true, -}; +static struct genl_family fou_nl_family; static const struct nla_policy fou_nl_policy[FOU_ATTR_MAX + 1] = { [FOU_ATTR_PORT] = { .type = NLA_U16, }, @@ -830,6 +824,17 @@ static const struct genl_ops fou_nl_ops[] = { }, }; +static struct genl_family fou_nl_family = { + .hdrsize = 0, + .name = FOU_GENL_NAME, + .version = FOU_GENL_VERSION, + .maxattr = FOU_ATTR_MAX, + .netnsok = true, + .module = THIS_MODULE, + .ops = fou_nl_ops, + .n_ops = ARRAY_SIZE(fou_nl_ops), +}; + size_t fou_encap_hlen(struct ip_tunnel_encap *e) { return sizeof(struct udphdr); @@ -1085,8 +1090,7 @@ static int 
__init fou_init(void) if (ret) goto exit; - ret = genl_register_family_with_ops(&fou_nl_family, - fou_nl_ops); + ret = genl_register_family(&fou_nl_family); if (ret < 0) goto unregister; diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 3da305127b32..bba3c72c4a39 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -742,13 +742,7 @@ void tcp_fastopen_cache_set(struct sock *sk, u16 mss, rcu_read_unlock(); } -static struct genl_family tcp_metrics_nl_family = { - .hdrsize = 0, - .name = TCP_METRICS_GENL_NAME, - .version = TCP_METRICS_GENL_VERSION, - .maxattr = TCP_METRICS_ATTR_MAX, - .netnsok = true, -}; +static struct genl_family tcp_metrics_nl_family; static const struct nla_policy tcp_metrics_nl_policy[TCP_METRICS_ATTR_MAX + 1] = { [TCP_METRICS_ATTR_ADDR_IPV4] = { .type = NLA_U32, }, @@ -1115,6 +1109,17 @@ static const struct genl_ops tcp_metrics_nl_ops[] = { }, }; +static struct genl_family tcp_metrics_nl_family = { + .hdrsize = 0, + .name = TCP_METRICS_GENL_NAME, + .version = TCP_METRICS_GENL_VERSION, + .maxattr = TCP_METRICS_ATTR_MAX, + .netnsok = true, + .module = THIS_MODULE, + .ops = tcp_metrics_nl_ops, + .n_ops = ARRAY_SIZE(tcp_metrics_nl_ops), +}; + static unsigned int tcpmhash_entries; static int __init set_tcpmhash_entries(char *str) { @@ -1178,8 +1183,7 @@ void __init tcp_metrics_init(void) if (ret < 0) panic("Could not allocate the tcp_metrics hash table\n"); - ret = genl_register_family_with_ops(&tcp_metrics_nl_family, - tcp_metrics_nl_ops); + ret = genl_register_family(&tcp_metrics_nl_family); if (ret < 0) panic("Could not register tcp_metrics generic netlink\n"); } diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c index 0d57e27d1cdd..97f7b0cc4675 100644 --- a/net/ipv6/ila/ila_xlat.c +++ b/net/ipv6/ila/ila_xlat.c @@ -118,14 +118,7 @@ static const struct rhashtable_params rht_params = { .obj_cmpfn = ila_cmpfn, }; -static struct genl_family ila_nl_family = { - .hdrsize = 0, - .name = ILA_GENL_NAME, - .version 
= ILA_GENL_VERSION, - .maxattr = ILA_ATTR_MAX, - .netnsok = true, - .parallel_ops = true, -}; +static struct genl_family ila_nl_family; static const struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = { [ILA_ATTR_LOCATOR] = { .type = NLA_U64, }, @@ -560,6 +553,18 @@ static const struct genl_ops ila_nl_ops[] = { }, }; +static struct genl_family ila_nl_family = { + .hdrsize = 0, + .name = ILA_GENL_NAME, + .version = ILA_GENL_VERSION, + .maxattr = ILA_ATTR_MAX, + .netnsok = true, + .parallel_ops = true, + .module = THIS_MODULE, + .ops = ila_nl_ops, + .n_ops = ARRAY_SIZE(ila_nl_ops), +}; + #define ILA_HASH_TABLE_SIZE 1024 static __net_init int ila_init_net(struct net *net) @@ -630,8 +635,7 @@ int ila_xlat_init(void) if (ret) goto exit; - ret = genl_register_family_with_ops(&ila_nl_family, - ila_nl_ops); + ret = genl_register_family(&ila_nl_family); if (ret < 0) goto unregister; diff --git a/net/irda/irnetlink.c b/net/irda/irnetlink.c index f23b81aa91fe..07877347c2f7 100644 --- a/net/irda/irnetlink.c +++ b/net/irda/irnetlink.c @@ -24,12 +24,7 @@ -static struct genl_family irda_nl_family = { - .name = IRDA_NL_NAME, - .hdrsize = 0, - .version = IRDA_NL_VERSION, - .maxattr = IRDA_NL_CMD_MAX, -}; +static struct genl_family irda_nl_family; static struct net_device * ifname_to_netdev(struct net *net, struct genl_info *info) { @@ -146,9 +141,19 @@ static const struct genl_ops irda_nl_ops[] = { }; +static struct genl_family irda_nl_family = { + .name = IRDA_NL_NAME, + .hdrsize = 0, + .version = IRDA_NL_VERSION, + .maxattr = IRDA_NL_CMD_MAX, + .module = THIS_MODULE, + .ops = irda_nl_ops, + .n_ops = ARRAY_SIZE(irda_nl_ops), +}; + int irda_nl_register(void) { - return genl_register_family_with_ops(&irda_nl_family, irda_nl_ops); + return genl_register_family(&irda_nl_family); } void irda_nl_unregister(void) diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index 4fbf1f41ac52..e4e8c0769a6b 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -31,13 
+31,7 @@ #include "l2tp_core.h" -static struct genl_family l2tp_nl_family = { - .name = L2TP_GENL_NAME, - .version = L2TP_GENL_VERSION, - .hdrsize = 0, - .maxattr = L2TP_ATTR_MAX, - .netnsok = true, -}; +static struct genl_family l2tp_nl_family; static const struct genl_multicast_group l2tp_multicast_group[] = { { @@ -976,6 +970,19 @@ static const struct genl_ops l2tp_nl_ops[] = { }, }; +static struct genl_family l2tp_nl_family = { + .name = L2TP_GENL_NAME, + .version = L2TP_GENL_VERSION, + .hdrsize = 0, + .maxattr = L2TP_ATTR_MAX, + .netnsok = true, + .module = THIS_MODULE, + .ops = l2tp_nl_ops, + .n_ops = ARRAY_SIZE(l2tp_nl_ops), + .mcgrps = l2tp_multicast_group, + .n_mcgrps = ARRAY_SIZE(l2tp_multicast_group), +}; + int l2tp_nl_register_ops(enum l2tp_pwtype pw_type, const struct l2tp_nl_cmd_ops *ops) { int ret; @@ -1012,9 +1019,7 @@ EXPORT_SYMBOL_GPL(l2tp_nl_unregister_ops); static int l2tp_nl_init(void) { pr_info("L2TP netlink interface\n"); - return genl_register_family_with_ops_groups(&l2tp_nl_family, - l2tp_nl_ops, - l2tp_multicast_group); + return genl_register_family(&l2tp_nl_family); } static void l2tp_nl_cleanup(void) diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index ceed66cdd03e..ea3e8aed063f 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -2840,13 +2840,7 @@ static struct nf_sockopt_ops ip_vs_sockopts = { */ /* IPVS genetlink family */ -static struct genl_family ip_vs_genl_family = { - .hdrsize = 0, - .name = IPVS_GENL_NAME, - .version = IPVS_GENL_VERSION, - .maxattr = IPVS_CMD_MAX, - .netnsok = true, /* Make ipvsadm to work on netns */ -}; +static struct genl_family ip_vs_genl_family; /* Policy used for first-level command attributes */ static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = { @@ -3871,10 +3865,20 @@ static const struct genl_ops ip_vs_genl_ops[] = { }, }; +static struct genl_family ip_vs_genl_family = { + .hdrsize = 0, + .name = IPVS_GENL_NAME, + 
.version = IPVS_GENL_VERSION, + .maxattr = IPVS_CMD_MAX, + .netnsok = true, /* Make ipvsadm to work on netns */ + .module = THIS_MODULE, + .ops = ip_vs_genl_ops, + .n_ops = ARRAY_SIZE(ip_vs_genl_ops), +}; + static int __init ip_vs_genl_register(void) { - return genl_register_family_with_ops(&ip_vs_genl_family, - ip_vs_genl_ops); + return genl_register_family(&ip_vs_genl_family); } static void ip_vs_genl_unregister(void) diff --git a/net/netlabel/netlabel_calipso.c b/net/netlabel/netlabel_calipso.c index 152e503b8c5d..ca7c9c411a5c 100644 --- a/net/netlabel/netlabel_calipso.c +++ b/net/netlabel/netlabel_calipso.c @@ -60,12 +60,7 @@ struct netlbl_domhsh_walk_arg { }; /* NetLabel Generic NETLINK CALIPSO family */ -static struct genl_family netlbl_calipso_gnl_family = { - .hdrsize = 0, - .name = NETLBL_NLTYPE_CALIPSO_NAME, - .version = NETLBL_PROTO_VERSION, - .maxattr = NLBL_CALIPSO_A_MAX, -}; +static struct genl_family netlbl_calipso_gnl_family; /* NetLabel Netlink attribute policy */ static const struct nla_policy calipso_genl_policy[NLBL_CALIPSO_A_MAX + 1] = { @@ -354,6 +349,16 @@ static const struct genl_ops netlbl_calipso_ops[] = { }, }; +static struct genl_family netlbl_calipso_gnl_family = { + .hdrsize = 0, + .name = NETLBL_NLTYPE_CALIPSO_NAME, + .version = NETLBL_PROTO_VERSION, + .maxattr = NLBL_CALIPSO_A_MAX, + .module = THIS_MODULE, + .ops = netlbl_calipso_ops, + .n_ops = ARRAY_SIZE(netlbl_calipso_ops), +}; + /* NetLabel Generic NETLINK Protocol Functions */ @@ -367,8 +372,7 @@ static const struct genl_ops netlbl_calipso_ops[] = { */ int __init netlbl_calipso_genl_init(void) { - return genl_register_family_with_ops(&netlbl_calipso_gnl_family, - netlbl_calipso_ops); + return genl_register_family(&netlbl_calipso_gnl_family); } static const struct netlbl_calipso_ops *calipso_ops; diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c index 755b284e7ad4..a665eae91245 100644 --- a/net/netlabel/netlabel_cipso_v4.c +++ 
b/net/netlabel/netlabel_cipso_v4.c @@ -59,13 +59,7 @@ struct netlbl_domhsh_walk_arg { }; /* NetLabel Generic NETLINK CIPSOv4 family */ -static struct genl_family netlbl_cipsov4_gnl_family = { - .hdrsize = 0, - .name = NETLBL_NLTYPE_CIPSOV4_NAME, - .version = NETLBL_PROTO_VERSION, - .maxattr = NLBL_CIPSOV4_A_MAX, -}; - +static struct genl_family netlbl_cipsov4_gnl_family; /* NetLabel Netlink attribute policy */ static const struct nla_policy netlbl_cipsov4_genl_policy[NLBL_CIPSOV4_A_MAX + 1] = { [NLBL_CIPSOV4_A_DOI] = { .type = NLA_U32 }, @@ -766,6 +760,16 @@ static const struct genl_ops netlbl_cipsov4_ops[] = { }, }; +static struct genl_family netlbl_cipsov4_gnl_family = { + .hdrsize = 0, + .name = NETLBL_NLTYPE_CIPSOV4_NAME, + .version = NETLBL_PROTO_VERSION, + .maxattr = NLBL_CIPSOV4_A_MAX, + .module = THIS_MODULE, + .ops = netlbl_cipsov4_ops, + .n_ops = ARRAY_SIZE(netlbl_cipsov4_ops), +}; + /* * NetLabel Generic NETLINK Protocol Functions */ @@ -780,6 +784,5 @@ static const struct genl_ops netlbl_cipsov4_ops[] = { */ int __init netlbl_cipsov4_genl_init(void) { - return genl_register_family_with_ops(&netlbl_cipsov4_gnl_family, - netlbl_cipsov4_ops); + return genl_register_family(&netlbl_cipsov4_gnl_family); } diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c index 3b00f2368fcd..ecfe8eb149db 100644 --- a/net/netlabel/netlabel_mgmt.c +++ b/net/netlabel/netlabel_mgmt.c @@ -60,12 +60,7 @@ struct netlbl_domhsh_walk_arg { }; /* NetLabel Generic NETLINK CIPSOv4 family */ -static struct genl_family netlbl_mgmt_gnl_family = { - .hdrsize = 0, - .name = NETLBL_NLTYPE_MGMT_NAME, - .version = NETLBL_PROTO_VERSION, - .maxattr = NLBL_MGMT_A_MAX, -}; +static struct genl_family netlbl_mgmt_gnl_family; /* NetLabel Netlink attribute policy */ static const struct nla_policy netlbl_mgmt_genl_policy[NLBL_MGMT_A_MAX + 1] = { @@ -833,6 +828,16 @@ static const struct genl_ops netlbl_mgmt_genl_ops[] = { }, }; +static struct genl_family netlbl_mgmt_gnl_family = { + 
.hdrsize = 0, + .name = NETLBL_NLTYPE_MGMT_NAME, + .version = NETLBL_PROTO_VERSION, + .maxattr = NLBL_MGMT_A_MAX, + .module = THIS_MODULE, + .ops = netlbl_mgmt_genl_ops, + .n_ops = ARRAY_SIZE(netlbl_mgmt_genl_ops), +}; + /* * NetLabel Generic NETLINK Protocol Functions */ @@ -847,6 +852,5 @@ static const struct genl_ops netlbl_mgmt_genl_ops[] = { */ int __init netlbl_mgmt_genl_init(void) { - return genl_register_family_with_ops(&netlbl_mgmt_gnl_family, - netlbl_mgmt_genl_ops); + return genl_register_family(&netlbl_mgmt_gnl_family); } diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index c2ea8d1f653a..5dbbad41114f 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -123,12 +123,7 @@ static struct netlbl_unlhsh_iface __rcu *netlbl_unlhsh_def; static u8 netlabel_unlabel_acceptflg; /* NetLabel Generic NETLINK unlabeled family */ -static struct genl_family netlbl_unlabel_gnl_family = { - .hdrsize = 0, - .name = NETLBL_NLTYPE_UNLABELED_NAME, - .version = NETLBL_PROTO_VERSION, - .maxattr = NLBL_UNLABEL_A_MAX, -}; +static struct genl_family netlbl_unlabel_gnl_family; /* NetLabel Netlink attribute policy */ static const struct nla_policy netlbl_unlabel_genl_policy[NLBL_UNLABEL_A_MAX + 1] = { @@ -1377,6 +1372,16 @@ static const struct genl_ops netlbl_unlabel_genl_ops[] = { }, }; +static struct genl_family netlbl_unlabel_gnl_family = { + .hdrsize = 0, + .name = NETLBL_NLTYPE_UNLABELED_NAME, + .version = NETLBL_PROTO_VERSION, + .maxattr = NLBL_UNLABEL_A_MAX, + .module = THIS_MODULE, + .ops = netlbl_unlabel_genl_ops, + .n_ops = ARRAY_SIZE(netlbl_unlabel_genl_ops), +}; + /* * NetLabel Generic NETLINK Protocol Functions */ @@ -1391,8 +1396,7 @@ static const struct genl_ops netlbl_unlabel_genl_ops[] = { */ int __init netlbl_unlabel_genl_init(void) { - return genl_register_family_with_ops(&netlbl_unlabel_gnl_family, - netlbl_unlabel_genl_ops); + return genl_register_family(&netlbl_unlabel_gnl_family); } /* 
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index f19ec969edee..ca582ee4ae05 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -344,18 +344,18 @@ static int genl_validate_ops(const struct genl_family *family) } /** - * __genl_register_family - register a generic netlink family + * genl_register_family - register a generic netlink family * @family: generic netlink family * * Registers the specified family after validating it first. Only one * family may be registered with the same family name or identifier. * - * The family's ops array must already be assigned, you can use the - * genl_register_family_with_ops() helper function. + * The family's ops, multicast groups and module pointer must already + * be assigned. * * Return 0 on success or a negative error code. */ -int __genl_register_family(struct genl_family *family) +int genl_register_family(struct genl_family *family) { int err, i; @@ -429,7 +429,7 @@ errout_locked: genl_unlock_all(); return err; } -EXPORT_SYMBOL(__genl_register_family); +EXPORT_SYMBOL(genl_register_family); /** * genl_unregister_family - unregister generic netlink family @@ -452,7 +452,6 @@ int genl_unregister_family(struct genl_family *family) genl_unregister_mc_groups(family); list_del(&rc->family_list); - family->n_ops = 0; up_write(&cb_lock); wait_event(genl_sk_destructing_waitq, atomic_read(&genl_sk_destructing_cnt) == 0); @@ -681,13 +680,7 @@ static void genl_rcv(struct sk_buff *skb) * Controller **************************************************************************/ -static struct genl_family genl_ctrl = { - .id = GENL_ID_CTRL, - .name = "nlctrl", - .version = 0x2, - .maxattr = CTRL_ATTR_MAX, - .netnsok = true, -}; +static struct genl_family genl_ctrl; static int ctrl_fill_info(struct genl_family *family, u32 portid, u32 seq, u32 flags, struct sk_buff *skb, u8 cmd) @@ -997,6 +990,19 @@ static const struct genl_multicast_group genl_ctrl_groups[] = { { .name = "notify", }, }; +static struct 
genl_family genl_ctrl = { + .module = THIS_MODULE, + .ops = genl_ctrl_ops, + .n_ops = ARRAY_SIZE(genl_ctrl_ops), + .mcgrps = genl_ctrl_groups, + .n_mcgrps = ARRAY_SIZE(genl_ctrl_groups), + .id = GENL_ID_CTRL, + .name = "nlctrl", + .version = 0x2, + .maxattr = CTRL_ATTR_MAX, + .netnsok = true, +}; + static int genl_bind(struct net *net, int group) { int i, err = -ENOENT; @@ -1086,8 +1092,7 @@ static int __init genl_init(void) for (i = 0; i < GENL_FAM_TAB_SIZE; i++) INIT_LIST_HEAD(&family_ht[i]); - err = genl_register_family_with_ops_groups(&genl_ctrl, genl_ctrl_ops, - genl_ctrl_groups); + err = genl_register_family(&genl_ctrl); if (err < 0) goto problem; diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index c230403e066c..450b1e5144cc 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -38,13 +38,7 @@ static const struct genl_multicast_group nfc_genl_mcgrps[] = { { .name = NFC_GENL_MCAST_EVENT_NAME, }, }; -static struct genl_family nfc_genl_family = { - .hdrsize = 0, - .name = NFC_GENL_NAME, - .version = NFC_GENL_VERSION, - .maxattr = NFC_ATTR_MAX, -}; - +static struct genl_family nfc_genl_family; static const struct nla_policy nfc_genl_policy[NFC_ATTR_MAX + 1] = { [NFC_ATTR_DEVICE_INDEX] = { .type = NLA_U32 }, [NFC_ATTR_DEVICE_NAME] = { .type = NLA_STRING, @@ -1752,6 +1746,18 @@ static const struct genl_ops nfc_genl_ops[] = { }, }; +static struct genl_family nfc_genl_family = { + .hdrsize = 0, + .name = NFC_GENL_NAME, + .version = NFC_GENL_VERSION, + .maxattr = NFC_ATTR_MAX, + .module = THIS_MODULE, + .ops = nfc_genl_ops, + .n_ops = ARRAY_SIZE(nfc_genl_ops), + .mcgrps = nfc_genl_mcgrps, + .n_mcgrps = ARRAY_SIZE(nfc_genl_mcgrps), +}; + struct urelease_work { struct work_struct w; @@ -1837,9 +1843,7 @@ int __init nfc_genl_init(void) { int rc; - rc = genl_register_family_with_ops_groups(&nfc_genl_family, - nfc_genl_ops, - nfc_genl_mcgrps); + rc = genl_register_family(&nfc_genl_family); if (rc) return rc; diff --git a/net/openvswitch/datapath.c 
b/net/openvswitch/datapath.c index f9fef7dfba15..ad6a111a0014 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -679,6 +679,7 @@ static struct genl_family dp_packet_genl_family = { .parallel_ops = true, .ops = dp_packet_genl_ops, .n_ops = ARRAY_SIZE(dp_packet_genl_ops), + .module = THIS_MODULE, }; static void get_dp_stats(const struct datapath *dp, struct ovs_dp_stats *stats, @@ -1445,6 +1446,7 @@ static struct genl_family dp_flow_genl_family = { .n_ops = ARRAY_SIZE(dp_flow_genl_ops), .mcgrps = &ovs_dp_flow_multicast_group, .n_mcgrps = 1, + .module = THIS_MODULE, }; static size_t ovs_dp_cmd_msg_size(void) @@ -1830,6 +1832,7 @@ static struct genl_family dp_datapath_genl_family = { .n_ops = ARRAY_SIZE(dp_datapath_genl_ops), .mcgrps = &ovs_dp_datapath_multicast_group, .n_mcgrps = 1, + .module = THIS_MODULE, }; /* Called with ovs_mutex or RCU read lock. */ @@ -2251,6 +2254,7 @@ struct genl_family dp_vport_genl_family = { .n_ops = ARRAY_SIZE(dp_vport_genl_ops), .mcgrps = &ovs_dp_vport_multicast_group, .n_mcgrps = 1, + .module = THIS_MODULE, }; static struct genl_family * const dp_genl_families[] = { diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index 383b8fedabc7..74a405bf107b 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -135,14 +135,6 @@ const struct nla_policy tipc_nl_udp_policy[TIPC_NLA_UDP_MAX + 1] = { /* Users of the legacy API (tipc-config) can't handle that we add operations, * so we have a separate genl handling for the new API. 
*/ -struct genl_family tipc_genl_family = { - .name = TIPC_GENL_V2_NAME, - .version = TIPC_GENL_V2_VERSION, - .hdrsize = 0, - .maxattr = TIPC_NLA_MAX, - .netnsok = true, -}; - static const struct genl_ops tipc_genl_v2_ops[] = { { .cmd = TIPC_NL_BEARER_DISABLE, @@ -257,6 +249,17 @@ static const struct genl_ops tipc_genl_v2_ops[] = { #endif }; +struct genl_family tipc_genl_family = { + .name = TIPC_GENL_V2_NAME, + .version = TIPC_GENL_V2_VERSION, + .hdrsize = 0, + .maxattr = TIPC_NLA_MAX, + .netnsok = true, + .module = THIS_MODULE, + .ops = tipc_genl_v2_ops, + .n_ops = ARRAY_SIZE(tipc_genl_v2_ops), +}; + int tipc_nlmsg_parse(const struct nlmsghdr *nlh, struct nlattr ***attr) { u32 maxattr = tipc_genl_family.maxattr; @@ -272,8 +275,7 @@ int tipc_netlink_start(void) { int res; - res = genl_register_family_with_ops(&tipc_genl_family, - tipc_genl_v2_ops); + res = genl_register_family(&tipc_genl_family); if (res) { pr_err("Failed to register netlink interface\n"); return res; diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index f04428e4c8e5..07b19931e458 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -1215,27 +1215,29 @@ send: return err; } +static struct genl_ops tipc_genl_compat_ops[] = { + { + .cmd = TIPC_GENL_CMD, + .doit = tipc_nl_compat_recv, + }, +}; + static struct genl_family tipc_genl_compat_family = { .name = TIPC_GENL_NAME, .version = TIPC_GENL_VERSION, .hdrsize = TIPC_GENL_HDRLEN, .maxattr = 0, .netnsok = true, -}; - -static struct genl_ops tipc_genl_compat_ops[] = { - { - .cmd = TIPC_GENL_CMD, - .doit = tipc_nl_compat_recv, - }, + .module = THIS_MODULE, + .ops = tipc_genl_compat_ops, + .n_ops = ARRAY_SIZE(tipc_genl_compat_ops), }; int tipc_netlink_compat_start(void) { int res; - res = genl_register_family_with_ops(&tipc_genl_compat_family, - tipc_genl_compat_ops); + res = genl_register_family(&tipc_genl_compat_family); if (res) { pr_err("Failed to register legacy compat interface\n"); return res; diff --git 
a/net/wimax/stack.c b/net/wimax/stack.c index 8ac83a41585f..587e1627681f 100644 --- a/net/wimax/stack.c +++ b/net/wimax/stack.c @@ -572,15 +572,20 @@ struct d_level D_LEVEL[] = { size_t D_LEVEL_SIZE = ARRAY_SIZE(D_LEVEL); +static const struct genl_multicast_group wimax_gnl_mcgrps[] = { + { .name = "msg", }, +}; + struct genl_family wimax_gnl_family = { .name = "WiMAX", .version = WIMAX_GNL_VERSION, .hdrsize = 0, .maxattr = WIMAX_GNL_ATTR_MAX, -}; - -static const struct genl_multicast_group wimax_gnl_mcgrps[] = { - { .name = "msg", }, + .module = THIS_MODULE, + .ops = wimax_gnl_ops, + .n_ops = ARRAY_SIZE(wimax_gnl_ops), + .mcgrps = wimax_gnl_mcgrps, + .n_mcgrps = ARRAY_SIZE(wimax_gnl_mcgrps), }; @@ -595,11 +600,7 @@ int __init wimax_subsys_init(void) d_parse_params(D_LEVEL, D_LEVEL_SIZE, wimax_debug_params, "wimax.debug"); - snprintf(wimax_gnl_family.name, sizeof(wimax_gnl_family.name), - "WiMAX"); - result = genl_register_family_with_ops_groups(&wimax_gnl_family, - wimax_gnl_ops, - wimax_gnl_mcgrps); + result = genl_register_family(&wimax_gnl_family); if (unlikely(result < 0)) { pr_err("cannot register generic netlink family: %d\n", result); goto error_register_family; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 714beafe05e0..8e5ca3c47593 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -32,21 +32,8 @@ static int nl80211_crypto_settings(struct cfg80211_registered_device *rdev, struct cfg80211_crypto_settings *settings, int cipher_limit); -static int nl80211_pre_doit(const struct genl_ops *ops, struct sk_buff *skb, - struct genl_info *info); -static void nl80211_post_doit(const struct genl_ops *ops, struct sk_buff *skb, - struct genl_info *info); - /* the netlink family */ -static struct genl_family nl80211_fam = { - .name = NL80211_GENL_NAME, /* have users key off the name instead */ - .hdrsize = 0, /* no private header */ - .version = 1, /* no particular meaning now */ - .maxattr = NL80211_ATTR_MAX, - .netnsok = true, - 
.pre_doit = nl80211_pre_doit, - .post_doit = nl80211_post_doit, -}; +static struct genl_family nl80211_fam; /* multicast groups */ enum nl80211_multicast_groups { @@ -12599,6 +12586,21 @@ static const struct genl_ops nl80211_ops[] = { }, }; +static struct genl_family nl80211_fam = { + .name = NL80211_GENL_NAME, /* have users key off the name instead */ + .hdrsize = 0, /* no private header */ + .version = 1, /* no particular meaning now */ + .maxattr = NL80211_ATTR_MAX, + .netnsok = true, + .pre_doit = nl80211_pre_doit, + .post_doit = nl80211_post_doit, + .module = THIS_MODULE, + .ops = nl80211_ops, + .n_ops = ARRAY_SIZE(nl80211_ops), + .mcgrps = nl80211_mcgrps, + .n_mcgrps = ARRAY_SIZE(nl80211_mcgrps), +}; + /* notification functions */ void nl80211_notify_wiphy(struct cfg80211_registered_device *rdev, @@ -14565,8 +14567,7 @@ int nl80211_init(void) { int err; - err = genl_register_family_with_ops_groups(&nl80211_fam, nl80211_ops, - nl80211_mcgrps); + err = genl_register_family(&nl80211_fam); if (err) return err; -- cgit v1.2.3 From 56989f6d8568c21257dcec0f5e644d5570ba3281 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 24 Oct 2016 14:40:05 +0200 Subject: genetlink: mark families as __ro_after_init Now genl_register_family() is the only thing (other than the users themselves, perhaps, but I didn't find any doing that) writing to the family struct. In all families that I found, genl_register_family() is only called from __init functions (some indirectly, in which case I've added __init annotations to clarify things), so all can actually be marked __ro_after_init. This protects the data structure from accidental corruption. Signed-off-by: Johannes Berg Signed-off-by: David S.
Miller --- drivers/acpi/event.c | 4 ++-- drivers/net/gtp.c | 2 +- drivers/net/macsec.c | 2 +- drivers/net/team/team.c | 4 ++-- drivers/net/wireless/mac80211_hwsim.c | 4 ++-- drivers/scsi/pmcraid.c | 4 ++-- drivers/target/target_core_user.c | 2 +- drivers/thermal/thermal_core.c | 4 ++-- fs/dlm/netlink.c | 2 +- fs/quota/netlink.c | 2 +- include/linux/genl_magic_func.h | 2 +- kernel/taskstats.c | 2 +- net/batman-adv/netlink.c | 2 +- net/core/devlink.c | 2 +- net/core/drop_monitor.c | 2 +- net/hsr/hsr_netlink.c | 2 +- net/ieee802154/netlink.c | 2 +- net/ieee802154/nl802154.c | 4 ++-- net/ipv4/fou.c | 2 +- net/ipv4/tcp_metrics.c | 2 +- net/ipv6/ila/ila_xlat.c | 4 ++-- net/irda/irnetlink.c | 4 ++-- net/l2tp/l2tp_netlink.c | 4 ++-- net/netfilter/ipvs/ip_vs_ctl.c | 2 +- net/netlabel/netlabel_calipso.c | 2 +- net/netlabel/netlabel_cipso_v4.c | 2 +- net/netlabel/netlabel_mgmt.c | 2 +- net/netlabel/netlabel_unlabeled.c | 2 +- net/netlink/genetlink.c | 2 +- net/nfc/netlink.c | 2 +- net/openvswitch/datapath.c | 10 +++++----- net/tipc/netlink.c | 4 ++-- net/tipc/netlink_compat.c | 4 ++-- net/wimax/stack.c | 2 +- net/wireless/nl80211.c | 4 ++-- 35 files changed, 51 insertions(+), 51 deletions(-) (limited to 'include/linux') diff --git a/drivers/acpi/event.c b/drivers/acpi/event.c index 1ab12ad7d5ba..7fceb3b4691b 100644 --- a/drivers/acpi/event.c +++ b/drivers/acpi/event.c @@ -82,7 +82,7 @@ static const struct genl_multicast_group acpi_event_mcgrps[] = { { .name = ACPI_GENL_MCAST_GROUP_NAME, }, }; -static struct genl_family acpi_event_genl_family = { +static struct genl_family acpi_event_genl_family __ro_after_init = { .module = THIS_MODULE, .name = ACPI_GENL_FAMILY_NAME, .version = ACPI_GENL_VERSION, @@ -144,7 +144,7 @@ int acpi_bus_generate_netlink_event(const char *device_class, EXPORT_SYMBOL(acpi_bus_generate_netlink_event); -static int acpi_event_genetlink_init(void) +static int __init acpi_event_genetlink_init(void) { return genl_register_family(&acpi_event_genl_family); } 
diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index 0604fd78f826..719d19f35673 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -1290,7 +1290,7 @@ static const struct genl_ops gtp_genl_ops[] = { }, }; -static struct genl_family gtp_genl_family = { +static struct genl_family gtp_genl_family __ro_after_init = { .name = "gtp", .version = 0, .hdrsize = 0, diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 63ca7a3c77cf..0a715ab9d9cc 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -2648,7 +2648,7 @@ static const struct genl_ops macsec_genl_ops[] = { }, }; -static struct genl_family macsec_fam = { +static struct genl_family macsec_fam __ro_after_init = { .name = MACSEC_GENL_NAME, .hdrsize = 0, .version = MACSEC_GENL_VERSION, diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 46bf7c1216c0..bdc58567d10e 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -2740,7 +2740,7 @@ static const struct genl_multicast_group team_nl_mcgrps[] = { { .name = TEAM_GENL_CHANGE_EVENT_MC_GRP_NAME, }, }; -static struct genl_family team_nl_family = { +static struct genl_family team_nl_family __ro_after_init = { .name = TEAM_GENL_NAME, .version = TEAM_GENL_VERSION, .maxattr = TEAM_ATTR_MAX, @@ -2773,7 +2773,7 @@ static int team_nl_send_event_port_get(struct team *team, port); } -static int team_nl_init(void) +static int __init team_nl_init(void) { return genl_register_family(&team_nl_family); } diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 5d4637e586e8..220e9dc8ccf8 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -3228,7 +3228,7 @@ static const struct genl_ops hwsim_ops[] = { }, }; -static struct genl_family hwsim_genl_family = { +static struct genl_family hwsim_genl_family __ro_after_init = { .name = "MAC80211_HWSIM", .version = 1, .maxattr = HWSIM_ATTR_MAX, @@ -3287,7 +3287,7 @@ static struct notifier_block 
hwsim_netlink_notifier = { .notifier_call = mac80211_hwsim_netlink_notify, }; -static int hwsim_init_netlink(void) +static int __init hwsim_init_netlink(void) { int rc; diff --git a/drivers/scsi/pmcraid.c b/drivers/scsi/pmcraid.c index c0ab7bb8c3ce..845affa112f7 100644 --- a/drivers/scsi/pmcraid.c +++ b/drivers/scsi/pmcraid.c @@ -1368,7 +1368,7 @@ static struct genl_multicast_group pmcraid_mcgrps[] = { { .name = "events", /* not really used - see ID discussion below */ }, }; -static struct genl_family pmcraid_event_family = { +static struct genl_family pmcraid_event_family __ro_after_init = { .module = THIS_MODULE, .name = "pmcraid", .version = 1, @@ -1384,7 +1384,7 @@ static struct genl_family pmcraid_event_family = { * 0 if the pmcraid_event_family is successfully registered * with netlink generic, non-zero otherwise */ -static int pmcraid_netlink_init(void) +static int __init pmcraid_netlink_init(void) { int result; diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 3483372f5562..0f173bf7dbac 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -147,7 +147,7 @@ static const struct genl_multicast_group tcmu_mcgrps[] = { }; /* Our generic netlink family */ -static struct genl_family tcmu_genl_family = { +static struct genl_family tcmu_genl_family __ro_after_init = { .module = THIS_MODULE, .hdrsize = 0, .name = "TCM-USER", diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index 93b6caab2d9f..911fd964c742 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -2163,7 +2163,7 @@ static const struct genl_multicast_group thermal_event_mcgrps[] = { { .name = THERMAL_GENL_MCAST_GROUP_NAME, }, }; -static struct genl_family thermal_event_genl_family = { +static struct genl_family thermal_event_genl_family __ro_after_init = { .module = THIS_MODULE, .name = THERMAL_GENL_FAMILY_NAME, .version = THERMAL_GENL_VERSION, @@ -2235,7 +2235,7 @@ int 
thermal_generate_netlink_event(struct thermal_zone_device *tz, } EXPORT_SYMBOL_GPL(thermal_generate_netlink_event); -static int genetlink_init(void) +static int __init genetlink_init(void) { return genl_register_family(&thermal_event_genl_family); } diff --git a/fs/dlm/netlink.c b/fs/dlm/netlink.c index 04042d69573c..0643ae44f342 100644 --- a/fs/dlm/netlink.c +++ b/fs/dlm/netlink.c @@ -72,7 +72,7 @@ static struct genl_ops dlm_nl_ops[] = { }, }; -static struct genl_family family = { +static struct genl_family family __ro_after_init = { .name = DLM_GENL_NAME, .version = DLM_GENL_VERSION, .ops = dlm_nl_ops, diff --git a/fs/quota/netlink.c b/fs/quota/netlink.c index 9457c7b0dfa2..e99b1a72d9a7 100644 --- a/fs/quota/netlink.c +++ b/fs/quota/netlink.c @@ -12,7 +12,7 @@ static const struct genl_multicast_group quota_mcgrps[] = { }; /* Netlink family structure for quota */ -static struct genl_family quota_genl_family = { +static struct genl_family quota_genl_family __ro_after_init = { .module = THIS_MODULE, .hdrsize = 0, .name = "VFS_DQUOT", diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h index 40c2e39362c8..377257d8f7e3 100644 --- a/include/linux/genl_magic_func.h +++ b/include/linux/genl_magic_func.h @@ -293,7 +293,7 @@ static int CONCAT_(GENL_MAGIC_FAMILY, _genl_multicast_ ## group)( \ #undef GENL_mc_group #define GENL_mc_group(group) -static struct genl_family ZZZ_genl_family __read_mostly = { +static struct genl_family ZZZ_genl_family __ro_after_init = { .name = __stringify(GENL_MAGIC_FAMILY), .version = GENL_MAGIC_VERSION, #ifdef GENL_MAGIC_FAMILY_HDRSZ diff --git a/kernel/taskstats.c b/kernel/taskstats.c index 4075ece592f2..9b7f838511ce 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -646,7 +646,7 @@ static const struct genl_ops taskstats_ops[] = { }, }; -static struct genl_family family = { +static struct genl_family family __ro_after_init = { .name = TASKSTATS_GENL_NAME, .version = TASKSTATS_GENL_VERSION, .maxattr = 
TASKSTATS_CMD_ATTR_MAX, diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c index e28cec34a016..005012ba9b48 100644 --- a/net/batman-adv/netlink.c +++ b/net/batman-adv/netlink.c @@ -603,7 +603,7 @@ static struct genl_ops batadv_netlink_ops[] = { }; -struct genl_family batadv_netlink_family = { +struct genl_family batadv_netlink_family __ro_after_init = { .hdrsize = 0, .name = BATADV_NL_NAME, .version = 1, diff --git a/net/core/devlink.c b/net/core/devlink.c index 063da8091aef..c14f8b661db9 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -1612,7 +1612,7 @@ static const struct genl_ops devlink_nl_ops[] = { }, }; -static struct genl_family devlink_nl_family = { +static struct genl_family devlink_nl_family __ro_after_init = { .name = DEVLINK_GENL_NAME, .version = DEVLINK_GENL_VERSION, .maxattr = DEVLINK_ATTR_MAX, diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index 80c002794ff6..8e0c0635ee97 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -346,7 +346,7 @@ static const struct genl_ops dropmon_ops[] = { }, }; -static struct genl_family net_drop_monitor_family = { +static struct genl_family net_drop_monitor_family __ro_after_init = { .hdrsize = 0, .name = "NET_DM", .version = 2, diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c index aab34c7f6f89..1ab30e7d3f99 100644 --- a/net/hsr/hsr_netlink.c +++ b/net/hsr/hsr_netlink.c @@ -461,7 +461,7 @@ static const struct genl_ops hsr_ops[] = { }, }; -static struct genl_family hsr_genl_family = { +static struct genl_family hsr_genl_family __ro_after_init = { .hdrsize = 0, .name = "HSR", .version = 1, diff --git a/net/ieee802154/netlink.c b/net/ieee802154/netlink.c index 08e62470bac2..6bde9e5a5503 100644 --- a/net/ieee802154/netlink.c +++ b/net/ieee802154/netlink.c @@ -131,7 +131,7 @@ static const struct genl_multicast_group ieee802154_mcgrps[] = { [IEEE802154_BEACON_MCGRP] = { .name = IEEE802154_MCAST_BEACON_NAME, }, }; -struct genl_family nl802154_family = { 
+struct genl_family nl802154_family __ro_after_init = { .hdrsize = 0, .name = IEEE802154_NL_NAME, .version = 1, diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c index f7e75578aedd..fc60cd061f39 100644 --- a/net/ieee802154/nl802154.c +++ b/net/ieee802154/nl802154.c @@ -2462,7 +2462,7 @@ static const struct genl_ops nl802154_ops[] = { #endif /* CONFIG_IEEE802154_NL802154_EXPERIMENTAL */ }; -static struct genl_family nl802154_fam = { +static struct genl_family nl802154_fam __ro_after_init = { .name = NL802154_GENL_NAME, /* have users key off the name instead */ .hdrsize = 0, /* no private header */ .version = 1, /* no particular meaning now */ @@ -2478,7 +2478,7 @@ static struct genl_family nl802154_fam = { }; /* initialisation/exit functions */ -int nl802154_init(void) +int __init nl802154_init(void) { return genl_register_family(&nl802154_fam); } diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index 5b5226a2434f..6cb57bb8692d 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -824,7 +824,7 @@ static const struct genl_ops fou_nl_ops[] = { }, }; -static struct genl_family fou_nl_family = { +static struct genl_family fou_nl_family __ro_after_init = { .hdrsize = 0, .name = FOU_GENL_NAME, .version = FOU_GENL_VERSION, diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index bba3c72c4a39..d46f4d5b1c62 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -1109,7 +1109,7 @@ static const struct genl_ops tcp_metrics_nl_ops[] = { }, }; -static struct genl_family tcp_metrics_nl_family = { +static struct genl_family tcp_metrics_nl_family __ro_after_init = { .hdrsize = 0, .name = TCP_METRICS_GENL_NAME, .version = TCP_METRICS_GENL_VERSION, diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c index 97f7b0cc4675..628ae6d85b59 100644 --- a/net/ipv6/ila/ila_xlat.c +++ b/net/ipv6/ila/ila_xlat.c @@ -553,7 +553,7 @@ static const struct genl_ops ila_nl_ops[] = { }, }; -static struct genl_family ila_nl_family = { +static struct genl_family 
ila_nl_family __ro_after_init = { .hdrsize = 0, .name = ILA_GENL_NAME, .version = ILA_GENL_VERSION, @@ -627,7 +627,7 @@ static int ila_xlat_addr(struct sk_buff *skb, bool set_csum_neutral) return 0; } -int ila_xlat_init(void) +int __init ila_xlat_init(void) { int ret; diff --git a/net/irda/irnetlink.c b/net/irda/irnetlink.c index 07877347c2f7..7fc340e574cf 100644 --- a/net/irda/irnetlink.c +++ b/net/irda/irnetlink.c @@ -141,7 +141,7 @@ static const struct genl_ops irda_nl_ops[] = { }; -static struct genl_family irda_nl_family = { +static struct genl_family irda_nl_family __ro_after_init = { .name = IRDA_NL_NAME, .hdrsize = 0, .version = IRDA_NL_VERSION, @@ -151,7 +151,7 @@ static struct genl_family irda_nl_family = { .n_ops = ARRAY_SIZE(irda_nl_ops), }; -int irda_nl_register(void) +int __init irda_nl_register(void) { return genl_register_family(&irda_nl_family); } diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index e4e8c0769a6b..59aa2d204e4a 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -970,7 +970,7 @@ static const struct genl_ops l2tp_nl_ops[] = { }, }; -static struct genl_family l2tp_nl_family = { +static struct genl_family l2tp_nl_family __ro_after_init = { .name = L2TP_GENL_NAME, .version = L2TP_GENL_VERSION, .hdrsize = 0, @@ -1016,7 +1016,7 @@ void l2tp_nl_unregister_ops(enum l2tp_pwtype pw_type) } EXPORT_SYMBOL_GPL(l2tp_nl_unregister_ops); -static int l2tp_nl_init(void) +static int __init l2tp_nl_init(void) { pr_info("L2TP netlink interface\n"); return genl_register_family(&l2tp_nl_family); diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index ea3e8aed063f..6b85ded4f91d 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -3865,7 +3865,7 @@ static const struct genl_ops ip_vs_genl_ops[] = { }, }; -static struct genl_family ip_vs_genl_family = { +static struct genl_family ip_vs_genl_family __ro_after_init = { .hdrsize = 0, .name = IPVS_GENL_NAME, .version = 
IPVS_GENL_VERSION, diff --git a/net/netlabel/netlabel_calipso.c b/net/netlabel/netlabel_calipso.c index ca7c9c411a5c..d177dd066504 100644 --- a/net/netlabel/netlabel_calipso.c +++ b/net/netlabel/netlabel_calipso.c @@ -349,7 +349,7 @@ static const struct genl_ops netlbl_calipso_ops[] = { }, }; -static struct genl_family netlbl_calipso_gnl_family = { +static struct genl_family netlbl_calipso_gnl_family __ro_after_init = { .hdrsize = 0, .name = NETLBL_NLTYPE_CALIPSO_NAME, .version = NETLBL_PROTO_VERSION, diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c index a665eae91245..4149d3e63589 100644 --- a/net/netlabel/netlabel_cipso_v4.c +++ b/net/netlabel/netlabel_cipso_v4.c @@ -760,7 +760,7 @@ static const struct genl_ops netlbl_cipsov4_ops[] = { }, }; -static struct genl_family netlbl_cipsov4_gnl_family = { +static struct genl_family netlbl_cipsov4_gnl_family __ro_after_init = { .hdrsize = 0, .name = NETLBL_NLTYPE_CIPSOV4_NAME, .version = NETLBL_PROTO_VERSION, diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c index ecfe8eb149db..21e0095b1d14 100644 --- a/net/netlabel/netlabel_mgmt.c +++ b/net/netlabel/netlabel_mgmt.c @@ -828,7 +828,7 @@ static const struct genl_ops netlbl_mgmt_genl_ops[] = { }, }; -static struct genl_family netlbl_mgmt_gnl_family = { +static struct genl_family netlbl_mgmt_gnl_family __ro_after_init = { .hdrsize = 0, .name = NETLBL_NLTYPE_MGMT_NAME, .version = NETLBL_PROTO_VERSION, diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index 5dbbad41114f..22dc1b9d6362 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -1372,7 +1372,7 @@ static const struct genl_ops netlbl_unlabel_genl_ops[] = { }, }; -static struct genl_family netlbl_unlabel_gnl_family = { +static struct genl_family netlbl_unlabel_gnl_family __ro_after_init = { .hdrsize = 0, .name = NETLBL_NLTYPE_UNLABELED_NAME, .version = NETLBL_PROTO_VERSION, diff --git 
a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 85659921e7b2..df0cbcddda2c 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -936,7 +936,7 @@ static const struct genl_multicast_group genl_ctrl_groups[] = { { .name = "notify", }, }; -static struct genl_family genl_ctrl = { +static struct genl_family genl_ctrl __ro_after_init = { .module = THIS_MODULE, .ops = genl_ctrl_ops, .n_ops = ARRAY_SIZE(genl_ctrl_ops), diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index 450b1e5144cc..03f3d5c7beb8 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -1746,7 +1746,7 @@ static const struct genl_ops nfc_genl_ops[] = { }, }; -static struct genl_family nfc_genl_family = { +static struct genl_family nfc_genl_family __ro_after_init = { .hdrsize = 0, .name = NFC_GENL_NAME, .version = NFC_GENL_VERSION, diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index ad6a111a0014..fa8760176b7d 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -670,7 +670,7 @@ static const struct genl_ops dp_packet_genl_ops[] = { } }; -static struct genl_family dp_packet_genl_family = { +static struct genl_family dp_packet_genl_family __ro_after_init = { .hdrsize = sizeof(struct ovs_header), .name = OVS_PACKET_FAMILY, .version = OVS_PACKET_VERSION, @@ -1435,7 +1435,7 @@ static const struct genl_ops dp_flow_genl_ops[] = { }, }; -static struct genl_family dp_flow_genl_family = { +static struct genl_family dp_flow_genl_family __ro_after_init = { .hdrsize = sizeof(struct ovs_header), .name = OVS_FLOW_FAMILY, .version = OVS_FLOW_VERSION, @@ -1821,7 +1821,7 @@ static const struct genl_ops dp_datapath_genl_ops[] = { }, }; -static struct genl_family dp_datapath_genl_family = { +static struct genl_family dp_datapath_genl_family __ro_after_init = { .hdrsize = sizeof(struct ovs_header), .name = OVS_DATAPATH_FAMILY, .version = OVS_DATAPATH_VERSION, @@ -2243,7 +2243,7 @@ static const struct genl_ops dp_vport_genl_ops[] = { }, }; -struct 
genl_family dp_vport_genl_family = { +struct genl_family dp_vport_genl_family __ro_after_init = { .hdrsize = sizeof(struct ovs_header), .name = OVS_VPORT_FAMILY, .version = OVS_VPORT_VERSION, @@ -2272,7 +2272,7 @@ static void dp_unregister_genl(int n_families) genl_unregister_family(dp_genl_families[i]); } -static int dp_register_genl(void) +static int __init dp_register_genl(void) { int err; int i; diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index 74a405bf107b..26ca8dd64ded 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -249,7 +249,7 @@ static const struct genl_ops tipc_genl_v2_ops[] = { #endif }; -struct genl_family tipc_genl_family = { +struct genl_family tipc_genl_family __ro_after_init = { .name = TIPC_GENL_V2_NAME, .version = TIPC_GENL_V2_VERSION, .hdrsize = 0, @@ -271,7 +271,7 @@ int tipc_nlmsg_parse(const struct nlmsghdr *nlh, struct nlattr ***attr) return nlmsg_parse(nlh, GENL_HDRLEN, *attr, maxattr, tipc_nl_policy); } -int tipc_netlink_start(void) +int __init tipc_netlink_start(void) { int res; diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index 07b19931e458..e1ae8a8a2b8e 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -1222,7 +1222,7 @@ static struct genl_ops tipc_genl_compat_ops[] = { }, }; -static struct genl_family tipc_genl_compat_family = { +static struct genl_family tipc_genl_compat_family __ro_after_init = { .name = TIPC_GENL_NAME, .version = TIPC_GENL_VERSION, .hdrsize = TIPC_GENL_HDRLEN, @@ -1233,7 +1233,7 @@ static struct genl_family tipc_genl_compat_family = { .n_ops = ARRAY_SIZE(tipc_genl_compat_ops), }; -int tipc_netlink_compat_start(void) +int __init tipc_netlink_compat_start(void) { int res; diff --git a/net/wimax/stack.c b/net/wimax/stack.c index 587e1627681f..5db731512014 100644 --- a/net/wimax/stack.c +++ b/net/wimax/stack.c @@ -576,7 +576,7 @@ static const struct genl_multicast_group wimax_gnl_mcgrps[] = { { .name = "msg", }, }; -struct genl_family wimax_gnl_family = 
{ +struct genl_family wimax_gnl_family __ro_after_init = { .name = "WiMAX", .version = WIMAX_GNL_VERSION, .hdrsize = 0, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 8e5ca3c47593..271707dacfea 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -12586,7 +12586,7 @@ static const struct genl_ops nl80211_ops[] = { }, }; -static struct genl_family nl80211_fam = { +static struct genl_family nl80211_fam __ro_after_init = { .name = NL80211_GENL_NAME, /* have users key off the name instead */ .hdrsize = 0, /* no private header */ .version = 1, /* no particular meaning now */ @@ -14563,7 +14563,7 @@ void nl80211_send_ap_stopped(struct wireless_dev *wdev) /* initialisation/exit functions */ -int nl80211_init(void) +int __init nl80211_init(void) { int err; -- cgit v1.2.3 From 7f847dd31736f1284538e54f46cf10e63929eb7f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 20 Oct 2016 22:07:53 +0200 Subject: debugfs: improve DEFINE_DEBUGFS_ATTRIBUTE for !CONFIG_DEBUG_FS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The slp_s0_residency_usec debugfs file currently uses DEFINE_DEBUGFS_ATTRIBUTE(), but that macro cannot really be used to define files outside of the debugfs code, as it has no reference to the get/set functions if CONFIG_DEBUG_FS is not defined: drivers/platform/x86/intel_pmc_core.c:80:12: error: ‘pmc_core_dev_state_get’ defined but not used [-Werror=unused-function] This fixes the macro to always contain the reference, and instead rely on the stubbed-out debugfs_create_file to not actually refer to its arguments so the compiler can still drop the reference. This works because the attribute definition is always 'static', and the dead-code removal silently drops all static symbols that are not used. 
Fixes: c64688081490 ("debugfs: add support for self-protecting attribute file fops") Fixes: df2294fb6428 ("intel_pmc_core: Convert to DEFINE_DEBUGFS_ATTRIBUTE") Signed-off-by: Arnd Bergmann [nicstange@gmail.com: Add dummy implementations of debugfs_attr_read() and debugfs_attr_write() in order to protect against possibly broken dead code elimination and to improve readability. Correct CONFIG_DEBUGFS_FS -> CONFIG_DEBUG_FS typo in changelog.] Signed-off-by: Nicolai Stange Reviewed-by: Andy Shevchenko Signed-off-by: Greg Kroah-Hartman --- include/linux/debugfs.h | 44 +++++++++++++++++++++++++++----------------- 1 file changed, 27 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index 4d3f0d1aec73..1b413a9aab81 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -62,6 +62,21 @@ static inline const struct file_operations *debugfs_real_fops(struct file *filp) return filp->f_path.dentry->d_fsdata; } +#define DEFINE_DEBUGFS_ATTRIBUTE(__fops, __get, __set, __fmt) \ +static int __fops ## _open(struct inode *inode, struct file *file) \ +{ \ + __simple_attr_check_format(__fmt, 0ull); \ + return simple_attr_open(inode, file, __get, __set, __fmt); \ +} \ +static const struct file_operations __fops = { \ + .owner = THIS_MODULE, \ + .open = __fops ## _open, \ + .release = simple_attr_release, \ + .read = debugfs_attr_read, \ + .write = debugfs_attr_write, \ + .llseek = generic_file_llseek, \ +} + #if defined(CONFIG_DEBUG_FS) struct dentry *debugfs_create_file(const char *name, umode_t mode, @@ -99,21 +114,6 @@ ssize_t debugfs_attr_read(struct file *file, char __user *buf, ssize_t debugfs_attr_write(struct file *file, const char __user *buf, size_t len, loff_t *ppos); -#define DEFINE_DEBUGFS_ATTRIBUTE(__fops, __get, __set, __fmt) \ -static int __fops ## _open(struct inode *inode, struct file *file) \ -{ \ - __simple_attr_check_format(__fmt, 0ull); \ - return simple_attr_open(inode, file, 
__get, __set, __fmt); \ -} \ -static const struct file_operations __fops = { \ - .owner = THIS_MODULE, \ - .open = __fops ## _open, \ - .release = simple_attr_release, \ - .read = debugfs_attr_read, \ - .write = debugfs_attr_write, \ - .llseek = generic_file_llseek, \ -} - struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry, struct dentry *new_dir, const char *new_name); @@ -233,8 +233,18 @@ static inline void debugfs_use_file_finish(int srcu_idx) __releases(&debugfs_srcu) { } -#define DEFINE_DEBUGFS_ATTRIBUTE(__fops, __get, __set, __fmt) \ - static const struct file_operations __fops = { 0 } +static inline ssize_t debugfs_attr_read(struct file *file, char __user *buf, + size_t len, loff_t *ppos) +{ + return -ENODEV; +} + +static inline ssize_t debugfs_attr_write(struct file *file, + const char __user *buf, + size_t len, loff_t *ppos) +{ + return -ENODEV; +} static inline struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry, struct dentry *new_dir, char *new_name) -- cgit v1.2.3 From 5be149bdd36c8765c9e785f70b888d028ada40af Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Wed, 19 Oct 2016 16:33:27 +0300 Subject: mei: bus: add module_mei_cl_driver helper macro Add module_mei_cl_driver helper macro for eliminating the boilerplate code from mei_cl drivers registration. The macro is intended for drivers which in their init/exit sections does only register/unregister of a mei_cl driver. 
Signed-off-by: Tomas Winkler Signed-off-by: Greg Kroah-Hartman --- include/linux/mei_cl_bus.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mei_cl_bus.h b/include/linux/mei_cl_bus.h index e746919530f5..e6fbd98ea90e 100644 --- a/include/linux/mei_cl_bus.h +++ b/include/linux/mei_cl_bus.h @@ -74,6 +74,19 @@ int __mei_cldev_driver_register(struct mei_cl_driver *cldrv, void mei_cldev_driver_unregister(struct mei_cl_driver *cldrv); +/** + * module_mei_cl_driver - Helper macro for registering mei cl driver + * + * @__mei_cldrv mei_cl_driver structure + * + * Helper macro for mei cl drivers which do not do anything special in module + * init/exit, for eliminating a boilerplate code. + */ +#define module_mei_cl_driver(__mei_cldrv) \ + module_driver(__mei_cldrv, \ + mei_cldev_driver_register,\ + mei_cldev_driver_unregister) + ssize_t mei_cldev_send(struct mei_cl_device *cldev, u8 *buf, size_t length); ssize_t mei_cldev_recv(struct mei_cl_device *cldev, u8 *buf, size_t length); -- cgit v1.2.3 From 1e4edb3fe93ff9f7b678a8f1b8d9df717edf6ad9 Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Wed, 19 Oct 2016 16:33:31 +0300 Subject: mei: bus: remove rx callback context The callback context is redundant as all the information can be retrieved from the device structure of its private data.
Signed-off-by: Tomas Winkler Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/bus.c | 6 ++---- drivers/nfc/mei_phy.c | 5 ++--- drivers/watchdog/mei_wdt.c | 6 ++---- include/linux/mei_cl_bus.h | 6 ++---- 4 files changed, 8 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/drivers/misc/mei/bus.c b/drivers/misc/mei/bus.c index 8cac7ef9ad0d..89a694ca624c 100644 --- a/drivers/misc/mei/bus.c +++ b/drivers/misc/mei/bus.c @@ -228,7 +228,7 @@ static void mei_cl_bus_event_work(struct work_struct *work) bus = cldev->bus; if (cldev->event_cb) - cldev->event_cb(cldev, cldev->events, cldev->event_context); + cldev->event_cb(cldev, cldev->events); cldev->events = 0; @@ -301,7 +301,6 @@ bool mei_cl_bus_rx_event(struct mei_cl *cl) * @cldev: me client devices * @event_cb: callback function * @events_mask: requested events bitmask - * @context: driver context data * * Return: 0 on success * -EALREADY if an callback is already registered @@ -309,7 +308,7 @@ bool mei_cl_bus_rx_event(struct mei_cl *cl) */ int mei_cldev_register_event_cb(struct mei_cl_device *cldev, unsigned long events_mask, - mei_cldev_event_cb_t event_cb, void *context) + mei_cldev_event_cb_t event_cb) { struct mei_device *bus = cldev->bus; int ret; @@ -320,7 +319,6 @@ int mei_cldev_register_event_cb(struct mei_cl_device *cldev, cldev->events = 0; cldev->events_mask = events_mask; cldev->event_cb = event_cb; - cldev->event_context = context; INIT_WORK(&cldev->event_work, mei_cl_bus_event_work); if (cldev->events_mask & BIT(MEI_CL_EVENT_RX)) { diff --git a/drivers/nfc/mei_phy.c b/drivers/nfc/mei_phy.c index 66dfd81ffb18..07b4239585fa 100644 --- a/drivers/nfc/mei_phy.c +++ b/drivers/nfc/mei_phy.c @@ -297,8 +297,7 @@ static int mei_nfc_recv(struct nfc_mei_phy *phy, u8 *buf, size_t length) } -static void nfc_mei_event_cb(struct mei_cl_device *cldev, u32 events, - void *context) +static void nfc_mei_event_cb(struct mei_cl_device *cldev, u32 events) { struct nfc_mei_phy *phy = 
mei_cldev_get_drvdata(cldev); @@ -360,7 +359,7 @@ static int nfc_mei_phy_enable(void *phy_id) } r = mei_cldev_register_event_cb(phy->cldev, BIT(MEI_CL_EVENT_RX), - nfc_mei_event_cb, phy); + nfc_mei_event_cb); if (r) { pr_err("Event cb registration failed %d\n", r); goto err; diff --git a/drivers/watchdog/mei_wdt.c b/drivers/watchdog/mei_wdt.c index 116be477c8fd..e0af52265511 100644 --- a/drivers/watchdog/mei_wdt.c +++ b/drivers/watchdog/mei_wdt.c @@ -501,10 +501,8 @@ static void mei_wdt_notify_event(struct mei_cl_device *cldev) * * @cldev: bus device * @events: event mask - * @context: callback context */ -static void mei_wdt_event(struct mei_cl_device *cldev, - u32 events, void *context) +static void mei_wdt_event(struct mei_cl_device *cldev, u32 events) { if (events & BIT(MEI_CL_EVENT_RX)) mei_wdt_event_rx(cldev); @@ -626,7 +624,7 @@ static int mei_wdt_probe(struct mei_cl_device *cldev, ret = mei_cldev_register_event_cb(wdt->cldev, BIT(MEI_CL_EVENT_RX) | BIT(MEI_CL_EVENT_NOTIF), - mei_wdt_event, NULL); + mei_wdt_event); /* on legacy devices notification is not supported * this doesn't fail the registration for RX event diff --git a/include/linux/mei_cl_bus.h b/include/linux/mei_cl_bus.h index e6fbd98ea90e..4adb2e7c9f84 100644 --- a/include/linux/mei_cl_bus.h +++ b/include/linux/mei_cl_bus.h @@ -9,7 +9,7 @@ struct mei_cl_device; struct mei_device; typedef void (*mei_cldev_event_cb_t)(struct mei_cl_device *cldev, - u32 events, void *context); + u32 events); /** * struct mei_cl_device - MEI device handle @@ -27,7 +27,6 @@ typedef void (*mei_cldev_event_cb_t)(struct mei_cl_device *cldev, * @event_work: async work to execute event callback * @event_cb: Drivers register this callback to get asynchronous ME * events (e.g. Rx buffer pending) notifications. - * @event_context: event callback run context * @events_mask: Events bit mask requested by driver. * @events: Events bitmask sent to the driver. 
* @@ -46,7 +45,6 @@ struct mei_cl_device { struct work_struct event_work; mei_cldev_event_cb_t event_cb; - void *event_context; unsigned long events_mask; unsigned long events; @@ -92,7 +90,7 @@ ssize_t mei_cldev_recv(struct mei_cl_device *cldev, u8 *buf, size_t length); int mei_cldev_register_event_cb(struct mei_cl_device *cldev, unsigned long event_mask, - mei_cldev_event_cb_t read_cb, void *context); + mei_cldev_event_cb_t read_cb); #define MEI_CL_EVENT_RX 0 #define MEI_CL_EVENT_TX 1 -- cgit v1.2.3 From f71082f815bf0b9599b97546cf1dd057c789c3c1 Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Sat, 22 Oct 2016 18:36:05 -0400 Subject: miscdevice: introduce builtin_misc_device This is basically the same as module_misc_device but without the presence of an exit/remove function. Similar in nature to the commit f309d4443130bf814e991f836e919dca22df37ae ("platform_device: better support builtin boilerplate avoidance"). Cc: Arnd Bergmann Signed-off-by: Paul Gortmaker Signed-off-by: Greg Kroah-Hartman --- include/linux/miscdevice.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h index 722698a43d79..a426cb55dc43 100644 --- a/include/linux/miscdevice.h +++ b/include/linux/miscdevice.h @@ -71,6 +71,13 @@ struct miscdevice { extern int misc_register(struct miscdevice *misc); extern void misc_deregister(struct miscdevice *misc); +/* + * Helper macro for drivers that don't do anything special in the initcall. + * This helps in eleminating of boilerplate code. + */ +#define builtin_misc_device(__misc_device) \ + builtin_driver(__misc_device, misc_register) + /* * Helper macro for drivers that don't do anything special in module init / exit * call. This helps in eleminating of boilerplate code. 
-- cgit v1.2.3 From c4aebd0332da831a3403faf2035af45059ab6b7c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 20 Oct 2016 15:12:09 +0200 Subject: block: remove bio_is_rw With the addition of the zoned operations the tests in this function became incorrect. But I think it's much better to just open code the allowed operations in the only caller anyway. Signed-off-by: Christoph Hellwig Reviewed-by: Shaun Tancheff Signed-off-by: Jens Axboe --- block/bio-integrity.c | 2 +- include/linux/bio.h | 11 ----------- 2 files changed, 1 insertion(+), 12 deletions(-) (limited to 'include/linux') diff --git a/block/bio-integrity.c b/block/bio-integrity.c index 63f72f00c72e..5384713d48bc 100644 --- a/block/bio-integrity.c +++ b/block/bio-integrity.c @@ -172,7 +172,7 @@ bool bio_integrity_enabled(struct bio *bio) { struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); - if (!bio_is_rw(bio)) + if (bio_op(bio) != REQ_OP_READ && bio_op(bio) != REQ_OP_WRITE) return false; /* Already protected? */ diff --git a/include/linux/bio.h b/include/linux/bio.h index 97cb48f03dc7..87ce64dafb93 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -83,17 +83,6 @@ static inline bool bio_no_advance_iter(struct bio *bio) bio_op(bio) == REQ_OP_WRITE_SAME; } -static inline bool bio_is_rw(struct bio *bio) -{ - if (!bio_has_data(bio)) - return false; - - if (bio_no_advance_iter(bio)) - return false; - - return true; -} - static inline bool bio_mergeable(struct bio *bio) { if (bio->bi_opf & REQ_NOMERGE_FLAGS) -- cgit v1.2.3 From bd1c1c21741cbd6e894960bcbc8b36f719590064 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 20 Oct 2016 15:12:10 +0200 Subject: block: REQ_NOMERGE is common to the bio and request So move it into the common section of the request flags. 
Signed-off-by: Christoph Hellwig Reviewed-by: Shaun Tancheff Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index dd50dce89a80..b54142534793 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -158,6 +158,7 @@ enum rq_flag_bits { __REQ_META, /* metadata io request */ __REQ_PRIO, /* boost priority in cfq */ + __REQ_NOMERGE, /* don't touch this for merging */ __REQ_NOIDLE, /* don't anticipate more IO after this one */ __REQ_INTEGRITY, /* I/O includes block integrity payload */ __REQ_FUA, /* forced unit access */ @@ -171,7 +172,6 @@ enum rq_flag_bits { /* request only flags */ __REQ_SORTED, /* elevator knows about this request */ __REQ_SOFTBARRIER, /* may not be passed by ioscheduler */ - __REQ_NOMERGE, /* don't touch this for merging */ __REQ_STARTED, /* drive already may have started this one */ __REQ_DONTPREP, /* don't call prep for this one */ __REQ_QUEUED, /* uses queueing */ -- cgit v1.2.3 From 188bd2b16b3c6ea87a90df20f33db0adcdb75f0c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 20 Oct 2016 15:12:11 +0200 Subject: block: move REQ_RAHEAD to common flags The information that an I/O is a read-ahead can be useful for drivers. In fact the NVMe driver already checks it, even if it won't ever be set at the moment. 
Signed-off-by: Christoph Hellwig Reviewed-by: Shaun Tancheff Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index b54142534793..44f9bca332e5 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -163,9 +163,9 @@ enum rq_flag_bits { __REQ_INTEGRITY, /* I/O includes block integrity payload */ __REQ_FUA, /* forced unit access */ __REQ_PREFLUSH, /* request for cache flush */ + __REQ_RAHEAD, /* read ahead, can fail anytime */ /* bio only flags */ - __REQ_RAHEAD, /* read ahead, can fail anytime */ __REQ_THROTTLED, /* This bio has already been subjected to * throttling rules. Don't do it again. */ @@ -205,7 +205,7 @@ enum rq_flag_bits { (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER) #define REQ_COMMON_MASK \ (REQ_FAILFAST_MASK | REQ_SYNC | REQ_META | REQ_PRIO | REQ_NOIDLE | \ - REQ_PREFLUSH | REQ_FUA | REQ_INTEGRITY | REQ_NOMERGE) + REQ_PREFLUSH | REQ_FUA | REQ_INTEGRITY | REQ_NOMERGE | REQ_RAHEAD) #define REQ_CLONE_MASK REQ_COMMON_MASK /* This mask is used for both bio and request merge checking */ -- cgit v1.2.3 From 8d2bbd4c8236e9e38e6b36ac9e2c54fdcfe5b335 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 20 Oct 2016 15:12:12 +0200 Subject: block: replace REQ_THROTTLED with a bio flag It's the last bio-only REQ_* flag, and we have space for it in the bio bi_flags field. 
Signed-off-by: Christoph Hellwig Reviewed-by: Shaun Tancheff Signed-off-by: Jens Axboe --- block/blk-throttle.c | 10 +++++----- include/linux/blk_types.h | 8 ++------ 2 files changed, 7 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/block/blk-throttle.c b/block/blk-throttle.c index a3ea8260c94c..a6bb4fe326c3 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -818,13 +818,13 @@ static void throtl_charge_bio(struct throtl_grp *tg, struct bio *bio) tg->io_disp[rw]++; /* - * REQ_THROTTLED is used to prevent the same bio to be throttled + * BIO_THROTTLED is used to prevent the same bio to be throttled * more than once as a throttled bio will go through blk-throtl the * second time when it eventually gets issued. Set it when a bio * is being charged to a tg. */ - if (!(bio->bi_opf & REQ_THROTTLED)) - bio->bi_opf |= REQ_THROTTLED; + if (!bio_flagged(bio, BIO_THROTTLED)) + bio_set_flag(bio, BIO_THROTTLED); } /** @@ -1401,7 +1401,7 @@ bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg, WARN_ON_ONCE(!rcu_read_lock_held()); /* see throtl_charge_bio() */ - if ((bio->bi_opf & REQ_THROTTLED) || !tg->has_rules[rw]) + if (bio_flagged(bio, BIO_THROTTLED) || !tg->has_rules[rw]) goto out; spin_lock_irq(q->queue_lock); @@ -1480,7 +1480,7 @@ out: * being issued. */ if (!throttled) - bio->bi_opf &= ~REQ_THROTTLED; + bio_clear_flag(bio, BIO_THROTTLED); return throttled; } diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 44f9bca332e5..6df722de2e22 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -119,6 +119,8 @@ struct bio { #define BIO_QUIET 6 /* Make BIO Quiet */ #define BIO_CHAIN 7 /* chained bio, ->bi_remaining in effect */ #define BIO_REFFED 8 /* bio has elevated ->bi_cnt */ +#define BIO_THROTTLED 9 /* This bio has already been subjected to + * throttling rules. Don't do it again. 
*/ /* * Flags starting here get preserved by bio_reset() - this includes @@ -165,10 +167,6 @@ enum rq_flag_bits { __REQ_PREFLUSH, /* request for cache flush */ __REQ_RAHEAD, /* read ahead, can fail anytime */ - /* bio only flags */ - __REQ_THROTTLED, /* This bio has already been subjected to - * throttling rules. Don't do it again. */ - /* request only flags */ __REQ_SORTED, /* elevator knows about this request */ __REQ_SOFTBARRIER, /* may not be passed by ioscheduler */ @@ -213,8 +211,6 @@ enum rq_flag_bits { (REQ_NOMERGE | REQ_STARTED | REQ_SOFTBARRIER | REQ_PREFLUSH | REQ_FUA | REQ_FLUSH_SEQ) #define REQ_RAHEAD (1ULL << __REQ_RAHEAD) -#define REQ_THROTTLED (1ULL << __REQ_THROTTLED) - #define REQ_SORTED (1ULL << __REQ_SORTED) #define REQ_SOFTBARRIER (1ULL << __REQ_SOFTBARRIER) #define REQ_FUA (1ULL << __REQ_FUA) -- cgit v1.2.3 From e806402130c9c494e22c73ae9ead4e79d2a5811c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 20 Oct 2016 15:12:13 +0200 Subject: block: split out request-only flags into a new namespace A lot of the REQ_* flags are only used on struct requests, and only of use to the block layer and a few drivers that dig into struct request internals. This patch adds a new req_flags_t rq_flags field to struct request for them, and thus dramatically shrinks the number of common requests. It also removes the unfortunate situation where we have to fit the fields from the same enum into 32 bits for struct bio and 64 bits for struct request. 
Signed-off-by: Christoph Hellwig Reviewed-by: Shaun Tancheff Signed-off-by: Jens Axboe --- Documentation/block/biodoc.txt | 2 +- block/blk-core.c | 71 ++++++++++++++------------- block/blk-exec.c | 2 +- block/blk-flush.c | 9 ++-- block/blk-map.c | 4 +- block/blk-merge.c | 8 +-- block/blk-mq.c | 19 ++++---- block/blk-tag.c | 6 +-- block/blk.h | 4 +- block/elevator.c | 32 ++++++------ drivers/block/pktcdvd.c | 2 +- drivers/ide/ide-atapi.c | 6 +-- drivers/ide/ide-cd.c | 46 +++++++++--------- drivers/ide/ide-cd.h | 2 +- drivers/ide/ide-cd_ioctl.c | 6 +-- drivers/ide/ide-io.c | 6 +-- drivers/ide/ide-pm.c | 4 +- drivers/md/dm-rq.c | 12 ++--- drivers/memstick/core/ms_block.c | 2 +- drivers/memstick/core/mspro_block.c | 2 +- drivers/mmc/card/block.c | 4 +- drivers/mmc/card/queue.c | 4 +- drivers/nvme/host/pci.c | 4 +- drivers/scsi/device_handler/scsi_dh_alua.c | 8 +-- drivers/scsi/device_handler/scsi_dh_emc.c | 2 +- drivers/scsi/device_handler/scsi_dh_hp_sw.c | 2 +- drivers/scsi/device_handler/scsi_dh_rdac.c | 2 +- drivers/scsi/osd/osd_initiator.c | 2 +- drivers/scsi/osst.c | 2 +- drivers/scsi/scsi_error.c | 2 +- drivers/scsi/scsi_lib.c | 75 +++++++++++++++++------------ drivers/scsi/sd.c | 6 +-- drivers/scsi/sd_zbc.c | 2 +- drivers/scsi/st.c | 2 +- drivers/scsi/ufs/ufshcd.c | 6 +-- include/linux/blk_types.h | 39 +-------------- include/linux/blkdev.h | 49 ++++++++++++++++++- include/scsi/scsi_device.h | 4 +- 38 files changed, 242 insertions(+), 218 deletions(-) (limited to 'include/linux') diff --git a/Documentation/block/biodoc.txt b/Documentation/block/biodoc.txt index 918e1e0d0e78..6acea160298c 100644 --- a/Documentation/block/biodoc.txt +++ b/Documentation/block/biodoc.txt @@ -348,7 +348,7 @@ Drivers can now specify a request prepare function (q->prep_rq_fn) that the block layer would invoke to pre-build device commands for a given request, or perform other preparatory processing for the request. This is routine is called by elv_next_request(), i.e. 
typically just before servicing a request. -(The prepare function would not be called for requests that have REQ_DONTPREP +(The prepare function would not be called for requests that have RQF_DONTPREP enabled) Aside: diff --git a/block/blk-core.c b/block/blk-core.c index e4eda5d2aa56..fd416651a676 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -145,13 +145,13 @@ static void req_bio_endio(struct request *rq, struct bio *bio, if (error) bio->bi_error = error; - if (unlikely(rq->cmd_flags & REQ_QUIET)) + if (unlikely(rq->rq_flags & RQF_QUIET)) bio_set_flag(bio, BIO_QUIET); bio_advance(bio, nbytes); /* don't actually finish bio if it's part of flush sequence */ - if (bio->bi_iter.bi_size == 0 && !(rq->cmd_flags & REQ_FLUSH_SEQ)) + if (bio->bi_iter.bi_size == 0 && !(rq->rq_flags & RQF_FLUSH_SEQ)) bio_endio(bio); } @@ -899,7 +899,7 @@ EXPORT_SYMBOL(blk_get_queue); static inline void blk_free_request(struct request_list *rl, struct request *rq) { - if (rq->cmd_flags & REQ_ELVPRIV) { + if (rq->rq_flags & RQF_ELVPRIV) { elv_put_request(rl->q, rq); if (rq->elv.icq) put_io_context(rq->elv.icq->ioc); @@ -961,14 +961,14 @@ static void __freed_request(struct request_list *rl, int sync) * A request has just been released. Account for it, update the full and * congestion status, wake up any waiters. Called under q->queue_lock. 
*/ -static void freed_request(struct request_list *rl, int op, unsigned int flags) +static void freed_request(struct request_list *rl, bool sync, + req_flags_t rq_flags) { struct request_queue *q = rl->q; - int sync = rw_is_sync(op, flags); q->nr_rqs[sync]--; rl->count[sync]--; - if (flags & REQ_ELVPRIV) + if (rq_flags & RQF_ELVPRIV) q->nr_rqs_elvpriv--; __freed_request(rl, sync); @@ -1079,6 +1079,7 @@ static struct request *__get_request(struct request_list *rl, int op, struct io_cq *icq = NULL; const bool is_sync = rw_is_sync(op, op_flags) != 0; int may_queue; + req_flags_t rq_flags = RQF_ALLOCED; if (unlikely(blk_queue_dying(q))) return ERR_PTR(-ENODEV); @@ -1127,7 +1128,7 @@ static struct request *__get_request(struct request_list *rl, int op, /* * Decide whether the new request will be managed by elevator. If - * so, mark @op_flags and increment elvpriv. Non-zero elvpriv will + * so, mark @rq_flags and increment elvpriv. Non-zero elvpriv will * prevent the current elevator from being destroyed until the new * request is freed. This guarantees icq's won't be destroyed and * makes creating new ones safe. @@ -1136,14 +1137,14 @@ static struct request *__get_request(struct request_list *rl, int op, * it will be created after releasing queue_lock. 
*/ if (blk_rq_should_init_elevator(bio) && !blk_queue_bypass(q)) { - op_flags |= REQ_ELVPRIV; + rq_flags |= RQF_ELVPRIV; q->nr_rqs_elvpriv++; if (et->icq_cache && ioc) icq = ioc_lookup_icq(ioc, q); } if (blk_queue_io_stat(q)) - op_flags |= REQ_IO_STAT; + rq_flags |= RQF_IO_STAT; spin_unlock_irq(q->queue_lock); /* allocate and init request */ @@ -1153,10 +1154,11 @@ static struct request *__get_request(struct request_list *rl, int op, blk_rq_init(q, rq); blk_rq_set_rl(rq, rl); - req_set_op_attrs(rq, op, op_flags | REQ_ALLOCED); + req_set_op_attrs(rq, op, op_flags); + rq->rq_flags = rq_flags; /* init elvpriv */ - if (op_flags & REQ_ELVPRIV) { + if (rq_flags & RQF_ELVPRIV) { if (unlikely(et->icq_cache && !icq)) { if (ioc) icq = ioc_create_icq(ioc, q, gfp_mask); @@ -1195,7 +1197,7 @@ fail_elvpriv: printk_ratelimited(KERN_WARNING "%s: dev %s: request aux data allocation failed, iosched may be disturbed\n", __func__, dev_name(q->backing_dev_info.dev)); - rq->cmd_flags &= ~REQ_ELVPRIV; + rq->rq_flags &= ~RQF_ELVPRIV; rq->elv.icq = NULL; spin_lock_irq(q->queue_lock); @@ -1212,7 +1214,7 @@ fail_alloc: * queue, but this is pretty rare. 
*/ spin_lock_irq(q->queue_lock); - freed_request(rl, op, op_flags); + freed_request(rl, is_sync, rq_flags); /* * in the very unlikely event that allocation failed and no @@ -1347,7 +1349,7 @@ void blk_requeue_request(struct request_queue *q, struct request *rq) blk_clear_rq_complete(rq); trace_block_rq_requeue(q, rq); - if (rq->cmd_flags & REQ_QUEUED) + if (rq->rq_flags & RQF_QUEUED) blk_queue_end_tag(q, rq); BUG_ON(blk_queued_rq(rq)); @@ -1409,7 +1411,7 @@ EXPORT_SYMBOL_GPL(part_round_stats); #ifdef CONFIG_PM static void blk_pm_put_request(struct request *rq) { - if (rq->q->dev && !(rq->cmd_flags & REQ_PM) && !--rq->q->nr_pending) + if (rq->q->dev && !(rq->rq_flags & RQF_PM) && !--rq->q->nr_pending) pm_runtime_mark_last_busy(rq->q->dev); } #else @@ -1421,6 +1423,8 @@ static inline void blk_pm_put_request(struct request *rq) {} */ void __blk_put_request(struct request_queue *q, struct request *req) { + req_flags_t rq_flags = req->rq_flags; + if (unlikely(!q)) return; @@ -1440,16 +1444,15 @@ void __blk_put_request(struct request_queue *q, struct request *req) * Request may not have originated from ll_rw_blk. if not, * it didn't come out of our reserved rq pools */ - if (req->cmd_flags & REQ_ALLOCED) { - unsigned int flags = req->cmd_flags; - int op = req_op(req); + if (rq_flags & RQF_ALLOCED) { struct request_list *rl = blk_rq_rl(req); + bool sync = rw_is_sync(req_op(req), req->cmd_flags); BUG_ON(!list_empty(&req->queuelist)); BUG_ON(ELV_ON_HASH(req)); blk_free_request(rl, req); - freed_request(rl, op, flags); + freed_request(rl, sync, rq_flags); blk_put_rl(rl); } } @@ -2214,7 +2217,7 @@ unsigned int blk_rq_err_bytes(const struct request *rq) unsigned int bytes = 0; struct bio *bio; - if (!(rq->cmd_flags & REQ_MIXED_MERGE)) + if (!(rq->rq_flags & RQF_MIXED_MERGE)) return blk_rq_bytes(rq); /* @@ -2257,7 +2260,7 @@ void blk_account_io_done(struct request *req) * normal IO on queueing nor completion. Accounting the * containing request is enough. 
*/ - if (blk_do_io_stat(req) && !(req->cmd_flags & REQ_FLUSH_SEQ)) { + if (blk_do_io_stat(req) && !(req->rq_flags & RQF_FLUSH_SEQ)) { unsigned long duration = jiffies - req->start_time; const int rw = rq_data_dir(req); struct hd_struct *part; @@ -2285,7 +2288,7 @@ static struct request *blk_pm_peek_request(struct request_queue *q, struct request *rq) { if (q->dev && (q->rpm_status == RPM_SUSPENDED || - (q->rpm_status != RPM_ACTIVE && !(rq->cmd_flags & REQ_PM)))) + (q->rpm_status != RPM_ACTIVE && !(rq->rq_flags & RQF_PM)))) return NULL; else return rq; @@ -2361,13 +2364,13 @@ struct request *blk_peek_request(struct request_queue *q) if (!rq) break; - if (!(rq->cmd_flags & REQ_STARTED)) { + if (!(rq->rq_flags & RQF_STARTED)) { /* * This is the first time the device driver * sees this request (possibly after * requeueing). Notify IO scheduler. */ - if (rq->cmd_flags & REQ_SORTED) + if (rq->rq_flags & RQF_SORTED) elv_activate_rq(q, rq); /* @@ -2375,7 +2378,7 @@ struct request *blk_peek_request(struct request_queue *q) * it, a request that has been delayed should * not be passed by new incoming requests */ - rq->cmd_flags |= REQ_STARTED; + rq->rq_flags |= RQF_STARTED; trace_block_rq_issue(q, rq); } @@ -2384,7 +2387,7 @@ struct request *blk_peek_request(struct request_queue *q) q->boundary_rq = NULL; } - if (rq->cmd_flags & REQ_DONTPREP) + if (rq->rq_flags & RQF_DONTPREP) break; if (q->dma_drain_size && blk_rq_bytes(rq)) { @@ -2407,11 +2410,11 @@ struct request *blk_peek_request(struct request_queue *q) /* * the request may have been (partially) prepped. * we need to keep this request in the front to - * avoid resource deadlock. REQ_STARTED will + * avoid resource deadlock. RQF_STARTED will * prevent other fs requests from passing this one. 
*/ if (q->dma_drain_size && blk_rq_bytes(rq) && - !(rq->cmd_flags & REQ_DONTPREP)) { + !(rq->rq_flags & RQF_DONTPREP)) { /* * remove the space for the drain we added * so that we don't add it again @@ -2424,7 +2427,7 @@ struct request *blk_peek_request(struct request_queue *q) } else if (ret == BLKPREP_KILL || ret == BLKPREP_INVALID) { int err = (ret == BLKPREP_INVALID) ? -EREMOTEIO : -EIO; - rq->cmd_flags |= REQ_QUIET; + rq->rq_flags |= RQF_QUIET; /* * Mark this request as started so we don't trigger * any debug logic in the end I/O path. @@ -2561,7 +2564,7 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes) req->errors = 0; if (error && req->cmd_type == REQ_TYPE_FS && - !(req->cmd_flags & REQ_QUIET)) { + !(req->rq_flags & RQF_QUIET)) { char *error_type; switch (error) { @@ -2634,7 +2637,7 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes) req->__sector += total_bytes >> 9; /* mixed attributes always follow the first bio */ - if (req->cmd_flags & REQ_MIXED_MERGE) { + if (req->rq_flags & RQF_MIXED_MERGE) { req->cmd_flags &= ~REQ_FAILFAST_MASK; req->cmd_flags |= req->bio->bi_opf & REQ_FAILFAST_MASK; } @@ -2687,7 +2690,7 @@ void blk_unprep_request(struct request *req) { struct request_queue *q = req->q; - req->cmd_flags &= ~REQ_DONTPREP; + req->rq_flags &= ~RQF_DONTPREP; if (q->unprep_rq_fn) q->unprep_rq_fn(q, req); } @@ -2698,7 +2701,7 @@ EXPORT_SYMBOL_GPL(blk_unprep_request); */ void blk_finish_request(struct request *req, int error) { - if (req->cmd_flags & REQ_QUEUED) + if (req->rq_flags & RQF_QUEUED) blk_queue_end_tag(req->q, req); BUG_ON(blk_queued_rq(req)); @@ -2708,7 +2711,7 @@ void blk_finish_request(struct request *req, int error) blk_delete_timer(req); - if (req->cmd_flags & REQ_DONTPREP) + if (req->rq_flags & RQF_DONTPREP) blk_unprep_request(req); blk_account_io_done(req); diff --git a/block/blk-exec.c b/block/blk-exec.c index 7ea04325d02f..3ecb00a6cf45 100644 --- a/block/blk-exec.c +++ 
b/block/blk-exec.c @@ -72,7 +72,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk, spin_lock_irq(q->queue_lock); if (unlikely(blk_queue_dying(q))) { - rq->cmd_flags |= REQ_QUIET; + rq->rq_flags |= RQF_QUIET; rq->errors = -ENXIO; __blk_end_request_all(rq, rq->errors); spin_unlock_irq(q->queue_lock); diff --git a/block/blk-flush.c b/block/blk-flush.c index 6a14b68b9135..3990b9cfbda5 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -56,7 +56,7 @@ * Once while executing DATA and again after the whole sequence is * complete. The first completion updates the contained bio but doesn't * finish it so that the bio submitter is notified only after the whole - * sequence is complete. This is implemented by testing REQ_FLUSH_SEQ in + * sequence is complete. This is implemented by testing RQF_FLUSH_SEQ in * req_bio_endio(). * * The above peculiarity requires that each FLUSH/FUA request has only one @@ -127,7 +127,7 @@ static void blk_flush_restore_request(struct request *rq) rq->bio = rq->biotail; /* make @rq a normal request */ - rq->cmd_flags &= ~REQ_FLUSH_SEQ; + rq->rq_flags &= ~RQF_FLUSH_SEQ; rq->end_io = rq->flush.saved_end_io; } @@ -330,7 +330,8 @@ static bool blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq) } flush_rq->cmd_type = REQ_TYPE_FS; - req_set_op_attrs(flush_rq, REQ_OP_FLUSH, WRITE_FLUSH | REQ_FLUSH_SEQ); + req_set_op_attrs(flush_rq, REQ_OP_FLUSH, WRITE_FLUSH); + flush_rq->rq_flags |= RQF_FLUSH_SEQ; flush_rq->rq_disk = first_rq->rq_disk; flush_rq->end_io = flush_end_io; @@ -433,7 +434,7 @@ void blk_insert_flush(struct request *rq) */ memset(&rq->flush, 0, sizeof(rq->flush)); INIT_LIST_HEAD(&rq->flush.list); - rq->cmd_flags |= REQ_FLUSH_SEQ; + rq->rq_flags |= RQF_FLUSH_SEQ; rq->flush.saved_end_io = rq->end_io; /* Usually NULL */ if (q->mq_ops) { rq->end_io = mq_flush_data_end_io; diff --git a/block/blk-map.c b/block/blk-map.c index b8657fa8dc9a..2c5ae5fef473 100644 --- a/block/blk-map.c +++ 
b/block/blk-map.c @@ -135,7 +135,7 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq, } while (iov_iter_count(&i)); if (!bio_flagged(bio, BIO_USER_MAPPED)) - rq->cmd_flags |= REQ_COPY_USER; + rq->rq_flags |= RQF_COPY_USER; return 0; unmap_rq: @@ -232,7 +232,7 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf, bio_set_op_attrs(bio, REQ_OP_WRITE, 0); if (do_copy) - rq->cmd_flags |= REQ_COPY_USER; + rq->rq_flags |= RQF_COPY_USER; ret = blk_rq_append_bio(rq, bio); if (unlikely(ret)) { diff --git a/block/blk-merge.c b/block/blk-merge.c index 2642e5fc8b69..fda6a12fc776 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -456,7 +456,7 @@ int blk_rq_map_sg(struct request_queue *q, struct request *rq, if (rq->bio) nsegs = __blk_bios_map_sg(q, rq->bio, sglist, &sg); - if (unlikely(rq->cmd_flags & REQ_COPY_USER) && + if (unlikely(rq->rq_flags & RQF_COPY_USER) && (blk_rq_bytes(rq) & q->dma_pad_mask)) { unsigned int pad_len = (q->dma_pad_mask & ~blk_rq_bytes(rq)) + 1; @@ -634,7 +634,7 @@ void blk_rq_set_mixed_merge(struct request *rq) unsigned int ff = rq->cmd_flags & REQ_FAILFAST_MASK; struct bio *bio; - if (rq->cmd_flags & REQ_MIXED_MERGE) + if (rq->rq_flags & RQF_MIXED_MERGE) return; /* @@ -647,7 +647,7 @@ void blk_rq_set_mixed_merge(struct request *rq) (bio->bi_opf & REQ_FAILFAST_MASK) != ff); bio->bi_opf |= ff; } - rq->cmd_flags |= REQ_MIXED_MERGE; + rq->rq_flags |= RQF_MIXED_MERGE; } static void blk_account_io_merge(struct request *req) @@ -709,7 +709,7 @@ static int attempt_merge(struct request_queue *q, struct request *req, * makes sure that all involved bios have mixable attributes * set properly. 
*/ - if ((req->cmd_flags | next->cmd_flags) & REQ_MIXED_MERGE || + if (((req->rq_flags | next->rq_flags) & RQF_MIXED_MERGE) || (req->cmd_flags & REQ_FAILFAST_MASK) != (next->cmd_flags & REQ_FAILFAST_MASK)) { blk_rq_set_mixed_merge(req); diff --git a/block/blk-mq.c b/block/blk-mq.c index d74a74a9f9ef..b49c6658eb05 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -142,14 +142,13 @@ static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx, struct request *rq, int op, unsigned int op_flags) { - if (blk_queue_io_stat(q)) - op_flags |= REQ_IO_STAT; - INIT_LIST_HEAD(&rq->queuelist); /* csd/requeue_work/fifo_time is initialized before use */ rq->q = q; rq->mq_ctx = ctx; req_set_op_attrs(rq, op, op_flags); + if (blk_queue_io_stat(q)) + rq->rq_flags |= RQF_IO_STAT; /* do not touch atomic flags, it needs atomic ops against the timer */ rq->cpu = -1; INIT_HLIST_NODE(&rq->hash); @@ -198,7 +197,7 @@ __blk_mq_alloc_request(struct blk_mq_alloc_data *data, int op, int op_flags) rq = data->hctx->tags->rqs[tag]; if (blk_mq_tag_busy(data->hctx)) { - rq->cmd_flags = REQ_MQ_INFLIGHT; + rq->rq_flags = RQF_MQ_INFLIGHT; atomic_inc(&data->hctx->nr_active); } @@ -298,9 +297,9 @@ static void __blk_mq_free_request(struct blk_mq_hw_ctx *hctx, const int tag = rq->tag; struct request_queue *q = rq->q; - if (rq->cmd_flags & REQ_MQ_INFLIGHT) + if (rq->rq_flags & RQF_MQ_INFLIGHT) atomic_dec(&hctx->nr_active); - rq->cmd_flags = 0; + rq->rq_flags = 0; clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags); blk_mq_put_tag(hctx, ctx, tag); @@ -489,10 +488,10 @@ static void blk_mq_requeue_work(struct work_struct *work) spin_unlock_irqrestore(&q->requeue_lock, flags); list_for_each_entry_safe(rq, next, &rq_list, queuelist) { - if (!(rq->cmd_flags & REQ_SOFTBARRIER)) + if (!(rq->rq_flags & RQF_SOFTBARRIER)) continue; - rq->cmd_flags &= ~REQ_SOFTBARRIER; + rq->rq_flags &= ~RQF_SOFTBARRIER; list_del_init(&rq->queuelist); blk_mq_insert_request(rq, true, false, false); } @@ -519,11 +518,11 @@ 
void blk_mq_add_to_requeue_list(struct request *rq, bool at_head) * We abuse this flag that is otherwise used by the I/O scheduler to * request head insertation from the workqueue. */ - BUG_ON(rq->cmd_flags & REQ_SOFTBARRIER); + BUG_ON(rq->rq_flags & RQF_SOFTBARRIER); spin_lock_irqsave(&q->requeue_lock, flags); if (at_head) { - rq->cmd_flags |= REQ_SOFTBARRIER; + rq->rq_flags |= RQF_SOFTBARRIER; list_add(&rq->queuelist, &q->requeue_list); } else { list_add_tail(&rq->queuelist, &q->requeue_list); diff --git a/block/blk-tag.c b/block/blk-tag.c index f0344e6939d5..bae1decb6ec3 100644 --- a/block/blk-tag.c +++ b/block/blk-tag.c @@ -270,7 +270,7 @@ void blk_queue_end_tag(struct request_queue *q, struct request *rq) BUG_ON(tag >= bqt->real_max_depth); list_del_init(&rq->queuelist); - rq->cmd_flags &= ~REQ_QUEUED; + rq->rq_flags &= ~RQF_QUEUED; rq->tag = -1; if (unlikely(bqt->tag_index[tag] == NULL)) @@ -316,7 +316,7 @@ int blk_queue_start_tag(struct request_queue *q, struct request *rq) unsigned max_depth; int tag; - if (unlikely((rq->cmd_flags & REQ_QUEUED))) { + if (unlikely((rq->rq_flags & RQF_QUEUED))) { printk(KERN_ERR "%s: request %p for device [%s] already tagged %d", __func__, rq, @@ -371,7 +371,7 @@ int blk_queue_start_tag(struct request_queue *q, struct request *rq) */ bqt->next_tag = (tag + 1) % bqt->max_depth; - rq->cmd_flags |= REQ_QUEUED; + rq->rq_flags |= RQF_QUEUED; rq->tag = tag; bqt->tag_index[tag] = rq; blk_start_request(rq); diff --git a/block/blk.h b/block/blk.h index 74444c49078f..aa132dea598c 100644 --- a/block/blk.h +++ b/block/blk.h @@ -130,7 +130,7 @@ static inline void blk_clear_rq_complete(struct request *rq) /* * Internal elevator interface */ -#define ELV_ON_HASH(rq) ((rq)->cmd_flags & REQ_HASHED) +#define ELV_ON_HASH(rq) ((rq)->rq_flags & RQF_HASHED) void blk_insert_flush(struct request *rq); @@ -247,7 +247,7 @@ extern int blk_update_nr_requests(struct request_queue *, unsigned int); static inline int blk_do_io_stat(struct request *rq) { 
return rq->rq_disk && - (rq->cmd_flags & REQ_IO_STAT) && + (rq->rq_flags & RQF_IO_STAT) && (rq->cmd_type == REQ_TYPE_FS); } diff --git a/block/elevator.c b/block/elevator.c index f7d973a56fd7..ac80f89a0842 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -245,7 +245,7 @@ EXPORT_SYMBOL(elevator_exit); static inline void __elv_rqhash_del(struct request *rq) { hash_del(&rq->hash); - rq->cmd_flags &= ~REQ_HASHED; + rq->rq_flags &= ~RQF_HASHED; } static void elv_rqhash_del(struct request_queue *q, struct request *rq) @@ -260,7 +260,7 @@ static void elv_rqhash_add(struct request_queue *q, struct request *rq) BUG_ON(ELV_ON_HASH(rq)); hash_add(e->hash, &rq->hash, rq_hash_key(rq)); - rq->cmd_flags |= REQ_HASHED; + rq->rq_flags |= RQF_HASHED; } static void elv_rqhash_reposition(struct request_queue *q, struct request *rq) @@ -352,7 +352,6 @@ void elv_dispatch_sort(struct request_queue *q, struct request *rq) { sector_t boundary; struct list_head *entry; - int stop_flags; if (q->last_merge == rq) q->last_merge = NULL; @@ -362,7 +361,6 @@ void elv_dispatch_sort(struct request_queue *q, struct request *rq) q->nr_sorted--; boundary = q->end_sector; - stop_flags = REQ_SOFTBARRIER | REQ_STARTED; list_for_each_prev(entry, &q->queue_head) { struct request *pos = list_entry_rq(entry); @@ -370,7 +368,7 @@ void elv_dispatch_sort(struct request_queue *q, struct request *rq) break; if (rq_data_dir(rq) != rq_data_dir(pos)) break; - if (pos->cmd_flags & stop_flags) + if (pos->rq_flags & (RQF_STARTED | RQF_SOFTBARRIER)) break; if (blk_rq_pos(rq) >= boundary) { if (blk_rq_pos(pos) < boundary) @@ -510,7 +508,7 @@ void elv_merge_requests(struct request_queue *q, struct request *rq, struct request *next) { struct elevator_queue *e = q->elevator; - const int next_sorted = next->cmd_flags & REQ_SORTED; + const int next_sorted = next->rq_flags & RQF_SORTED; if (next_sorted && e->type->ops.elevator_merge_req_fn) e->type->ops.elevator_merge_req_fn(q, rq, next); @@ -537,13 +535,13 @@ void 
elv_bio_merged(struct request_queue *q, struct request *rq, #ifdef CONFIG_PM static void blk_pm_requeue_request(struct request *rq) { - if (rq->q->dev && !(rq->cmd_flags & REQ_PM)) + if (rq->q->dev && !(rq->rq_flags & RQF_PM)) rq->q->nr_pending--; } static void blk_pm_add_request(struct request_queue *q, struct request *rq) { - if (q->dev && !(rq->cmd_flags & REQ_PM) && q->nr_pending++ == 0 && + if (q->dev && !(rq->rq_flags & RQF_PM) && q->nr_pending++ == 0 && (q->rpm_status == RPM_SUSPENDED || q->rpm_status == RPM_SUSPENDING)) pm_request_resume(q->dev); } @@ -563,11 +561,11 @@ void elv_requeue_request(struct request_queue *q, struct request *rq) */ if (blk_account_rq(rq)) { q->in_flight[rq_is_sync(rq)]--; - if (rq->cmd_flags & REQ_SORTED) + if (rq->rq_flags & RQF_SORTED) elv_deactivate_rq(q, rq); } - rq->cmd_flags &= ~REQ_STARTED; + rq->rq_flags &= ~RQF_STARTED; blk_pm_requeue_request(rq); @@ -597,13 +595,13 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where) rq->q = q; - if (rq->cmd_flags & REQ_SOFTBARRIER) { + if (rq->rq_flags & RQF_SOFTBARRIER) { /* barriers are scheduling boundary, update end_sector */ if (rq->cmd_type == REQ_TYPE_FS) { q->end_sector = rq_end_sector(rq); q->boundary_rq = rq; } - } else if (!(rq->cmd_flags & REQ_ELVPRIV) && + } else if (!(rq->rq_flags & RQF_ELVPRIV) && (where == ELEVATOR_INSERT_SORT || where == ELEVATOR_INSERT_SORT_MERGE)) where = ELEVATOR_INSERT_BACK; @@ -611,12 +609,12 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where) switch (where) { case ELEVATOR_INSERT_REQUEUE: case ELEVATOR_INSERT_FRONT: - rq->cmd_flags |= REQ_SOFTBARRIER; + rq->rq_flags |= RQF_SOFTBARRIER; list_add(&rq->queuelist, &q->queue_head); break; case ELEVATOR_INSERT_BACK: - rq->cmd_flags |= REQ_SOFTBARRIER; + rq->rq_flags |= RQF_SOFTBARRIER; elv_drain_elevator(q); list_add_tail(&rq->queuelist, &q->queue_head); /* @@ -642,7 +640,7 @@ void __elv_add_request(struct request_queue *q, struct request *rq, 
int where) break; case ELEVATOR_INSERT_SORT: BUG_ON(rq->cmd_type != REQ_TYPE_FS); - rq->cmd_flags |= REQ_SORTED; + rq->rq_flags |= RQF_SORTED; q->nr_sorted++; if (rq_mergeable(rq)) { elv_rqhash_add(q, rq); @@ -659,7 +657,7 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where) break; case ELEVATOR_INSERT_FLUSH: - rq->cmd_flags |= REQ_SOFTBARRIER; + rq->rq_flags |= RQF_SOFTBARRIER; blk_insert_flush(rq); break; default: @@ -735,7 +733,7 @@ void elv_completed_request(struct request_queue *q, struct request *rq) */ if (blk_account_rq(rq)) { q->in_flight[rq_is_sync(rq)]--; - if ((rq->cmd_flags & REQ_SORTED) && + if ((rq->rq_flags & RQF_SORTED) && e->type->ops.elevator_completed_req_fn) e->type->ops.elevator_completed_req_fn(q, rq); } diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 90fa4ac149db..7cf795e0fc8d 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -721,7 +721,7 @@ static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command * rq->timeout = 60*HZ; if (cgc->quiet) - rq->cmd_flags |= REQ_QUIET; + rq->rq_flags |= RQF_QUIET; blk_execute_rq(rq->q, pd->bdev->bd_disk, rq, 0); if (rq->errors) diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c index 05352f490d60..f90ea221f7f2 100644 --- a/drivers/ide/ide-atapi.c +++ b/drivers/ide/ide-atapi.c @@ -211,7 +211,7 @@ void ide_prep_sense(ide_drive_t *drive, struct request *rq) sense_rq->cmd[0] = GPCMD_REQUEST_SENSE; sense_rq->cmd[4] = cmd_len; sense_rq->cmd_type = REQ_TYPE_ATA_SENSE; - sense_rq->cmd_flags |= REQ_PREEMPT; + sense_rq->rq_flags |= RQF_PREEMPT; if (drive->media == ide_tape) sense_rq->cmd[13] = REQ_IDETAPE_PC1; @@ -295,7 +295,7 @@ int ide_cd_expiry(ide_drive_t *drive) wait = ATAPI_WAIT_PC; break; default: - if (!(rq->cmd_flags & REQ_QUIET)) + if (!(rq->rq_flags & RQF_QUIET)) printk(KERN_INFO PFX "cmd 0x%x timed out\n", rq->cmd[0]); wait = 0; @@ -375,7 +375,7 @@ int ide_check_ireason(ide_drive_t *drive, struct request 
*rq, int len, } if (dev_is_idecd(drive) && rq->cmd_type == REQ_TYPE_ATA_PC) - rq->cmd_flags |= REQ_FAILED; + rq->rq_flags |= RQF_FAILED; return 1; } diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index bf9a2ad296ed..9cbd217bc0c9 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -98,7 +98,7 @@ static int cdrom_log_sense(ide_drive_t *drive, struct request *rq) struct request_sense *sense = &drive->sense_data; int log = 0; - if (!sense || !rq || (rq->cmd_flags & REQ_QUIET)) + if (!sense || !rq || (rq->rq_flags & RQF_QUIET)) return 0; ide_debug_log(IDE_DBG_SENSE, "sense_key: 0x%x", sense->sense_key); @@ -291,7 +291,7 @@ static int cdrom_decode_status(ide_drive_t *drive, u8 stat) * (probably while trying to recover from a former error). * Just give up. */ - rq->cmd_flags |= REQ_FAILED; + rq->rq_flags |= RQF_FAILED; return 2; } @@ -311,7 +311,7 @@ static int cdrom_decode_status(ide_drive_t *drive, u8 stat) cdrom_saw_media_change(drive); if (rq->cmd_type == REQ_TYPE_FS && - !(rq->cmd_flags & REQ_QUIET)) + !(rq->rq_flags & RQF_QUIET)) printk(KERN_ERR PFX "%s: tray open\n", drive->name); } @@ -346,7 +346,7 @@ static int cdrom_decode_status(ide_drive_t *drive, u8 stat) * No point in retrying after an illegal request or data * protect error. */ - if (!(rq->cmd_flags & REQ_QUIET)) + if (!(rq->rq_flags & RQF_QUIET)) ide_dump_status(drive, "command error", stat); do_end_request = 1; break; @@ -355,14 +355,14 @@ static int cdrom_decode_status(ide_drive_t *drive, u8 stat) * No point in re-trying a zillion times on a bad sector. * If we got here the error is not correctable. */ - if (!(rq->cmd_flags & REQ_QUIET)) + if (!(rq->rq_flags & RQF_QUIET)) ide_dump_status(drive, "media error " "(bad sector)", stat); do_end_request = 1; break; case BLANK_CHECK: /* disk appears blank? 
*/ - if (!(rq->cmd_flags & REQ_QUIET)) + if (!(rq->rq_flags & RQF_QUIET)) ide_dump_status(drive, "media error (blank)", stat); do_end_request = 1; @@ -380,7 +380,7 @@ static int cdrom_decode_status(ide_drive_t *drive, u8 stat) } if (rq->cmd_type != REQ_TYPE_FS) { - rq->cmd_flags |= REQ_FAILED; + rq->rq_flags |= RQF_FAILED; do_end_request = 1; } @@ -422,19 +422,19 @@ static void ide_cd_request_sense_fixup(ide_drive_t *drive, struct ide_cmd *cmd) int ide_cd_queue_pc(ide_drive_t *drive, const unsigned char *cmd, int write, void *buffer, unsigned *bufflen, struct request_sense *sense, int timeout, - unsigned int cmd_flags) + req_flags_t rq_flags) { struct cdrom_info *info = drive->driver_data; struct request_sense local_sense; int retries = 10; - unsigned int flags = 0; + req_flags_t flags = 0; if (!sense) sense = &local_sense; ide_debug_log(IDE_DBG_PC, "cmd[0]: 0x%x, write: 0x%x, timeout: %d, " - "cmd_flags: 0x%x", - cmd[0], write, timeout, cmd_flags); + "rq_flags: 0x%x", + cmd[0], write, timeout, rq_flags); /* start of retry loop */ do { @@ -446,7 +446,7 @@ int ide_cd_queue_pc(ide_drive_t *drive, const unsigned char *cmd, memcpy(rq->cmd, cmd, BLK_MAX_CDB); rq->cmd_type = REQ_TYPE_ATA_PC; rq->sense = sense; - rq->cmd_flags |= cmd_flags; + rq->rq_flags |= rq_flags; rq->timeout = timeout; if (buffer) { error = blk_rq_map_kern(drive->queue, rq, buffer, @@ -462,14 +462,14 @@ int ide_cd_queue_pc(ide_drive_t *drive, const unsigned char *cmd, if (buffer) *bufflen = rq->resid_len; - flags = rq->cmd_flags; + flags = rq->rq_flags; blk_put_request(rq); /* * FIXME: we should probably abort/retry or something in case of * failure. */ - if (flags & REQ_FAILED) { + if (flags & RQF_FAILED) { /* * The request failed. Retry if it was due to a unit * attention status (usually means media was changed). 
@@ -494,10 +494,10 @@ int ide_cd_queue_pc(ide_drive_t *drive, const unsigned char *cmd, } /* end of retry loop */ - } while ((flags & REQ_FAILED) && retries >= 0); + } while ((flags & RQF_FAILED) && retries >= 0); /* return an error if the command failed */ - return (flags & REQ_FAILED) ? -EIO : 0; + return (flags & RQF_FAILED) ? -EIO : 0; } /* @@ -589,7 +589,7 @@ static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive) "(%u bytes)\n", drive->name, __func__, cmd->nleft); if (!write) - rq->cmd_flags |= REQ_FAILED; + rq->rq_flags |= RQF_FAILED; uptodate = 0; } } else if (rq->cmd_type != REQ_TYPE_BLOCK_PC) { @@ -607,7 +607,7 @@ static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive) } if (!uptodate) - rq->cmd_flags |= REQ_FAILED; + rq->rq_flags |= RQF_FAILED; } goto out_end; } @@ -745,9 +745,9 @@ static void cdrom_do_block_pc(ide_drive_t *drive, struct request *rq) rq->cmd[0], rq->cmd_type); if (rq->cmd_type == REQ_TYPE_BLOCK_PC) - rq->cmd_flags |= REQ_QUIET; + rq->rq_flags |= RQF_QUIET; else - rq->cmd_flags &= ~REQ_FAILED; + rq->rq_flags &= ~RQF_FAILED; drive->dma = 0; @@ -867,7 +867,7 @@ int cdrom_check_status(ide_drive_t *drive, struct request_sense *sense) */ cmd[7] = cdi->sanyo_slot % 3; - return ide_cd_queue_pc(drive, cmd, 0, NULL, NULL, sense, 0, REQ_QUIET); + return ide_cd_queue_pc(drive, cmd, 0, NULL, NULL, sense, 0, RQF_QUIET); } static int cdrom_read_capacity(ide_drive_t *drive, unsigned long *capacity, @@ -890,7 +890,7 @@ static int cdrom_read_capacity(ide_drive_t *drive, unsigned long *capacity, cmd[0] = GPCMD_READ_CDVD_CAPACITY; stat = ide_cd_queue_pc(drive, cmd, 0, &capbuf, &len, sense, 0, - REQ_QUIET); + RQF_QUIET); if (stat) return stat; @@ -943,7 +943,7 @@ static int cdrom_read_tocentry(ide_drive_t *drive, int trackno, int msf_flag, if (msf_flag) cmd[1] = 2; - return ide_cd_queue_pc(drive, cmd, 0, buf, &buflen, sense, 0, REQ_QUIET); + return ide_cd_queue_pc(drive, cmd, 0, buf, &buflen, sense, 0, RQF_QUIET); } /* Try to read the entire TOC for 
the disk into our internal buffer. */ diff --git a/drivers/ide/ide-cd.h b/drivers/ide/ide-cd.h index 1efc936f5b66..eea60c986c4f 100644 --- a/drivers/ide/ide-cd.h +++ b/drivers/ide/ide-cd.h @@ -101,7 +101,7 @@ void ide_cd_log_error(const char *, struct request *, struct request_sense *); /* ide-cd.c functions used by ide-cd_ioctl.c */ int ide_cd_queue_pc(ide_drive_t *, const unsigned char *, int, void *, - unsigned *, struct request_sense *, int, unsigned int); + unsigned *, struct request_sense *, int, req_flags_t); int ide_cd_read_toc(ide_drive_t *, struct request_sense *); int ide_cdrom_get_capabilities(ide_drive_t *, u8 *); void ide_cdrom_update_speed(ide_drive_t *, u8 *); diff --git a/drivers/ide/ide-cd_ioctl.c b/drivers/ide/ide-cd_ioctl.c index 5887a7a09e37..f085e3a2e1d6 100644 --- a/drivers/ide/ide-cd_ioctl.c +++ b/drivers/ide/ide-cd_ioctl.c @@ -305,7 +305,7 @@ int ide_cdrom_reset(struct cdrom_device_info *cdi) rq = blk_get_request(drive->queue, READ, __GFP_RECLAIM); rq->cmd_type = REQ_TYPE_DRV_PRIV; - rq->cmd_flags = REQ_QUIET; + rq->rq_flags = RQF_QUIET; ret = blk_execute_rq(drive->queue, cd->disk, rq, 0); blk_put_request(rq); /* @@ -449,7 +449,7 @@ int ide_cdrom_packet(struct cdrom_device_info *cdi, struct packet_command *cgc) { ide_drive_t *drive = cdi->handle; - unsigned int flags = 0; + req_flags_t flags = 0; unsigned len = cgc->buflen; if (cgc->timeout <= 0) @@ -463,7 +463,7 @@ int ide_cdrom_packet(struct cdrom_device_info *cdi, memset(cgc->sense, 0, sizeof(struct request_sense)); if (cgc->quiet) - flags |= REQ_QUIET; + flags |= RQF_QUIET; cgc->stat = ide_cd_queue_pc(drive, cgc->cmd, cgc->data_direction == CGC_DATA_WRITE, diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c index 669ea1e45795..6360bbd37efe 100644 --- a/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c @@ -307,7 +307,7 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq) { ide_startstop_t startstop; - BUG_ON(!(rq->cmd_flags & REQ_STARTED)); + 
BUG_ON(!(rq->rq_flags & RQF_STARTED)); #ifdef DEBUG printk("%s: start_request: current=0x%08lx\n", @@ -316,7 +316,7 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq) /* bail early if we've exceeded max_failures */ if (drive->max_failures && (drive->failures > drive->max_failures)) { - rq->cmd_flags |= REQ_FAILED; + rq->rq_flags |= RQF_FAILED; goto kill_rq; } @@ -539,7 +539,7 @@ repeat: */ if ((drive->dev_flags & IDE_DFLAG_BLOCKED) && ata_pm_request(rq) == 0 && - (rq->cmd_flags & REQ_PREEMPT) == 0) { + (rq->rq_flags & RQF_PREEMPT) == 0) { /* there should be no pending command at this point */ ide_unlock_port(hwif); goto plug_device; diff --git a/drivers/ide/ide-pm.c b/drivers/ide/ide-pm.c index e34af488693a..a015acdffb39 100644 --- a/drivers/ide/ide-pm.c +++ b/drivers/ide/ide-pm.c @@ -53,7 +53,7 @@ static int ide_pm_execute_rq(struct request *rq) spin_lock_irq(q->queue_lock); if (unlikely(blk_queue_dying(q))) { - rq->cmd_flags |= REQ_QUIET; + rq->rq_flags |= RQF_QUIET; rq->errors = -ENXIO; __blk_end_request_all(rq, rq->errors); spin_unlock_irq(q->queue_lock); @@ -90,7 +90,7 @@ int generic_ide_resume(struct device *dev) memset(&rqpm, 0, sizeof(rqpm)); rq = blk_get_request(drive->queue, READ, __GFP_RECLAIM); rq->cmd_type = REQ_TYPE_ATA_PM_RESUME; - rq->cmd_flags |= REQ_PREEMPT; + rq->rq_flags |= RQF_PREEMPT; rq->special = &rqpm; rqpm.pm_step = IDE_PM_START_RESUME; rqpm.pm_state = PM_EVENT_ON; diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c index dc75bea0d541..f76cc36b8546 100644 --- a/drivers/md/dm-rq.c +++ b/drivers/md/dm-rq.c @@ -313,7 +313,7 @@ static void dm_unprep_request(struct request *rq) if (!rq->q->mq_ops) { rq->special = NULL; - rq->cmd_flags &= ~REQ_DONTPREP; + rq->rq_flags &= ~RQF_DONTPREP; } if (clone) @@ -431,7 +431,7 @@ static void dm_softirq_done(struct request *rq) return; } - if (rq->cmd_flags & REQ_FAILED) + if (rq->rq_flags & RQF_FAILED) mapped = false; dm_done(clone, tio->error, mapped); @@ -460,7 +460,7 @@ 
static void dm_complete_request(struct request *rq, int error) */ static void dm_kill_unmapped_request(struct request *rq, int error) { - rq->cmd_flags |= REQ_FAILED; + rq->rq_flags |= RQF_FAILED; dm_complete_request(rq, error); } @@ -476,7 +476,7 @@ static void end_clone_request(struct request *clone, int error) * For just cleaning up the information of the queue in which * the clone was dispatched. * The clone is *NOT* freed actually here because it is alloced - * from dm own mempool (REQ_ALLOCED isn't set). + * from dm own mempool (RQF_ALLOCED isn't set). */ __blk_put_request(clone->q, clone); } @@ -497,7 +497,7 @@ static void dm_dispatch_clone_request(struct request *clone, struct request *rq) int r; if (blk_queue_io_stat(clone->q)) - clone->cmd_flags |= REQ_IO_STAT; + clone->rq_flags |= RQF_IO_STAT; clone->start_time = jiffies; r = blk_insert_cloned_request(clone->q, clone); @@ -633,7 +633,7 @@ static int dm_old_prep_fn(struct request_queue *q, struct request *rq) return BLKPREP_DEFER; rq->special = tio; - rq->cmd_flags |= REQ_DONTPREP; + rq->rq_flags |= RQF_DONTPREP; return BLKPREP_OK; } diff --git a/drivers/memstick/core/ms_block.c b/drivers/memstick/core/ms_block.c index aacf584f2a42..f3512404bc52 100644 --- a/drivers/memstick/core/ms_block.c +++ b/drivers/memstick/core/ms_block.c @@ -2006,7 +2006,7 @@ static int msb_prepare_req(struct request_queue *q, struct request *req) blk_dump_rq_flags(req, "MS unsupported request"); return BLKPREP_KILL; } - req->cmd_flags |= REQ_DONTPREP; + req->rq_flags |= RQF_DONTPREP; return BLKPREP_OK; } diff --git a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c index c1472275fe57..fa0746d182ff 100644 --- a/drivers/memstick/core/mspro_block.c +++ b/drivers/memstick/core/mspro_block.c @@ -834,7 +834,7 @@ static int mspro_block_prepare_req(struct request_queue *q, struct request *req) return BLKPREP_KILL; } - req->cmd_flags |= REQ_DONTPREP; + req->rq_flags |= RQF_DONTPREP; return BLKPREP_OK; } diff --git 
a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c index c3335112e68c..f8190dd4a35c 100644 --- a/drivers/mmc/card/block.c +++ b/drivers/mmc/card/block.c @@ -2117,7 +2117,7 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *rqc) mmc_blk_abort_packed_req(mq_rq); } else { if (mmc_card_removed(card)) - req->cmd_flags |= REQ_QUIET; + req->rq_flags |= RQF_QUIET; while (ret) ret = blk_end_request(req, -EIO, blk_rq_cur_bytes(req)); @@ -2126,7 +2126,7 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *rqc) start_new_req: if (rqc) { if (mmc_card_removed(card)) { - rqc->cmd_flags |= REQ_QUIET; + rqc->rq_flags |= RQF_QUIET; blk_end_request_all(rqc, -EIO); } else { /* diff --git a/drivers/mmc/card/queue.c b/drivers/mmc/card/queue.c index 8037f73a109a..8a67f1c2ce21 100644 --- a/drivers/mmc/card/queue.c +++ b/drivers/mmc/card/queue.c @@ -44,7 +44,7 @@ static int mmc_prep_request(struct request_queue *q, struct request *req) if (mq && (mmc_card_removed(mq->card) || mmc_access_rpmb(mq))) return BLKPREP_KILL; - req->cmd_flags |= REQ_DONTPREP; + req->rq_flags |= RQF_DONTPREP; return BLKPREP_OK; } @@ -120,7 +120,7 @@ static void mmc_request_fn(struct request_queue *q) if (!mq) { while ((req = blk_fetch_request(q)) != NULL) { - req->cmd_flags |= REQ_QUIET; + req->rq_flags |= RQF_QUIET; __blk_end_request_all(req, -EIO); } return; diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 0fc99f0f2571..0955e9d22020 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -323,9 +323,9 @@ static int nvme_init_iod(struct request *rq, unsigned size, iod->nents = 0; iod->length = size; - if (!(rq->cmd_flags & REQ_DONTPREP)) { + if (!(rq->rq_flags & RQF_DONTPREP)) { rq->retries = 0; - rq->cmd_flags |= REQ_DONTPREP; + rq->rq_flags |= RQF_DONTPREP; } return 0; } diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c index 241829e59668..05813a420188 100644 --- 
a/drivers/scsi/device_handler/scsi_dh_alua.c +++ b/drivers/scsi/device_handler/scsi_dh_alua.c @@ -154,7 +154,8 @@ static int submit_rtpg(struct scsi_device *sdev, unsigned char *buff, return scsi_execute_req_flags(sdev, cdb, DMA_FROM_DEVICE, buff, bufflen, sshdr, ALUA_FAILOVER_TIMEOUT * HZ, - ALUA_FAILOVER_RETRIES, NULL, req_flags); + ALUA_FAILOVER_RETRIES, NULL, + req_flags, 0); } /* @@ -187,7 +188,8 @@ static int submit_stpg(struct scsi_device *sdev, int group_id, return scsi_execute_req_flags(sdev, cdb, DMA_TO_DEVICE, stpg_data, stpg_len, sshdr, ALUA_FAILOVER_TIMEOUT * HZ, - ALUA_FAILOVER_RETRIES, NULL, req_flags); + ALUA_FAILOVER_RETRIES, NULL, + req_flags, 0); } static struct alua_port_group *alua_find_get_pg(char *id_str, size_t id_size, @@ -1063,7 +1065,7 @@ static int alua_prep_fn(struct scsi_device *sdev, struct request *req) state != SCSI_ACCESS_STATE_ACTIVE && state != SCSI_ACCESS_STATE_LBA) { ret = BLKPREP_KILL; - req->cmd_flags |= REQ_QUIET; + req->rq_flags |= RQF_QUIET; } return ret; diff --git a/drivers/scsi/device_handler/scsi_dh_emc.c b/drivers/scsi/device_handler/scsi_dh_emc.c index 375d81850f15..5b80746980b8 100644 --- a/drivers/scsi/device_handler/scsi_dh_emc.c +++ b/drivers/scsi/device_handler/scsi_dh_emc.c @@ -452,7 +452,7 @@ static int clariion_prep_fn(struct scsi_device *sdev, struct request *req) if (h->lun_state != CLARIION_LUN_OWNED) { ret = BLKPREP_KILL; - req->cmd_flags |= REQ_QUIET; + req->rq_flags |= RQF_QUIET; } return ret; diff --git a/drivers/scsi/device_handler/scsi_dh_hp_sw.c b/drivers/scsi/device_handler/scsi_dh_hp_sw.c index 9406d5f4a3d3..308e87195dc1 100644 --- a/drivers/scsi/device_handler/scsi_dh_hp_sw.c +++ b/drivers/scsi/device_handler/scsi_dh_hp_sw.c @@ -266,7 +266,7 @@ static int hp_sw_prep_fn(struct scsi_device *sdev, struct request *req) if (h->path_state != HP_SW_PATH_ACTIVE) { ret = BLKPREP_KILL; - req->cmd_flags |= REQ_QUIET; + req->rq_flags |= RQF_QUIET; } return ret; diff --git 
a/drivers/scsi/device_handler/scsi_dh_rdac.c b/drivers/scsi/device_handler/scsi_dh_rdac.c index 06fbd0b0c68a..00d9c326158e 100644 --- a/drivers/scsi/device_handler/scsi_dh_rdac.c +++ b/drivers/scsi/device_handler/scsi_dh_rdac.c @@ -724,7 +724,7 @@ static int rdac_prep_fn(struct scsi_device *sdev, struct request *req) if (h->state != RDAC_STATE_ACTIVE) { ret = BLKPREP_KILL; - req->cmd_flags |= REQ_QUIET; + req->rq_flags |= RQF_QUIET; } return ret; diff --git a/drivers/scsi/osd/osd_initiator.c b/drivers/scsi/osd/osd_initiator.c index 2f2a9910e30e..ef99f62831fb 100644 --- a/drivers/scsi/osd/osd_initiator.c +++ b/drivers/scsi/osd/osd_initiator.c @@ -1595,7 +1595,7 @@ static int _init_blk_request(struct osd_request *or, } or->request = req; - req->cmd_flags |= REQ_QUIET; + req->rq_flags |= RQF_QUIET; req->timeout = or->timeout; req->retries = or->retries; diff --git a/drivers/scsi/osst.c b/drivers/scsi/osst.c index 5033223f6287..a2960f5d98ec 100644 --- a/drivers/scsi/osst.c +++ b/drivers/scsi/osst.c @@ -368,7 +368,7 @@ static int osst_execute(struct osst_request *SRpnt, const unsigned char *cmd, return DRIVER_ERROR << 24; blk_rq_set_block_pc(req); - req->cmd_flags |= REQ_QUIET; + req->rq_flags |= RQF_QUIET; SRpnt->bio = NULL; diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index 106a6adbd6f1..996e134d79fa 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -1988,7 +1988,7 @@ static void scsi_eh_lock_door(struct scsi_device *sdev) req->cmd_len = COMMAND_SIZE(req->cmd[0]); - req->cmd_flags |= REQ_QUIET; + req->rq_flags |= RQF_QUIET; req->timeout = 10 * HZ; req->retries = 5; diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 2cca9cffc63f..8c52622ac257 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -163,26 +163,11 @@ void scsi_queue_insert(struct scsi_cmnd *cmd, int reason) { __scsi_queue_insert(cmd, reason, 1); } -/** - * scsi_execute - insert request and wait for the result - * @sdev: scsi 
device - * @cmd: scsi command - * @data_direction: data direction - * @buffer: data buffer - * @bufflen: len of buffer - * @sense: optional sense buffer - * @timeout: request timeout in seconds - * @retries: number of times to retry request - * @flags: or into request flags; - * @resid: optional residual length - * - * returns the req->errors value which is the scsi_cmnd result - * field. - */ -int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, + +static int __scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, int data_direction, void *buffer, unsigned bufflen, unsigned char *sense, int timeout, int retries, u64 flags, - int *resid) + req_flags_t rq_flags, int *resid) { struct request *req; int write = (data_direction == DMA_TO_DEVICE); @@ -203,7 +188,8 @@ int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, req->sense_len = 0; req->retries = retries; req->timeout = timeout; - req->cmd_flags |= flags | REQ_QUIET | REQ_PREEMPT; + req->cmd_flags |= flags; + req->rq_flags |= rq_flags | RQF_QUIET | RQF_PREEMPT; /* * head injection *required* here otherwise quiesce won't work @@ -227,12 +213,37 @@ int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, return ret; } + +/** + * scsi_execute - insert request and wait for the result + * @sdev: scsi device + * @cmd: scsi command + * @data_direction: data direction + * @buffer: data buffer + * @bufflen: len of buffer + * @sense: optional sense buffer + * @timeout: request timeout in seconds + * @retries: number of times to retry request + * @flags: or into request flags; + * @resid: optional residual length + * + * returns the req->errors value which is the scsi_cmnd result + * field. 
+ */ +int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, + int data_direction, void *buffer, unsigned bufflen, + unsigned char *sense, int timeout, int retries, u64 flags, + int *resid) +{ + return __scsi_execute(sdev, cmd, data_direction, buffer, bufflen, sense, + timeout, retries, flags, 0, resid); +} EXPORT_SYMBOL(scsi_execute); int scsi_execute_req_flags(struct scsi_device *sdev, const unsigned char *cmd, int data_direction, void *buffer, unsigned bufflen, struct scsi_sense_hdr *sshdr, int timeout, int retries, - int *resid, u64 flags) + int *resid, u64 flags, req_flags_t rq_flags) { char *sense = NULL; int result; @@ -242,8 +253,8 @@ int scsi_execute_req_flags(struct scsi_device *sdev, const unsigned char *cmd, if (!sense) return DRIVER_ERROR << 24; } - result = scsi_execute(sdev, cmd, data_direction, buffer, bufflen, - sense, timeout, retries, flags, resid); + result = __scsi_execute(sdev, cmd, data_direction, buffer, bufflen, + sense, timeout, retries, flags, rq_flags, resid); if (sshdr) scsi_normalize_sense(sense, SCSI_SENSE_BUFFERSIZE, sshdr); @@ -813,7 +824,7 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes) */ if ((sshdr.asc == 0x0) && (sshdr.ascq == 0x1d)) ; - else if (!(req->cmd_flags & REQ_QUIET)) + else if (!(req->rq_flags & RQF_QUIET)) scsi_print_sense(cmd); result = 0; /* BLOCK_PC may have set error */ @@ -943,7 +954,7 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes) switch (action) { case ACTION_FAIL: /* Give up and fail the remainder of the request */ - if (!(req->cmd_flags & REQ_QUIET)) { + if (!(req->rq_flags & RQF_QUIET)) { static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST); @@ -972,7 +983,7 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes) * A new command will be prepared and issued. 
*/ if (q->mq_ops) { - cmd->request->cmd_flags &= ~REQ_DONTPREP; + cmd->request->rq_flags &= ~RQF_DONTPREP; scsi_mq_uninit_cmd(cmd); scsi_mq_requeue_cmd(cmd); } else { @@ -1234,7 +1245,7 @@ scsi_prep_state_check(struct scsi_device *sdev, struct request *req) /* * If the devices is blocked we defer normal commands. */ - if (!(req->cmd_flags & REQ_PREEMPT)) + if (!(req->rq_flags & RQF_PREEMPT)) ret = BLKPREP_DEFER; break; default: @@ -1243,7 +1254,7 @@ scsi_prep_state_check(struct scsi_device *sdev, struct request *req) * special commands. In particular any user initiated * command is not allowed. */ - if (!(req->cmd_flags & REQ_PREEMPT)) + if (!(req->rq_flags & RQF_PREEMPT)) ret = BLKPREP_KILL; break; } @@ -1279,7 +1290,7 @@ scsi_prep_return(struct request_queue *q, struct request *req, int ret) blk_delay_queue(q, SCSI_QUEUE_DELAY); break; default: - req->cmd_flags |= REQ_DONTPREP; + req->rq_flags |= RQF_DONTPREP; } return ret; @@ -1736,7 +1747,7 @@ static void scsi_request_fn(struct request_queue *q) * we add the dev to the starved list so it eventually gets * a run when a tag is freed. */ - if (blk_queue_tagged(q) && !(req->cmd_flags & REQ_QUEUED)) { + if (blk_queue_tagged(q) && !(req->rq_flags & RQF_QUEUED)) { spin_lock_irq(shost->host_lock); if (list_empty(&sdev->starved_entry)) list_add_tail(&sdev->starved_entry, @@ -1903,11 +1914,11 @@ static int scsi_queue_rq(struct blk_mq_hw_ctx *hctx, goto out_dec_target_busy; - if (!(req->cmd_flags & REQ_DONTPREP)) { + if (!(req->rq_flags & RQF_DONTPREP)) { ret = prep_to_mq(scsi_mq_prep_fn(req)); if (ret) goto out_dec_host_busy; - req->cmd_flags |= REQ_DONTPREP; + req->rq_flags |= RQF_DONTPREP; } else { blk_mq_start_request(req); } @@ -1952,7 +1963,7 @@ out: * we hit an error, as we will never see this command * again. 
*/ - if (req->cmd_flags & REQ_DONTPREP) + if (req->rq_flags & RQF_DONTPREP) scsi_mq_uninit_cmd(cmd); break; default: diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index b9618ffca829..cef1f78031d4 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -1520,7 +1520,7 @@ static int sd_sync_cache(struct scsi_disk *sdkp) */ res = scsi_execute_req_flags(sdp, cmd, DMA_NONE, NULL, 0, &sshdr, timeout, SD_MAX_RETRIES, - NULL, REQ_PM); + NULL, 0, RQF_PM); if (res == 0) break; } @@ -1879,7 +1879,7 @@ static int sd_done(struct scsi_cmnd *SCpnt) good_bytes = 0; req->__data_len = blk_rq_bytes(req); - req->cmd_flags |= REQ_QUIET; + req->rq_flags |= RQF_QUIET; } } } @@ -3278,7 +3278,7 @@ static int sd_start_stop_device(struct scsi_disk *sdkp, int start) return -ENODEV; res = scsi_execute_req_flags(sdp, cmd, DMA_NONE, NULL, 0, &sshdr, - SD_TIMEOUT, SD_MAX_RETRIES, NULL, REQ_PM); + SD_TIMEOUT, SD_MAX_RETRIES, NULL, 0, RQF_PM); if (res) { sd_print_result(sdkp, "Start/Stop Unit failed", res); if (driver_byte(res) & DRIVER_SENSE) diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c index d5b3bd915d9e..394ab490919c 100644 --- a/drivers/scsi/sd_zbc.c +++ b/drivers/scsi/sd_zbc.c @@ -348,7 +348,7 @@ void sd_zbc_complete(struct scsi_cmnd *cmd, * this case, so be quiet about the error. 
*/ if (req_op(rq) == REQ_OP_ZONE_RESET) - rq->cmd_flags |= REQ_QUIET; + rq->rq_flags |= RQF_QUIET; break; case 0x21: /* diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c index 7af5226aa55b..3bc46a4abd43 100644 --- a/drivers/scsi/st.c +++ b/drivers/scsi/st.c @@ -546,7 +546,7 @@ static int st_scsi_execute(struct st_request *SRpnt, const unsigned char *cmd, return DRIVER_ERROR << 24; blk_rq_set_block_pc(req); - req->cmd_flags |= REQ_QUIET; + req->rq_flags |= RQF_QUIET; mdata->null_mapped = 1; diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 05c745663c10..cf549871c1ee 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -5590,7 +5590,7 @@ ufshcd_send_request_sense(struct ufs_hba *hba, struct scsi_device *sdp) ret = scsi_execute_req_flags(sdp, cmd, DMA_FROM_DEVICE, buffer, SCSI_SENSE_BUFFERSIZE, NULL, - msecs_to_jiffies(1000), 3, NULL, REQ_PM); + msecs_to_jiffies(1000), 3, NULL, 0, RQF_PM); if (ret) pr_err("%s: failed with err %d\n", __func__, ret); @@ -5652,11 +5652,11 @@ static int ufshcd_set_dev_pwr_mode(struct ufs_hba *hba, /* * Current function would be generally called from the power management - * callbacks hence set the REQ_PM flag so that it doesn't resume the + * callbacks hence set the RQF_PM flag so that it doesn't resume the * already suspended childs. 
*/ ret = scsi_execute_req_flags(sdp, cmd, DMA_NONE, NULL, 0, &sshdr, - START_STOP_TIMEOUT, 0, NULL, REQ_PM); + START_STOP_TIMEOUT, 0, NULL, 0, RQF_PM); if (ret) { sdev_printk(KERN_WARNING, sdp, "START_STOP failed for power mode: %d, result %x\n", diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 6df722de2e22..ec69a8fe3b29 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -167,26 +167,6 @@ enum rq_flag_bits { __REQ_PREFLUSH, /* request for cache flush */ __REQ_RAHEAD, /* read ahead, can fail anytime */ - /* request only flags */ - __REQ_SORTED, /* elevator knows about this request */ - __REQ_SOFTBARRIER, /* may not be passed by ioscheduler */ - __REQ_STARTED, /* drive already may have started this one */ - __REQ_DONTPREP, /* don't call prep for this one */ - __REQ_QUEUED, /* uses queueing */ - __REQ_ELVPRIV, /* elevator private data attached */ - __REQ_FAILED, /* set if the request failed */ - __REQ_QUIET, /* don't worry about errors */ - __REQ_PREEMPT, /* set for "ide_preempt" requests and also - for requests for which the SCSI "quiesce" - state must be ignored. 
*/ - __REQ_ALLOCED, /* request came from our alloc pool */ - __REQ_COPY_USER, /* contains copies of user pages */ - __REQ_FLUSH_SEQ, /* request for flush sequence */ - __REQ_IO_STAT, /* account I/O stat */ - __REQ_MIXED_MERGE, /* merge of different types, fail separately */ - __REQ_PM, /* runtime pm request */ - __REQ_HASHED, /* on IO scheduler merge hash */ - __REQ_MQ_INFLIGHT, /* track inflight for MQ */ __REQ_NR_BITS, /* stops here */ }; @@ -208,29 +188,12 @@ enum rq_flag_bits { /* This mask is used for both bio and request merge checking */ #define REQ_NOMERGE_FLAGS \ - (REQ_NOMERGE | REQ_STARTED | REQ_SOFTBARRIER | REQ_PREFLUSH | REQ_FUA | REQ_FLUSH_SEQ) + (REQ_NOMERGE | REQ_PREFLUSH | REQ_FUA) #define REQ_RAHEAD (1ULL << __REQ_RAHEAD) -#define REQ_SORTED (1ULL << __REQ_SORTED) -#define REQ_SOFTBARRIER (1ULL << __REQ_SOFTBARRIER) #define REQ_FUA (1ULL << __REQ_FUA) #define REQ_NOMERGE (1ULL << __REQ_NOMERGE) -#define REQ_STARTED (1ULL << __REQ_STARTED) -#define REQ_DONTPREP (1ULL << __REQ_DONTPREP) -#define REQ_QUEUED (1ULL << __REQ_QUEUED) -#define REQ_ELVPRIV (1ULL << __REQ_ELVPRIV) -#define REQ_FAILED (1ULL << __REQ_FAILED) -#define REQ_QUIET (1ULL << __REQ_QUIET) -#define REQ_PREEMPT (1ULL << __REQ_PREEMPT) -#define REQ_ALLOCED (1ULL << __REQ_ALLOCED) -#define REQ_COPY_USER (1ULL << __REQ_COPY_USER) #define REQ_PREFLUSH (1ULL << __REQ_PREFLUSH) -#define REQ_FLUSH_SEQ (1ULL << __REQ_FLUSH_SEQ) -#define REQ_IO_STAT (1ULL << __REQ_IO_STAT) -#define REQ_MIXED_MERGE (1ULL << __REQ_MIXED_MERGE) -#define REQ_PM (1ULL << __REQ_PM) -#define REQ_HASHED (1ULL << __REQ_HASHED) -#define REQ_MQ_INFLIGHT (1ULL << __REQ_MQ_INFLIGHT) enum req_op { REQ_OP_READ, diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 90097dd8b8ed..b4415feac679 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -78,6 +78,50 @@ enum rq_cmd_type_bits { REQ_TYPE_DRV_PRIV, /* driver defined types from here */ }; +/* + * request flags */ +typedef __u32 __bitwise 
req_flags_t; + +/* elevator knows about this request */ +#define RQF_SORTED ((__force req_flags_t)(1 << 0)) +/* drive already may have started this one */ +#define RQF_STARTED ((__force req_flags_t)(1 << 1)) +/* uses tagged queueing */ +#define RQF_QUEUED ((__force req_flags_t)(1 << 2)) +/* may not be passed by ioscheduler */ +#define RQF_SOFTBARRIER ((__force req_flags_t)(1 << 3)) +/* request for flush sequence */ +#define RQF_FLUSH_SEQ ((__force req_flags_t)(1 << 4)) +/* merge of different types, fail separately */ +#define RQF_MIXED_MERGE ((__force req_flags_t)(1 << 5)) +/* track inflight for MQ */ +#define RQF_MQ_INFLIGHT ((__force req_flags_t)(1 << 6)) +/* don't call prep for this one */ +#define RQF_DONTPREP ((__force req_flags_t)(1 << 7)) +/* set for "ide_preempt" requests and also for requests for which the SCSI + "quiesce" state must be ignored. */ +#define RQF_PREEMPT ((__force req_flags_t)(1 << 8)) +/* contains copies of user pages */ +#define RQF_COPY_USER ((__force req_flags_t)(1 << 9)) +/* vaguely specified driver internal error. 
Ignored by the block layer */ +#define RQF_FAILED ((__force req_flags_t)(1 << 10)) +/* don't warn about errors */ +#define RQF_QUIET ((__force req_flags_t)(1 << 11)) +/* elevator private data attached */ +#define RQF_ELVPRIV ((__force req_flags_t)(1 << 12)) +/* account I/O stat */ +#define RQF_IO_STAT ((__force req_flags_t)(1 << 13)) +/* request came from our alloc pool */ +#define RQF_ALLOCED ((__force req_flags_t)(1 << 14)) +/* runtime pm request */ +#define RQF_PM ((__force req_flags_t)(1 << 15)) +/* on IO scheduler merge hash */ +#define RQF_HASHED ((__force req_flags_t)(1 << 16)) + +/* flags that prevent us from merging requests: */ +#define RQF_NOMERGE_FLAGS \ + (RQF_STARTED | RQF_SOFTBARRIER | RQF_FLUSH_SEQ) + #define BLK_MAX_CDB 16 /* @@ -99,6 +143,7 @@ struct request { int cpu; unsigned cmd_type; u64 cmd_flags; + req_flags_t rq_flags; unsigned long atomic_flags; /* the following two fields are internal, NEVER access directly */ @@ -648,7 +693,7 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q) REQ_FAILFAST_DRIVER)) #define blk_account_rq(rq) \ - (((rq)->cmd_flags & REQ_STARTED) && \ + (((rq)->rq_flags & RQF_STARTED) && \ ((rq)->cmd_type == REQ_TYPE_FS)) #define blk_rq_cpu_valid(rq) ((rq)->cpu != -1) @@ -740,6 +785,8 @@ static inline bool rq_mergeable(struct request *rq) if (rq->cmd_flags & REQ_NOMERGE_FLAGS) return false; + if (rq->rq_flags & RQF_NOMERGE_FLAGS) + return false; return true; } diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index 8a9563144890..8990e580b278 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -414,14 +414,14 @@ extern int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, extern int scsi_execute_req_flags(struct scsi_device *sdev, const unsigned char *cmd, int data_direction, void *buffer, unsigned bufflen, struct scsi_sense_hdr *sshdr, int timeout, - int retries, int *resid, u64 flags); + int retries, int *resid, u64 flags, req_flags_t 
rq_flags); static inline int scsi_execute_req(struct scsi_device *sdev, const unsigned char *cmd, int data_direction, void *buffer, unsigned bufflen, struct scsi_sense_hdr *sshdr, int timeout, int retries, int *resid) { return scsi_execute_req_flags(sdev, cmd, data_direction, buffer, - bufflen, sshdr, timeout, retries, resid, 0); + bufflen, sshdr, timeout, retries, resid, 0, 0); } extern void sdev_disable_disk_events(struct scsi_device *sdev); extern void sdev_enable_disk_events(struct scsi_device *sdev); -- cgit v1.2.3 From ef295ecf090d3e86e5b742fc6ab34f1122a43773 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 28 Oct 2016 08:48:16 -0600 Subject: block: better op and flags encoding Now that we don't need the common flags to overflow outside the range of a 32-bit type we can encode them the same way for both the bio and request fields. This in addition allows us to place the operation first (and make some room for more ops while we're at it) and to stop having to shift around the operation values. In addition this allows passing around only one value in the block layer instead of two (and eventuall also in the file systems, but we can do that later) and thus clean up a lot of code. Last but not least this allows decreasing the size of the cmd_flags field in struct request to 32-bits. Various functions passing this value could also be updated, but I'd like to avoid the churn for now. 
Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- Documentation/block/biodoc.txt | 4 +- block/blk-core.c | 60 ++++++++++-------------------- block/blk-flush.c | 2 +- block/blk-lib.c | 2 +- block/blk-map.c | 2 + block/blk-mq.c | 28 ++++++-------- block/cfq-iosched.c | 66 ++++++++++++++++----------------- block/elevator.c | 4 +- drivers/md/dm-crypt.c | 2 +- drivers/scsi/sd.c | 3 +- fs/btrfs/inode.c | 5 +-- fs/buffer.c | 2 +- fs/f2fs/f2fs.h | 2 +- fs/gfs2/lops.c | 2 +- include/linux/blk-cgroup.h | 11 +++--- include/linux/blk_types.h | 83 +++++++++++++++++++----------------------- include/linux/blkdev.h | 26 +------------ include/linux/blktrace_api.h | 2 +- include/linux/dm-io.h | 2 +- include/linux/elevator.h | 4 +- include/trace/events/bcache.h | 12 ++---- include/trace/events/block.h | 31 ++++++---------- kernel/trace/blktrace.c | 14 +++---- 23 files changed, 148 insertions(+), 221 deletions(-) (limited to 'include/linux') diff --git a/Documentation/block/biodoc.txt b/Documentation/block/biodoc.txt index 6acea160298c..01ddeaf64b0f 100644 --- a/Documentation/block/biodoc.txt +++ b/Documentation/block/biodoc.txt @@ -553,8 +553,8 @@ struct request { struct request_list *rl; } -See the rq_flag_bits definitions for an explanation of the various flags -available. Some bits are used by the block layer or i/o scheduler. +See the req_ops and req_flag_bits definitions for an explanation of the various +flags available. Some bits are used by the block layer or i/o scheduler. 
The behaviour of the various sector counts are almost the same as before, except that since we have multi-segment bios, current_nr_sectors refers diff --git a/block/blk-core.c b/block/blk-core.c index fd416651a676..0bfaa54d3e9f 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1056,8 +1056,7 @@ static struct io_context *rq_ioc(struct bio *bio) /** * __get_request - get a free request * @rl: request list to allocate from - * @op: REQ_OP_READ/REQ_OP_WRITE - * @op_flags: rq_flag_bits + * @op: operation and flags * @bio: bio to allocate request for (can be %NULL) * @gfp_mask: allocation mask * @@ -1068,23 +1067,22 @@ static struct io_context *rq_ioc(struct bio *bio) * Returns ERR_PTR on failure, with @q->queue_lock held. * Returns request pointer on success, with @q->queue_lock *not held*. */ -static struct request *__get_request(struct request_list *rl, int op, - int op_flags, struct bio *bio, - gfp_t gfp_mask) +static struct request *__get_request(struct request_list *rl, unsigned int op, + struct bio *bio, gfp_t gfp_mask) { struct request_queue *q = rl->q; struct request *rq; struct elevator_type *et = q->elevator->type; struct io_context *ioc = rq_ioc(bio); struct io_cq *icq = NULL; - const bool is_sync = rw_is_sync(op, op_flags) != 0; + const bool is_sync = op_is_sync(op); int may_queue; req_flags_t rq_flags = RQF_ALLOCED; if (unlikely(blk_queue_dying(q))) return ERR_PTR(-ENODEV); - may_queue = elv_may_queue(q, op, op_flags); + may_queue = elv_may_queue(q, op); if (may_queue == ELV_MQUEUE_NO) goto rq_starved; @@ -1154,7 +1152,7 @@ static struct request *__get_request(struct request_list *rl, int op, blk_rq_init(q, rq); blk_rq_set_rl(rq, rl); - req_set_op_attrs(rq, op, op_flags); + rq->cmd_flags = op; rq->rq_flags = rq_flags; /* init elvpriv */ @@ -1232,8 +1230,7 @@ rq_starved: /** * get_request - get a free request * @q: request_queue to allocate request from - * @op: REQ_OP_READ/REQ_OP_WRITE - * @op_flags: rq_flag_bits + * @op: operation and flags * @bio: 
bio to allocate request for (can be %NULL) * @gfp_mask: allocation mask * @@ -1244,18 +1241,17 @@ rq_starved: * Returns ERR_PTR on failure, with @q->queue_lock held. * Returns request pointer on success, with @q->queue_lock *not held*. */ -static struct request *get_request(struct request_queue *q, int op, - int op_flags, struct bio *bio, - gfp_t gfp_mask) +static struct request *get_request(struct request_queue *q, unsigned int op, + struct bio *bio, gfp_t gfp_mask) { - const bool is_sync = rw_is_sync(op, op_flags) != 0; + const bool is_sync = op_is_sync(op); DEFINE_WAIT(wait); struct request_list *rl; struct request *rq; rl = blk_get_rl(q, bio); /* transferred to @rq on success */ retry: - rq = __get_request(rl, op, op_flags, bio, gfp_mask); + rq = __get_request(rl, op, bio, gfp_mask); if (!IS_ERR(rq)) return rq; @@ -1297,7 +1293,7 @@ static struct request *blk_old_get_request(struct request_queue *q, int rw, create_io_context(gfp_mask, q->node); spin_lock_irq(q->queue_lock); - rq = get_request(q, rw, 0, NULL, gfp_mask); + rq = get_request(q, rw, NULL, gfp_mask); if (IS_ERR(rq)) { spin_unlock_irq(q->queue_lock); return rq; @@ -1446,7 +1442,7 @@ void __blk_put_request(struct request_queue *q, struct request *req) */ if (rq_flags & RQF_ALLOCED) { struct request_list *rl = blk_rq_rl(req); - bool sync = rw_is_sync(req_op(req), req->cmd_flags); + bool sync = op_is_sync(req->cmd_flags); BUG_ON(!list_empty(&req->queuelist)); BUG_ON(ELV_ON_HASH(req)); @@ -1652,8 +1648,6 @@ out: void init_request_from_bio(struct request *req, struct bio *bio) { req->cmd_type = REQ_TYPE_FS; - - req->cmd_flags |= bio->bi_opf & REQ_COMMON_MASK; if (bio->bi_opf & REQ_RAHEAD) req->cmd_flags |= REQ_FAILFAST_MASK; @@ -1665,9 +1659,8 @@ void init_request_from_bio(struct request *req, struct bio *bio) static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio) { - const bool sync = !!(bio->bi_opf & REQ_SYNC); struct blk_plug *plug; - int el_ret, rw_flags = 0, where = 
ELEVATOR_INSERT_SORT; + int el_ret, where = ELEVATOR_INSERT_SORT; struct request *req; unsigned int request_count = 0; @@ -1722,24 +1715,11 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio) } get_rq: - /* - * This sync check and mask will be re-done in init_request_from_bio(), - * but we need to set it earlier to expose the sync flag to the - * rq allocator and io schedulers. - */ - if (sync) - rw_flags |= REQ_SYNC; - - /* - * Add in META/PRIO flags, if set, before we get to the IO scheduler - */ - rw_flags |= (bio->bi_opf & (REQ_META | REQ_PRIO)); - /* * Grab a free request. This is might sleep but can not fail. * Returns with the queue unlocked. */ - req = get_request(q, bio_data_dir(bio), rw_flags, bio, GFP_NOIO); + req = get_request(q, bio->bi_opf, bio, GFP_NOIO); if (IS_ERR(req)) { bio->bi_error = PTR_ERR(req); bio_endio(bio); @@ -2946,8 +2926,6 @@ EXPORT_SYMBOL_GPL(__blk_end_request_err); void blk_rq_bio_prep(struct request_queue *q, struct request *rq, struct bio *bio) { - req_set_op(rq, bio_op(bio)); - if (bio_has_data(bio)) rq->nr_phys_segments = bio_phys_segments(q, bio); @@ -3031,8 +3009,7 @@ EXPORT_SYMBOL_GPL(blk_rq_unprep_clone); static void __blk_rq_prep_clone(struct request *dst, struct request *src) { dst->cpu = src->cpu; - req_set_op_attrs(dst, req_op(src), - (src->cmd_flags & REQ_CLONE_MASK) | REQ_NOMERGE); + dst->cmd_flags = src->cmd_flags | REQ_NOMERGE; dst->cmd_type = src->cmd_type; dst->__sector = blk_rq_pos(src); dst->__data_len = blk_rq_bytes(src); @@ -3537,8 +3514,11 @@ EXPORT_SYMBOL(blk_set_runtime_active); int __init blk_dev_init(void) { - BUILD_BUG_ON(__REQ_NR_BITS > 8 * + BUILD_BUG_ON(REQ_OP_LAST >= (1 << REQ_OP_BITS)); + BUILD_BUG_ON(REQ_OP_BITS + REQ_FLAG_BITS > 8 * FIELD_SIZEOF(struct request, cmd_flags)); + BUILD_BUG_ON(REQ_OP_BITS + REQ_FLAG_BITS > 8 * + FIELD_SIZEOF(struct bio, bi_opf)); /* used for unplugging and affects IO latency/throughput - HIGHPRI */ kblockd_workqueue = alloc_workqueue("kblockd", 
diff --git a/block/blk-flush.c b/block/blk-flush.c index 3990b9cfbda5..95f1d4d357df 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -330,7 +330,7 @@ static bool blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq) } flush_rq->cmd_type = REQ_TYPE_FS; - req_set_op_attrs(flush_rq, REQ_OP_FLUSH, WRITE_FLUSH); + flush_rq->cmd_flags = REQ_OP_FLUSH | WRITE_FLUSH; flush_rq->rq_flags |= RQF_FLUSH_SEQ; flush_rq->rq_disk = first_rq->rq_disk; flush_rq->end_io = flush_end_io; diff --git a/block/blk-lib.c b/block/blk-lib.c index 46fe9248410d..18abda862915 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c @@ -29,7 +29,7 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector, struct request_queue *q = bdev_get_queue(bdev); struct bio *bio = *biop; unsigned int granularity; - enum req_op op; + unsigned int op; int alignment; sector_t bs_mask; diff --git a/block/blk-map.c b/block/blk-map.c index 2c5ae5fef473..0173a72a8aa9 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -16,6 +16,8 @@ int blk_rq_append_bio(struct request *rq, struct bio *bio) { if (!rq->bio) { + rq->cmd_flags &= REQ_OP_MASK; + rq->cmd_flags |= (bio->bi_opf & REQ_OP_MASK); blk_rq_bio_prep(rq->q, rq, bio); } else { if (!ll_back_merge_fn(rq->q, rq, bio)) diff --git a/block/blk-mq.c b/block/blk-mq.c index b49c6658eb05..2da1a0ee3318 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -139,14 +139,13 @@ bool blk_mq_can_queue(struct blk_mq_hw_ctx *hctx) EXPORT_SYMBOL(blk_mq_can_queue); static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx, - struct request *rq, int op, - unsigned int op_flags) + struct request *rq, unsigned int op) { INIT_LIST_HEAD(&rq->queuelist); /* csd/requeue_work/fifo_time is initialized before use */ rq->q = q; rq->mq_ctx = ctx; - req_set_op_attrs(rq, op, op_flags); + rq->cmd_flags = op; if (blk_queue_io_stat(q)) rq->rq_flags |= RQF_IO_STAT; /* do not touch atomic flags, it needs atomic ops against the timer */ @@ -183,11 
+182,11 @@ static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx, rq->end_io_data = NULL; rq->next_rq = NULL; - ctx->rq_dispatched[rw_is_sync(op, op_flags)]++; + ctx->rq_dispatched[op_is_sync(op)]++; } static struct request * -__blk_mq_alloc_request(struct blk_mq_alloc_data *data, int op, int op_flags) +__blk_mq_alloc_request(struct blk_mq_alloc_data *data, unsigned int op) { struct request *rq; unsigned int tag; @@ -202,7 +201,7 @@ __blk_mq_alloc_request(struct blk_mq_alloc_data *data, int op, int op_flags) } rq->tag = tag; - blk_mq_rq_ctx_init(data->q, data->ctx, rq, op, op_flags); + blk_mq_rq_ctx_init(data->q, data->ctx, rq, op); return rq; } @@ -225,7 +224,7 @@ struct request *blk_mq_alloc_request(struct request_queue *q, int rw, ctx = blk_mq_get_ctx(q); hctx = blk_mq_map_queue(q, ctx->cpu); blk_mq_set_alloc_data(&alloc_data, q, flags, ctx, hctx); - rq = __blk_mq_alloc_request(&alloc_data, rw, 0); + rq = __blk_mq_alloc_request(&alloc_data, rw); blk_mq_put_ctx(ctx); if (!rq) { @@ -277,7 +276,7 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int rw, ctx = __blk_mq_get_ctx(q, cpumask_first(hctx->cpumask)); blk_mq_set_alloc_data(&alloc_data, q, flags, ctx, hctx); - rq = __blk_mq_alloc_request(&alloc_data, rw, 0); + rq = __blk_mq_alloc_request(&alloc_data, rw); if (!rq) { ret = -EWOULDBLOCK; goto out_queue_exit; @@ -1196,19 +1195,14 @@ static struct request *blk_mq_map_request(struct request_queue *q, struct blk_mq_hw_ctx *hctx; struct blk_mq_ctx *ctx; struct request *rq; - int op = bio_data_dir(bio); - int op_flags = 0; blk_queue_enter_live(q); ctx = blk_mq_get_ctx(q); hctx = blk_mq_map_queue(q, ctx->cpu); - if (rw_is_sync(bio_op(bio), bio->bi_opf)) - op_flags |= REQ_SYNC; - - trace_block_getrq(q, bio, op); + trace_block_getrq(q, bio, bio->bi_opf); blk_mq_set_alloc_data(data, q, 0, ctx, hctx); - rq = __blk_mq_alloc_request(data, op, op_flags); + rq = __blk_mq_alloc_request(data, bio->bi_opf); data->hctx->queued++; 
return rq; @@ -1256,7 +1250,7 @@ static int blk_mq_direct_issue_request(struct request *rq, blk_qc_t *cookie) */ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) { - const int is_sync = rw_is_sync(bio_op(bio), bio->bi_opf); + const int is_sync = op_is_sync(bio->bi_opf); const int is_flush_fua = bio->bi_opf & (REQ_PREFLUSH | REQ_FUA); struct blk_mq_alloc_data data; struct request *rq; @@ -1350,7 +1344,7 @@ done: */ static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio) { - const int is_sync = rw_is_sync(bio_op(bio), bio->bi_opf); + const int is_sync = op_is_sync(bio->bi_opf); const int is_flush_fua = bio->bi_opf & (REQ_PREFLUSH | REQ_FUA); struct blk_plug *plug; unsigned int request_count = 0; diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 5e24d880306c..c96186adaa66 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -667,10 +667,10 @@ static inline void cfqg_put(struct cfq_group *cfqg) } while (0) static inline void cfqg_stats_update_io_add(struct cfq_group *cfqg, - struct cfq_group *curr_cfqg, int op, - int op_flags) + struct cfq_group *curr_cfqg, + unsigned int op) { - blkg_rwstat_add(&cfqg->stats.queued, op, op_flags, 1); + blkg_rwstat_add(&cfqg->stats.queued, op, 1); cfqg_stats_end_empty_time(&cfqg->stats); cfqg_stats_set_start_group_wait_time(cfqg, curr_cfqg); } @@ -684,30 +684,29 @@ static inline void cfqg_stats_update_timeslice_used(struct cfq_group *cfqg, #endif } -static inline void cfqg_stats_update_io_remove(struct cfq_group *cfqg, int op, - int op_flags) +static inline void cfqg_stats_update_io_remove(struct cfq_group *cfqg, + unsigned int op) { - blkg_rwstat_add(&cfqg->stats.queued, op, op_flags, -1); + blkg_rwstat_add(&cfqg->stats.queued, op, -1); } -static inline void cfqg_stats_update_io_merged(struct cfq_group *cfqg, int op, - int op_flags) +static inline void cfqg_stats_update_io_merged(struct cfq_group *cfqg, + unsigned int op) { - blkg_rwstat_add(&cfqg->stats.merged, op, 
op_flags, 1); + blkg_rwstat_add(&cfqg->stats.merged, op, 1); } static inline void cfqg_stats_update_completion(struct cfq_group *cfqg, - uint64_t start_time, uint64_t io_start_time, int op, - int op_flags) + uint64_t start_time, uint64_t io_start_time, + unsigned int op) { struct cfqg_stats *stats = &cfqg->stats; unsigned long long now = sched_clock(); if (time_after64(now, io_start_time)) - blkg_rwstat_add(&stats->service_time, op, op_flags, - now - io_start_time); + blkg_rwstat_add(&stats->service_time, op, now - io_start_time); if (time_after64(io_start_time, start_time)) - blkg_rwstat_add(&stats->wait_time, op, op_flags, + blkg_rwstat_add(&stats->wait_time, op, io_start_time - start_time); } @@ -786,16 +785,16 @@ static inline void cfqg_put(struct cfq_group *cfqg) { } #define cfq_log_cfqg(cfqd, cfqg, fmt, args...) do {} while (0) static inline void cfqg_stats_update_io_add(struct cfq_group *cfqg, - struct cfq_group *curr_cfqg, int op, int op_flags) { } + struct cfq_group *curr_cfqg, unsigned int op) { } static inline void cfqg_stats_update_timeslice_used(struct cfq_group *cfqg, uint64_t time, unsigned long unaccounted_time) { } -static inline void cfqg_stats_update_io_remove(struct cfq_group *cfqg, int op, - int op_flags) { } -static inline void cfqg_stats_update_io_merged(struct cfq_group *cfqg, int op, - int op_flags) { } +static inline void cfqg_stats_update_io_remove(struct cfq_group *cfqg, + unsigned int op) { } +static inline void cfqg_stats_update_io_merged(struct cfq_group *cfqg, + unsigned int op) { } static inline void cfqg_stats_update_completion(struct cfq_group *cfqg, - uint64_t start_time, uint64_t io_start_time, int op, - int op_flags) { } + uint64_t start_time, uint64_t io_start_time, + unsigned int op) { } #endif /* CONFIG_CFQ_GROUP_IOSCHED */ @@ -2474,10 +2473,10 @@ static void cfq_reposition_rq_rb(struct cfq_queue *cfqq, struct request *rq) { elv_rb_del(&cfqq->sort_list, rq); cfqq->queued[rq_is_sync(rq)]--; - 
cfqg_stats_update_io_remove(RQ_CFQG(rq), req_op(rq), rq->cmd_flags); + cfqg_stats_update_io_remove(RQ_CFQG(rq), rq->cmd_flags); cfq_add_rq_rb(rq); cfqg_stats_update_io_add(RQ_CFQG(rq), cfqq->cfqd->serving_group, - req_op(rq), rq->cmd_flags); + rq->cmd_flags); } static struct request * @@ -2530,7 +2529,7 @@ static void cfq_remove_request(struct request *rq) cfq_del_rq_rb(rq); cfqq->cfqd->rq_queued--; - cfqg_stats_update_io_remove(RQ_CFQG(rq), req_op(rq), rq->cmd_flags); + cfqg_stats_update_io_remove(RQ_CFQG(rq), rq->cmd_flags); if (rq->cmd_flags & REQ_PRIO) { WARN_ON(!cfqq->prio_pending); cfqq->prio_pending--; @@ -2565,7 +2564,7 @@ static void cfq_merged_request(struct request_queue *q, struct request *req, static void cfq_bio_merged(struct request_queue *q, struct request *req, struct bio *bio) { - cfqg_stats_update_io_merged(RQ_CFQG(req), bio_op(bio), bio->bi_opf); + cfqg_stats_update_io_merged(RQ_CFQG(req), bio->bi_opf); } static void @@ -2588,7 +2587,7 @@ cfq_merged_requests(struct request_queue *q, struct request *rq, if (cfqq->next_rq == next) cfqq->next_rq = rq; cfq_remove_request(next); - cfqg_stats_update_io_merged(RQ_CFQG(rq), req_op(next), next->cmd_flags); + cfqg_stats_update_io_merged(RQ_CFQG(rq), next->cmd_flags); cfqq = RQ_CFQQ(next); /* @@ -4142,7 +4141,7 @@ static void cfq_insert_request(struct request_queue *q, struct request *rq) rq->fifo_time = ktime_get_ns() + cfqd->cfq_fifo_expire[rq_is_sync(rq)]; list_add_tail(&rq->queuelist, &cfqq->fifo); cfq_add_rq_rb(rq); - cfqg_stats_update_io_add(RQ_CFQG(rq), cfqd->serving_group, req_op(rq), + cfqg_stats_update_io_add(RQ_CFQG(rq), cfqd->serving_group, rq->cmd_flags); cfq_rq_enqueued(cfqd, cfqq, rq); } @@ -4240,8 +4239,7 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq) cfqq->dispatched--; (RQ_CFQG(rq))->dispatched--; cfqg_stats_update_completion(cfqq->cfqg, rq_start_time_ns(rq), - rq_io_start_time_ns(rq), req_op(rq), - rq->cmd_flags); + rq_io_start_time_ns(rq), 
rq->cmd_flags); cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]--; @@ -4319,14 +4317,14 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq) cfq_schedule_dispatch(cfqd); } -static void cfqq_boost_on_prio(struct cfq_queue *cfqq, int op_flags) +static void cfqq_boost_on_prio(struct cfq_queue *cfqq, unsigned int op) { /* * If REQ_PRIO is set, boost class and prio level, if it's below * BE/NORM. If prio is not set, restore the potentially boosted * class/prio level. */ - if (!(op_flags & REQ_PRIO)) { + if (!(op & REQ_PRIO)) { cfqq->ioprio_class = cfqq->org_ioprio_class; cfqq->ioprio = cfqq->org_ioprio; } else { @@ -4347,7 +4345,7 @@ static inline int __cfq_may_queue(struct cfq_queue *cfqq) return ELV_MQUEUE_MAY; } -static int cfq_may_queue(struct request_queue *q, int op, int op_flags) +static int cfq_may_queue(struct request_queue *q, unsigned int op) { struct cfq_data *cfqd = q->elevator->elevator_data; struct task_struct *tsk = current; @@ -4364,10 +4362,10 @@ static int cfq_may_queue(struct request_queue *q, int op, int op_flags) if (!cic) return ELV_MQUEUE_MAY; - cfqq = cic_to_cfqq(cic, rw_is_sync(op, op_flags)); + cfqq = cic_to_cfqq(cic, op_is_sync(op)); if (cfqq) { cfq_init_prio_data(cfqq, cic); - cfqq_boost_on_prio(cfqq, op_flags); + cfqq_boost_on_prio(cfqq, op); return __cfq_may_queue(cfqq); } diff --git a/block/elevator.c b/block/elevator.c index ac80f89a0842..a18a5db274e4 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -714,12 +714,12 @@ void elv_put_request(struct request_queue *q, struct request *rq) e->type->ops.elevator_put_req_fn(rq); } -int elv_may_queue(struct request_queue *q, int op, int op_flags) +int elv_may_queue(struct request_queue *q, unsigned int op) { struct elevator_queue *e = q->elevator; if (e->type->ops.elevator_may_queue_fn) - return e->type->ops.elevator_may_queue_fn(q, op, op_flags); + return e->type->ops.elevator_may_queue_fn(q, op); return ELV_MQUEUE_MAY; } diff --git a/drivers/md/dm-crypt.c 
b/drivers/md/dm-crypt.c index a2768835d394..68a9eb4f3f36 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1135,7 +1135,7 @@ static void clone_init(struct dm_crypt_io *io, struct bio *clone) clone->bi_private = io; clone->bi_end_io = crypt_endio; clone->bi_bdev = cc->dev->bdev; - bio_set_op_attrs(clone, bio_op(io->base_bio), bio_flags(io->base_bio)); + clone->bi_opf = io->base_bio->bi_opf; } static int kcryptd_io_read(struct dm_crypt_io *io, gfp_t gfp) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index cef1f78031d4..65738b0aad36 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -1031,8 +1031,7 @@ static int sd_setup_read_write_cmnd(struct scsi_cmnd *SCpnt) } else if (rq_data_dir(rq) == READ) { SCpnt->cmnd[0] = READ_6; } else { - scmd_printk(KERN_ERR, SCpnt, "Unknown command %llu,%llx\n", - req_op(rq), (unsigned long long) rq->cmd_flags); + scmd_printk(KERN_ERR, SCpnt, "Unknown command %d\n", req_op(rq)); goto out; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 2b790bda7998..9a377079af26 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -8427,7 +8427,7 @@ static int btrfs_submit_direct_hook(struct btrfs_dio_private *dip, if (!bio) return -ENOMEM; - bio_set_op_attrs(bio, bio_op(orig_bio), bio_flags(orig_bio)); + bio->bi_opf = orig_bio->bi_opf; bio->bi_private = dip; bio->bi_end_io = btrfs_end_dio_bio; btrfs_io_bio(bio)->logical = file_offset; @@ -8465,8 +8465,7 @@ next_block: start_sector, GFP_NOFS); if (!bio) goto out_err; - bio_set_op_attrs(bio, bio_op(orig_bio), - bio_flags(orig_bio)); + bio->bi_opf = orig_bio->bi_opf; bio->bi_private = dip; bio->bi_end_io = btrfs_end_dio_bio; btrfs_io_bio(bio)->logical = file_offset; diff --git a/fs/buffer.c b/fs/buffer.c index b205a629001d..a29335867e30 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -3118,7 +3118,7 @@ EXPORT_SYMBOL(submit_bh); /** * ll_rw_block: low-level access to block devices (DEPRECATED) * @op: whether to %READ or %WRITE - * @op_flags: rq_flag_bits + * @op_flags: 
req_flag_bits * @nr: number of &struct buffer_heads in the array * @bhs: array of pointers to &struct buffer_head * diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 9e8de18a168a..2cf4f7f09e32 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -688,7 +688,7 @@ struct f2fs_io_info { struct f2fs_sb_info *sbi; /* f2fs_sb_info pointer */ enum page_type type; /* contains DATA/NODE/META/META_FLUSH */ int op; /* contains REQ_OP_ */ - int op_flags; /* rq_flag_bits */ + int op_flags; /* req_flag_bits */ block_t new_blkaddr; /* new block address to be written */ block_t old_blkaddr; /* old block address before Cow */ struct page *page; /* page to be written */ diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 49d5a1b61b06..b1f9144b42c7 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -231,7 +231,7 @@ static void gfs2_end_log_write(struct bio *bio) * gfs2_log_flush_bio - Submit any pending log bio * @sdp: The superblock * @op: REQ_OP - * @op_flags: rq_flag_bits + * @op_flags: req_flag_bits * * Submit any pending part-built or full bio to the block device. If * there is no pending bio, then this is a no-op. diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 3bf5d33800ab..ddaf28d0988f 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -581,15 +581,14 @@ static inline void blkg_rwstat_exit(struct blkg_rwstat *rwstat) /** * blkg_rwstat_add - add a value to a blkg_rwstat * @rwstat: target blkg_rwstat - * @op: REQ_OP - * @op_flags: rq_flag_bits + * @op: REQ_OP and flags * @val: value to add * * Add @val to @rwstat. The counters are chosen according to @rw. The * caller is responsible for synchronizing calls to this function. 
*/ static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat, - int op, int op_flags, uint64_t val) + unsigned int op, uint64_t val) { struct percpu_counter *cnt; @@ -600,7 +599,7 @@ static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat, __percpu_counter_add(cnt, val, BLKG_STAT_CPU_BATCH); - if (op_flags & REQ_SYNC) + if (op & REQ_SYNC) cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_SYNC]; else cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_ASYNC]; @@ -705,9 +704,9 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q, if (!throtl) { blkg = blkg ?: q->root_blkg; - blkg_rwstat_add(&blkg->stat_bytes, bio_op(bio), bio->bi_opf, + blkg_rwstat_add(&blkg->stat_bytes, bio->bi_opf, bio->bi_iter.bi_size); - blkg_rwstat_add(&blkg->stat_ios, bio_op(bio), bio->bi_opf, 1); + blkg_rwstat_add(&blkg->stat_ios, bio->bi_opf, 1); } rcu_read_unlock(); diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index ec69a8fe3b29..dca972d67548 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -88,24 +88,6 @@ struct bio { struct bio_vec bi_inline_vecs[0]; }; -#define BIO_OP_SHIFT (8 * FIELD_SIZEOF(struct bio, bi_opf) - REQ_OP_BITS) -#define bio_flags(bio) ((bio)->bi_opf & ((1 << BIO_OP_SHIFT) - 1)) -#define bio_op(bio) ((bio)->bi_opf >> BIO_OP_SHIFT) - -#define bio_set_op_attrs(bio, op, op_flags) do { \ - if (__builtin_constant_p(op)) \ - BUILD_BUG_ON((op) + 0U >= (1U << REQ_OP_BITS)); \ - else \ - WARN_ON_ONCE((op) + 0U >= (1U << REQ_OP_BITS)); \ - if (__builtin_constant_p(op_flags)) \ - BUILD_BUG_ON((op_flags) + 0U >= (1U << BIO_OP_SHIFT)); \ - else \ - WARN_ON_ONCE((op_flags) + 0U >= (1U << BIO_OP_SHIFT)); \ - (bio)->bi_opf = bio_flags(bio); \ - (bio)->bi_opf |= (((op) + 0U) << BIO_OP_SHIFT); \ - (bio)->bi_opf |= (op_flags); \ -} while (0) - #define BIO_RESET_BYTES offsetof(struct bio, bi_max_vecs) /* @@ -147,26 +129,40 @@ struct bio { #endif /* CONFIG_BLOCK */ /* - * Request flags. 
For use in the cmd_flags field of struct request, and in - * bi_opf of struct bio. Note that some flags are only valid in either one. + * Operations and flags common to the bio and request structures. + * We use 8 bits for encoding the operation, and the remaining 24 for flags. */ -enum rq_flag_bits { - /* common flags */ - __REQ_FAILFAST_DEV, /* no driver retries of device errors */ +#define REQ_OP_BITS 8 +#define REQ_OP_MASK ((1 << REQ_OP_BITS) - 1) +#define REQ_FLAG_BITS 24 + +enum req_opf { + REQ_OP_READ, + REQ_OP_WRITE, + REQ_OP_DISCARD, /* request to discard sectors */ + REQ_OP_SECURE_ERASE, /* request to securely erase sectors */ + REQ_OP_WRITE_SAME, /* write same block many times */ + REQ_OP_FLUSH, /* request for cache flush */ + REQ_OP_ZONE_REPORT, /* Get zone information */ + REQ_OP_ZONE_RESET, /* Reset a zone write pointer */ + + REQ_OP_LAST, +}; + +enum req_flag_bits { + __REQ_FAILFAST_DEV = /* no driver retries of device errors */ + REQ_OP_BITS, __REQ_FAILFAST_TRANSPORT, /* no driver retries of transport errors */ __REQ_FAILFAST_DRIVER, /* no driver retries of driver errors */ - __REQ_SYNC, /* request is sync (sync write or read) */ __REQ_META, /* metadata io request */ __REQ_PRIO, /* boost priority in cfq */ - __REQ_NOMERGE, /* don't touch this for merging */ __REQ_NOIDLE, /* don't anticipate more IO after this one */ __REQ_INTEGRITY, /* I/O includes block integrity payload */ __REQ_FUA, /* forced unit access */ __REQ_PREFLUSH, /* request for cache flush */ __REQ_RAHEAD, /* read ahead, can fail anytime */ - __REQ_NR_BITS, /* stops here */ }; @@ -176,37 +172,32 @@ enum rq_flag_bits { #define REQ_SYNC (1ULL << __REQ_SYNC) #define REQ_META (1ULL << __REQ_META) #define REQ_PRIO (1ULL << __REQ_PRIO) +#define REQ_NOMERGE (1ULL << __REQ_NOMERGE) #define REQ_NOIDLE (1ULL << __REQ_NOIDLE) #define REQ_INTEGRITY (1ULL << __REQ_INTEGRITY) +#define REQ_FUA (1ULL << __REQ_FUA) +#define REQ_PREFLUSH (1ULL << __REQ_PREFLUSH) +#define REQ_RAHEAD (1ULL << __REQ_RAHEAD) 
#define REQ_FAILFAST_MASK \ (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER) -#define REQ_COMMON_MASK \ - (REQ_FAILFAST_MASK | REQ_SYNC | REQ_META | REQ_PRIO | REQ_NOIDLE | \ - REQ_PREFLUSH | REQ_FUA | REQ_INTEGRITY | REQ_NOMERGE | REQ_RAHEAD) -#define REQ_CLONE_MASK REQ_COMMON_MASK -/* This mask is used for both bio and request merge checking */ #define REQ_NOMERGE_FLAGS \ (REQ_NOMERGE | REQ_PREFLUSH | REQ_FUA) -#define REQ_RAHEAD (1ULL << __REQ_RAHEAD) -#define REQ_FUA (1ULL << __REQ_FUA) -#define REQ_NOMERGE (1ULL << __REQ_NOMERGE) -#define REQ_PREFLUSH (1ULL << __REQ_PREFLUSH) +#define bio_op(bio) \ + ((bio)->bi_opf & REQ_OP_MASK) +#define req_op(req) \ + ((req)->cmd_flags & REQ_OP_MASK) -enum req_op { - REQ_OP_READ, - REQ_OP_WRITE, - REQ_OP_DISCARD, /* request to discard sectors */ - REQ_OP_SECURE_ERASE, /* request to securely erase sectors */ - REQ_OP_WRITE_SAME, /* write same block many times */ - REQ_OP_FLUSH, /* request for cache flush */ - REQ_OP_ZONE_REPORT, /* Get zone information */ - REQ_OP_ZONE_RESET, /* Reset a zone write pointer */ -}; +/* obsolete, don't use in new code */ +#define bio_set_op_attrs(bio, op, op_flags) \ + ((bio)->bi_opf |= (op | op_flags)) -#define REQ_OP_BITS 3 +static inline bool op_is_sync(unsigned int op) +{ + return (op & REQ_OP_MASK) == REQ_OP_READ || (op & REQ_SYNC); +} typedef unsigned int blk_qc_t; #define BLK_QC_T_NONE -1U diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index b4415feac679..8396da2bb698 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -142,7 +142,7 @@ struct request { int cpu; unsigned cmd_type; - u64 cmd_flags; + unsigned int cmd_flags; /* op and common flags */ req_flags_t rq_flags; unsigned long atomic_flags; @@ -244,20 +244,6 @@ struct request { struct request *next_rq; }; -#define REQ_OP_SHIFT (8 * sizeof(u64) - REQ_OP_BITS) -#define req_op(req) ((req)->cmd_flags >> REQ_OP_SHIFT) - -#define req_set_op(req, op) do { \ - WARN_ON(op >= (1 << REQ_OP_BITS)); 
\ - (req)->cmd_flags &= ((1ULL << REQ_OP_SHIFT) - 1); \ - (req)->cmd_flags |= ((u64) (op) << REQ_OP_SHIFT); \ -} while (0) - -#define req_set_op_attrs(req, op, flags) do { \ - req_set_op(req, op); \ - (req)->cmd_flags |= flags; \ -} while (0) - static inline unsigned short req_get_ioprio(struct request *req) { return req->ioprio; @@ -741,17 +727,9 @@ static inline unsigned int blk_queue_zone_size(struct request_queue *q) return blk_queue_is_zoned(q) ? q->limits.chunk_sectors : 0; } -/* - * We regard a request as sync, if either a read or a sync write - */ -static inline bool rw_is_sync(int op, unsigned int rw_flags) -{ - return op == REQ_OP_READ || (rw_flags & REQ_SYNC); -} - static inline bool rq_is_sync(struct request *rq) { - return rw_is_sync(req_op(rq), rq->cmd_flags); + return op_is_sync(rq->cmd_flags); } static inline bool blk_rl_full(struct request_list *rl, bool sync) diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index cceb72f9e29f..e417f080219a 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -118,7 +118,7 @@ static inline int blk_cmd_buf_len(struct request *rq) } extern void blk_dump_cmd(char *buf, struct request *rq); -extern void blk_fill_rwbs(char *rwbs, int op, u32 rw, int bytes); +extern void blk_fill_rwbs(char *rwbs, unsigned int op, int bytes); #endif /* CONFIG_EVENT_TRACING && CONFIG_BLOCK */ diff --git a/include/linux/dm-io.h b/include/linux/dm-io.h index b91b023deffb..a52c6580cc9a 100644 --- a/include/linux/dm-io.h +++ b/include/linux/dm-io.h @@ -58,7 +58,7 @@ struct dm_io_notify { struct dm_io_client; struct dm_io_request { int bi_op; /* REQ_OP */ - int bi_op_flags; /* rq_flag_bits */ + int bi_op_flags; /* req_flag_bits */ struct dm_io_memory mem; /* Memory to use for io */ struct dm_io_notify notify; /* Synchronous if notify.fn is NULL */ struct dm_io_client *client; /* Client memory handler */ diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 
e7f358d2e5fc..f219c9aed360 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -30,7 +30,7 @@ typedef int (elevator_dispatch_fn) (struct request_queue *, int); typedef void (elevator_add_req_fn) (struct request_queue *, struct request *); typedef struct request *(elevator_request_list_fn) (struct request_queue *, struct request *); typedef void (elevator_completed_req_fn) (struct request_queue *, struct request *); -typedef int (elevator_may_queue_fn) (struct request_queue *, int, int); +typedef int (elevator_may_queue_fn) (struct request_queue *, unsigned int); typedef void (elevator_init_icq_fn) (struct io_cq *); typedef void (elevator_exit_icq_fn) (struct io_cq *); @@ -139,7 +139,7 @@ extern struct request *elv_former_request(struct request_queue *, struct request extern struct request *elv_latter_request(struct request_queue *, struct request *); extern int elv_register_queue(struct request_queue *q); extern void elv_unregister_queue(struct request_queue *q); -extern int elv_may_queue(struct request_queue *, int, int); +extern int elv_may_queue(struct request_queue *, unsigned int); extern void elv_completed_request(struct request_queue *, struct request *); extern int elv_set_request(struct request_queue *q, struct request *rq, struct bio *bio, gfp_t gfp_mask); diff --git a/include/trace/events/bcache.h b/include/trace/events/bcache.h index d336b890e31f..df3e9ae5ad8d 100644 --- a/include/trace/events/bcache.h +++ b/include/trace/events/bcache.h @@ -27,8 +27,7 @@ DECLARE_EVENT_CLASS(bcache_request, __entry->sector = bio->bi_iter.bi_sector; __entry->orig_sector = bio->bi_iter.bi_sector - 16; __entry->nr_sector = bio->bi_iter.bi_size >> 9; - blk_fill_rwbs(__entry->rwbs, bio_op(bio), bio->bi_opf, - bio->bi_iter.bi_size); + blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size); ), TP_printk("%d,%d %s %llu + %u (from %d,%d @ %llu)", @@ -102,8 +101,7 @@ DECLARE_EVENT_CLASS(bcache_bio, __entry->dev = bio->bi_bdev->bd_dev; __entry->sector 
= bio->bi_iter.bi_sector; __entry->nr_sector = bio->bi_iter.bi_size >> 9; - blk_fill_rwbs(__entry->rwbs, bio_op(bio), bio->bi_opf, - bio->bi_iter.bi_size); + blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size); ), TP_printk("%d,%d %s %llu + %u", @@ -138,8 +136,7 @@ TRACE_EVENT(bcache_read, __entry->dev = bio->bi_bdev->bd_dev; __entry->sector = bio->bi_iter.bi_sector; __entry->nr_sector = bio->bi_iter.bi_size >> 9; - blk_fill_rwbs(__entry->rwbs, bio_op(bio), bio->bi_opf, - bio->bi_iter.bi_size); + blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size); __entry->cache_hit = hit; __entry->bypass = bypass; ), @@ -170,8 +167,7 @@ TRACE_EVENT(bcache_write, __entry->inode = inode; __entry->sector = bio->bi_iter.bi_sector; __entry->nr_sector = bio->bi_iter.bi_size >> 9; - blk_fill_rwbs(__entry->rwbs, bio_op(bio), bio->bi_opf, - bio->bi_iter.bi_size); + blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size); __entry->writeback = writeback; __entry->bypass = bypass; ), diff --git a/include/trace/events/block.h b/include/trace/events/block.h index 8f3a163b8166..3e02e3a25413 100644 --- a/include/trace/events/block.h +++ b/include/trace/events/block.h @@ -84,8 +84,7 @@ DECLARE_EVENT_CLASS(block_rq_with_error, 0 : blk_rq_sectors(rq); __entry->errors = rq->errors; - blk_fill_rwbs(__entry->rwbs, req_op(rq), rq->cmd_flags, - blk_rq_bytes(rq)); + blk_fill_rwbs(__entry->rwbs, rq->cmd_flags, blk_rq_bytes(rq)); blk_dump_cmd(__get_str(cmd), rq); ), @@ -163,7 +162,7 @@ TRACE_EVENT(block_rq_complete, __entry->nr_sector = nr_bytes >> 9; __entry->errors = rq->errors; - blk_fill_rwbs(__entry->rwbs, req_op(rq), rq->cmd_flags, nr_bytes); + blk_fill_rwbs(__entry->rwbs, rq->cmd_flags, nr_bytes); blk_dump_cmd(__get_str(cmd), rq); ), @@ -199,8 +198,7 @@ DECLARE_EVENT_CLASS(block_rq, __entry->bytes = (rq->cmd_type == REQ_TYPE_BLOCK_PC) ? 
blk_rq_bytes(rq) : 0; - blk_fill_rwbs(__entry->rwbs, req_op(rq), rq->cmd_flags, - blk_rq_bytes(rq)); + blk_fill_rwbs(__entry->rwbs, rq->cmd_flags, blk_rq_bytes(rq)); blk_dump_cmd(__get_str(cmd), rq); memcpy(__entry->comm, current->comm, TASK_COMM_LEN); ), @@ -274,8 +272,7 @@ TRACE_EVENT(block_bio_bounce, bio->bi_bdev->bd_dev : 0; __entry->sector = bio->bi_iter.bi_sector; __entry->nr_sector = bio_sectors(bio); - blk_fill_rwbs(__entry->rwbs, bio_op(bio), bio->bi_opf, - bio->bi_iter.bi_size); + blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size); memcpy(__entry->comm, current->comm, TASK_COMM_LEN); ), @@ -313,8 +310,7 @@ TRACE_EVENT(block_bio_complete, __entry->sector = bio->bi_iter.bi_sector; __entry->nr_sector = bio_sectors(bio); __entry->error = error; - blk_fill_rwbs(__entry->rwbs, bio_op(bio), bio->bi_opf, - bio->bi_iter.bi_size); + blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size); ), TP_printk("%d,%d %s %llu + %u [%d]", @@ -341,8 +337,7 @@ DECLARE_EVENT_CLASS(block_bio_merge, __entry->dev = bio->bi_bdev->bd_dev; __entry->sector = bio->bi_iter.bi_sector; __entry->nr_sector = bio_sectors(bio); - blk_fill_rwbs(__entry->rwbs, bio_op(bio), bio->bi_opf, - bio->bi_iter.bi_size); + blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size); memcpy(__entry->comm, current->comm, TASK_COMM_LEN); ), @@ -409,8 +404,7 @@ TRACE_EVENT(block_bio_queue, __entry->dev = bio->bi_bdev->bd_dev; __entry->sector = bio->bi_iter.bi_sector; __entry->nr_sector = bio_sectors(bio); - blk_fill_rwbs(__entry->rwbs, bio_op(bio), bio->bi_opf, - bio->bi_iter.bi_size); + blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size); memcpy(__entry->comm, current->comm, TASK_COMM_LEN); ), @@ -438,7 +432,7 @@ DECLARE_EVENT_CLASS(block_get_rq, __entry->dev = bio ? bio->bi_bdev->bd_dev : 0; __entry->sector = bio ? bio->bi_iter.bi_sector : 0; __entry->nr_sector = bio ? bio_sectors(bio) : 0; - blk_fill_rwbs(__entry->rwbs, bio ? 
bio_op(bio) : 0, + blk_fill_rwbs(__entry->rwbs, bio ? bio->bi_opf : 0, __entry->nr_sector); memcpy(__entry->comm, current->comm, TASK_COMM_LEN); ), @@ -573,8 +567,7 @@ TRACE_EVENT(block_split, __entry->dev = bio->bi_bdev->bd_dev; __entry->sector = bio->bi_iter.bi_sector; __entry->new_sector = new_sector; - blk_fill_rwbs(__entry->rwbs, bio_op(bio), bio->bi_opf, - bio->bi_iter.bi_size); + blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size); memcpy(__entry->comm, current->comm, TASK_COMM_LEN); ), @@ -617,8 +610,7 @@ TRACE_EVENT(block_bio_remap, __entry->nr_sector = bio_sectors(bio); __entry->old_dev = dev; __entry->old_sector = from; - blk_fill_rwbs(__entry->rwbs, bio_op(bio), bio->bi_opf, - bio->bi_iter.bi_size); + blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size); ), TP_printk("%d,%d %s %llu + %u <- (%d,%d) %llu", @@ -664,8 +656,7 @@ TRACE_EVENT(block_rq_remap, __entry->old_dev = dev; __entry->old_sector = from; __entry->nr_bios = blk_rq_count_bios(rq); - blk_fill_rwbs(__entry->rwbs, req_op(rq), rq->cmd_flags, - blk_rq_bytes(rq)); + blk_fill_rwbs(__entry->rwbs, rq->cmd_flags, blk_rq_bytes(rq)); ), TP_printk("%d,%d %s %llu + %u <- (%d,%d) %llu %u", diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index dbafc5df03f3..95cecbf67f5c 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -1777,14 +1777,14 @@ void blk_dump_cmd(char *buf, struct request *rq) } } -void blk_fill_rwbs(char *rwbs, int op, u32 rw, int bytes) +void blk_fill_rwbs(char *rwbs, unsigned int op, int bytes) { int i = 0; - if (rw & REQ_PREFLUSH) + if (op & REQ_PREFLUSH) rwbs[i++] = 'F'; - switch (op) { + switch (op & REQ_OP_MASK) { case REQ_OP_WRITE: case REQ_OP_WRITE_SAME: rwbs[i++] = 'W'; @@ -1806,13 +1806,13 @@ void blk_fill_rwbs(char *rwbs, int op, u32 rw, int bytes) rwbs[i++] = 'N'; } - if (rw & REQ_FUA) + if (op & REQ_FUA) rwbs[i++] = 'F'; - if (rw & REQ_RAHEAD) + if (op & REQ_RAHEAD) rwbs[i++] = 'A'; - if (rw & REQ_SYNC) + if (op & REQ_SYNC) 
rwbs[i++] = 'S'; - if (rw & REQ_META) + if (op & REQ_META) rwbs[i++] = 'M'; rwbs[i] = '\0'; -- cgit v1.2.3 From 87374179c535a98337569904727aa02f960fe79e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 20 Oct 2016 15:12:15 +0200 Subject: block: add a proper block layer data direction encoding Currently the block layer op_is_write, bio_data_dir and rq_data_dir helper treat every operation that is not a READ as a data out operation. This worked surprisingly long, but the new REQ_OP_ZONE_REPORT operation actually adds a second operation that reads data from the device. Surprisingly nothing critical relied on this direction, but this might be a good opportunity to properly fix this issue up. We take a little inspiration and use the least significant bit of the operation number to encode the data direction, which just requires us to renumber the operations to fix this scheme. Signed-off-by: Christoph Hellwig Reviewed-by: Shaun Tancheff Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 38 ++++++++++++++++++++++++++++++-------- include/linux/fs.h | 5 ----- 2 files changed, 30 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index dca972d67548..3fa62cabe8d2 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -131,20 +131,37 @@ struct bio { /* * Operations and flags common to the bio and request structures. * We use 8 bits for encoding the operation, and the remaining 24 for flags. + * + * The least significant bit of the operation number indicates the data + * transfer direction: + * + * - if the least significant bit is set transfers are TO the device + * - if the least significant bit is not set transfers are FROM the device + * + * If a operation does not transfer data the least significant bit has no + * meaning. 
*/ #define REQ_OP_BITS 8 #define REQ_OP_MASK ((1 << REQ_OP_BITS) - 1) #define REQ_FLAG_BITS 24 enum req_opf { - REQ_OP_READ, - REQ_OP_WRITE, - REQ_OP_DISCARD, /* request to discard sectors */ - REQ_OP_SECURE_ERASE, /* request to securely erase sectors */ - REQ_OP_WRITE_SAME, /* write same block many times */ - REQ_OP_FLUSH, /* request for cache flush */ - REQ_OP_ZONE_REPORT, /* Get zone information */ - REQ_OP_ZONE_RESET, /* Reset a zone write pointer */ + /* read sectors from the device */ + REQ_OP_READ = 0, + /* write sectors to the device */ + REQ_OP_WRITE = 1, + /* flush the volatile write cache */ + REQ_OP_FLUSH = 2, + /* discard sectors */ + REQ_OP_DISCARD = 3, + /* get zone information */ + REQ_OP_ZONE_REPORT = 4, + /* securely erase sectors */ + REQ_OP_SECURE_ERASE = 5, + /* reset a zone write pointer */ + REQ_OP_ZONE_RESET = 6, + /* write the same sector many times */ + REQ_OP_WRITE_SAME = 7, REQ_OP_LAST, }; @@ -194,6 +211,11 @@ enum req_flag_bits { #define bio_set_op_attrs(bio, op, op_flags) \ ((bio)->bi_opf |= (op | op_flags)) +static inline bool op_is_write(unsigned int op) +{ + return (op & 1); +} + static inline bool op_is_sync(unsigned int op) { return (op & REQ_OP_MASK) == REQ_OP_READ || (op & REQ_SYNC); diff --git a/include/linux/fs.h b/include/linux/fs.h index 16d2b6e874d6..e3e878f12b25 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2499,11 +2499,6 @@ extern void make_bad_inode(struct inode *); extern bool is_bad_inode(struct inode *); #ifdef CONFIG_BLOCK -static inline bool op_is_write(unsigned int op) -{ - return op == REQ_OP_READ ? false : true; -} - /* * return data direction, READ or WRITE */ -- cgit v1.2.3 From 42412c3aae5d8ea57a46b8ff86bb67bc1a270d9c Mon Sep 17 00:00:00 2001 From: Silvio Fricke Date: Fri, 28 Oct 2016 10:14:09 +0200 Subject: workqueue: kerneldocify workqueue_attrs Only formatting changes. 
Signed-off-by: Silvio Fricke Acked-by: Tejun Heo Signed-off-by: Jonathan Corbet --- include/linux/workqueue.h | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index fc6e22186405..d4f16cf6281c 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -119,18 +119,30 @@ struct delayed_work { int cpu; }; -/* - * A struct for workqueue attributes. This can be used to change - * attributes of an unbound workqueue. +/** + * struct workqueue_attrs - A struct for workqueue attributes. * - * Unlike other fields, ->no_numa isn't a property of a worker_pool. It - * only modifies how apply_workqueue_attrs() select pools and thus doesn't - * participate in pool hash calculations or equality comparisons. + * This can be used to change attributes of an unbound workqueue. */ struct workqueue_attrs { - int nice; /* nice level */ - cpumask_var_t cpumask; /* allowed CPUs */ - bool no_numa; /* disable NUMA affinity */ + /** + * @nice: nice level + */ + int nice; + + /** + * @cpumask: allowed CPUs + */ + cpumask_var_t cpumask; + + /** + * @no_numa: disable NUMA affinity + * + * Unlike other fields, ``no_numa`` isn't a property of a worker_pool. It + * only modifies how :c:func:`apply_workqueue_attrs` select pools and thus + * doesn't participate in pool hash calculations or equality comparisons. + */ + bool no_numa; }; static inline struct delayed_work *to_delayed_work(struct work_struct *work) @@ -272,7 +284,7 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; } /* * Workqueue flags and constants. For details, please refer to - * Documentation/workqueue.txt. + * Documentation/core-api/workqueue.rst. 
*/ enum { WQ_UNBOUND = 1 << 1, /* not bound to any cpu */ @@ -370,7 +382,8 @@ __alloc_workqueue_key(const char *fmt, unsigned int flags, int max_active, * @args...: args for @fmt * * Allocate a workqueue with the specified parameters. For detailed - * information on WQ_* flags, please refer to Documentation/workqueue.txt. + * information on WQ_* flags, please refer to + * Documentation/core-api/workqueue.rst. * * The __lock_name macro dance is to guarantee that single lock_class_key * doesn't end up with different namesm, which isn't allowed by lockdep. -- cgit v1.2.3 From b917783c7b350518f8c5d88bb5848aa8064408a6 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 26 Oct 2016 18:49:46 +0200 Subject: flow_dissector: __skb_get_hash_symmetric arg can be const Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/linux/skbuff.h | 2 +- net/core/flow_dissector.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 601258f6e621..663fda2887f7 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1086,7 +1086,7 @@ __skb_set_sw_hash(struct sk_buff *skb, __u32 hash, bool is_l4) } void __skb_get_hash(struct sk_buff *skb); -u32 __skb_get_hash_symmetric(struct sk_buff *skb); +u32 __skb_get_hash_symmetric(const struct sk_buff *skb); u32 skb_get_poff(const struct sk_buff *skb); u32 __skb_get_poff(const struct sk_buff *skb, void *data, const struct flow_keys *keys, int hlen); diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 1a7b80f73376..0cc607d05fc8 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -723,7 +723,7 @@ EXPORT_SYMBOL(make_flow_keys_digest); static struct flow_dissector flow_keys_dissector_symmetric __read_mostly; -u32 __skb_get_hash_symmetric(struct sk_buff *skb) +u32 __skb_get_hash_symmetric(const struct sk_buff *skb) { struct flow_keys keys; -- cgit v1.2.3 From 
5579e1519bad43b874922dbe87c74fdcbd97a7db Mon Sep 17 00:00:00 2001 From: Artemy Kovalyov Date: Wed, 31 Aug 2016 05:17:54 +0000 Subject: net/mlx5: Update struct mlx5_ifc_xrqc_bits Update struct mlx5_ifc_xrqc_bits according to last specification Signed-off-by: Artemy Kovalyov Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 6045d4d58065..12f72e45a3f0 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -2844,7 +2844,7 @@ struct mlx5_ifc_xrqc_bits { struct mlx5_ifc_tag_matching_topology_context_bits tag_matching_topology_context; - u8 reserved_at_180[0x200]; + u8 reserved_at_180[0x880]; struct mlx5_ifc_wq_bits wq; }; -- cgit v1.2.3 From dd257efb1e0f8875ed7e42b88837a8dada0d0e41 Mon Sep 17 00:00:00 2001 From: Artemy Kovalyov Date: Wed, 31 Aug 2016 05:29:58 +0000 Subject: net/mlx5: Ensure SRQ physical address structure endianness SRQ physical address structure field should be in big-endian format. Signed-off-by: Artemy Kovalyov Signed-off-by: Leon Romanovsky Signed-off-by: Leon Romanovsky --- include/linux/mlx5/srq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/srq.h b/include/linux/mlx5/srq.h index 33c97dc900f8..1cde0fd53f90 100644 --- a/include/linux/mlx5/srq.h +++ b/include/linux/mlx5/srq.h @@ -55,7 +55,7 @@ struct mlx5_srq_attr { u32 lwm; u32 user_index; u64 db_record; - u64 *pas; + __be64 *pas; }; struct mlx5_core_dev; -- cgit v1.2.3 From 813f854053c26204e2723c498def4c7870dcc7f4 Mon Sep 17 00:00:00 2001 From: Mohamad Haj Yahia Date: Thu, 11 Aug 2016 11:21:39 +0300 Subject: net/mlx5: Introduce TSAR manipulation firmware commands TSAR (stands for Transmit Scheduling ARbiter) is a hardware component that is responsible for selecting the next entity to serve on the transmit path. 
The arbitration defines the QoS policy between the agents connected to the TSAR. The TSAR consists of two main features: 1) BW Allocation between agents: The TSAR implements a deficit weighted round robin between the agents. Each agent attached to the TSAR is assigned with a weight and it is awarded transmission tokens according to this weight. 2) Rate limiter per agent: Each agent attached to the TSAR is (optionally) assigned with a rate limit. TSAR will not allow scheduling for an agent exceeding its defined rate limit. In this patch we implement the API of manipulating the TSAR. Signed-off-by: Mohamad Haj Yahia Signed-off-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 13 +- .../net/ethernet/mellanox/mlx5/core/mlx5_core.h | 7 + drivers/net/ethernet/mellanox/mlx5/core/rl.c | 65 +++++++ include/linux/mlx5/mlx5_ifc.h | 199 ++++++++++++++++++++- 4 files changed, 279 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 1e639f886021..8561102f2563 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -318,6 +318,8 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY: case MLX5_CMD_OP_SET_FLOW_TABLE_ROOT: case MLX5_CMD_OP_DEALLOC_ENCAP_HEADER: + case MLX5_CMD_OP_DESTROY_SCHEDULING_ELEMENT: + case MLX5_CMD_OP_DESTROY_QOS_PARA_VPORT: return MLX5_CMD_STAT_OK; case MLX5_CMD_OP_QUERY_HCA_CAP: @@ -419,11 +421,14 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_QUERY_FLOW_TABLE: case MLX5_CMD_OP_CREATE_FLOW_GROUP: case MLX5_CMD_OP_QUERY_FLOW_GROUP: - case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY: case MLX5_CMD_OP_ALLOC_FLOW_COUNTER: case MLX5_CMD_OP_QUERY_FLOW_COUNTER: case MLX5_CMD_OP_ALLOC_ENCAP_HEADER: + case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT: 
+ case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT: + case MLX5_CMD_OP_MODIFY_SCHEDULING_ELEMENT: + case MLX5_CMD_OP_CREATE_QOS_PARA_VPORT: *status = MLX5_DRIVER_STATUS_ABORTED; *synd = MLX5_DRIVER_SYND; return -EIO; @@ -580,6 +585,12 @@ const char *mlx5_command_str(int command) MLX5_COMMAND_STR_CASE(MODIFY_FLOW_TABLE); MLX5_COMMAND_STR_CASE(ALLOC_ENCAP_HEADER); MLX5_COMMAND_STR_CASE(DEALLOC_ENCAP_HEADER); + MLX5_COMMAND_STR_CASE(CREATE_SCHEDULING_ELEMENT); + MLX5_COMMAND_STR_CASE(DESTROY_SCHEDULING_ELEMENT); + MLX5_COMMAND_STR_CASE(QUERY_SCHEDULING_ELEMENT); + MLX5_COMMAND_STR_CASE(MODIFY_SCHEDULING_ELEMENT); + MLX5_COMMAND_STR_CASE(CREATE_QOS_PARA_VPORT); + MLX5_COMMAND_STR_CASE(DESTROY_QOS_PARA_VPORT); default: return "unknown command opcode"; } } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 3d0cfb9f18f9..bf431715172c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -91,6 +91,13 @@ int mlx5_core_sriov_configure(struct pci_dev *dev, int num_vfs); bool mlx5_sriov_is_enabled(struct mlx5_core_dev *dev); int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id); int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id); +int mlx5_create_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy, + void *context, u32 *element_id); +int mlx5_modify_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy, + void *context, u32 element_id, + u32 modify_bitmask); +int mlx5_destroy_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy, + u32 element_id); int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev); cycle_t mlx5_read_internal_timer(struct mlx5_core_dev *dev); u32 mlx5_get_msix_vec(struct mlx5_core_dev *dev, int vecidx); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rl.c b/drivers/net/ethernet/mellanox/mlx5/core/rl.c index 104902a93a0b..e651e4c02867 100644 --- 
a/drivers/net/ethernet/mellanox/mlx5/core/rl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/rl.c @@ -36,6 +36,71 @@ #include #include "mlx5_core.h" +/* Scheduling element fw management */ +int mlx5_create_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy, + void *ctx, u32 *element_id) +{ + u32 in[MLX5_ST_SZ_DW(create_scheduling_element_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(create_scheduling_element_in)] = {0}; + void *schedc; + int err; + + schedc = MLX5_ADDR_OF(create_scheduling_element_in, in, + scheduling_context); + MLX5_SET(create_scheduling_element_in, in, opcode, + MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT); + MLX5_SET(create_scheduling_element_in, in, scheduling_hierarchy, + hierarchy); + memcpy(schedc, ctx, MLX5_ST_SZ_BYTES(scheduling_context)); + + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + if (err) + return err; + + *element_id = MLX5_GET(create_scheduling_element_out, out, + scheduling_element_id); + return 0; +} + +int mlx5_modify_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy, + void *ctx, u32 element_id, + u32 modify_bitmask) +{ + u32 in[MLX5_ST_SZ_DW(modify_scheduling_element_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(modify_scheduling_element_in)] = {0}; + void *schedc; + + schedc = MLX5_ADDR_OF(modify_scheduling_element_in, in, + scheduling_context); + MLX5_SET(modify_scheduling_element_in, in, opcode, + MLX5_CMD_OP_MODIFY_SCHEDULING_ELEMENT); + MLX5_SET(modify_scheduling_element_in, in, scheduling_element_id, + element_id); + MLX5_SET(modify_scheduling_element_in, in, modify_bitmask, + modify_bitmask); + MLX5_SET(modify_scheduling_element_in, in, scheduling_hierarchy, + hierarchy); + memcpy(schedc, ctx, MLX5_ST_SZ_BYTES(scheduling_context)); + + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + +int mlx5_destroy_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy, + u32 element_id) +{ + u32 in[MLX5_ST_SZ_DW(destroy_scheduling_element_in)] = {0}; + u32 
out[MLX5_ST_SZ_DW(destroy_scheduling_element_in)] = {0}; + + MLX5_SET(destroy_scheduling_element_in, in, opcode, + MLX5_CMD_OP_DESTROY_SCHEDULING_ELEMENT); + MLX5_SET(destroy_scheduling_element_in, in, scheduling_element_id, + element_id); + MLX5_SET(destroy_scheduling_element_in, in, scheduling_hierarchy, + hierarchy); + + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + /* Finds an entry where we can register the given rate * If the rate already exists, return the entry where it is registered, * otherwise return the first available entry. diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 12f72e45a3f0..2632cb2caf10 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -145,6 +145,12 @@ enum { MLX5_CMD_OP_QUERY_Q_COUNTER = 0x773, MLX5_CMD_OP_SET_RATE_LIMIT = 0x780, MLX5_CMD_OP_QUERY_RATE_LIMIT = 0x781, + MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT = 0x782, + MLX5_CMD_OP_DESTROY_SCHEDULING_ELEMENT = 0x783, + MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT = 0x784, + MLX5_CMD_OP_MODIFY_SCHEDULING_ELEMENT = 0x785, + MLX5_CMD_OP_CREATE_QOS_PARA_VPORT = 0x786, + MLX5_CMD_OP_DESTROY_QOS_PARA_VPORT = 0x787, MLX5_CMD_OP_ALLOC_PD = 0x800, MLX5_CMD_OP_DEALLOC_PD = 0x801, MLX5_CMD_OP_ALLOC_UAR = 0x802, @@ -537,13 +543,27 @@ struct mlx5_ifc_e_switch_cap_bits { struct mlx5_ifc_qos_cap_bits { u8 packet_pacing[0x1]; - u8 reserved_0[0x1f]; - u8 reserved_1[0x20]; + u8 esw_scheduling[0x1]; + u8 reserved_at_2[0x1e]; + + u8 reserved_at_20[0x20]; + u8 packet_pacing_max_rate[0x20]; + u8 packet_pacing_min_rate[0x20]; - u8 reserved_2[0x10]; + + u8 reserved_at_80[0x10]; u8 packet_pacing_rate_table_size[0x10]; - u8 reserved_3[0x760]; + + u8 esw_element_type[0x10]; + u8 esw_tsar_type[0x10]; + + u8 reserved_at_c0[0x10]; + u8 max_qos_para_vport[0x10]; + + u8 max_tsar_bw_share[0x20]; + + u8 reserved_at_100[0x700]; }; struct mlx5_ifc_per_protocol_networking_offload_caps_bits { @@ -2333,6 +2353,30 @@ struct mlx5_ifc_sqc_bits { struct 
mlx5_ifc_wq_bits wq; }; +enum { + SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR = 0x0, + SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT = 0x1, + SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC = 0x2, + SCHEDULING_CONTEXT_ELEMENT_TYPE_PARA_VPORT_TC = 0x3, +}; + +struct mlx5_ifc_scheduling_context_bits { + u8 element_type[0x8]; + u8 reserved_at_8[0x18]; + + u8 element_attributes[0x20]; + + u8 parent_element_id[0x20]; + + u8 reserved_at_60[0x40]; + + u8 bw_share[0x20]; + + u8 max_average_bw[0x20]; + + u8 reserved_at_e0[0x120]; +}; + struct mlx5_ifc_rqtc_bits { u8 reserved_at_0[0xa0]; @@ -2920,6 +2964,29 @@ struct mlx5_ifc_register_loopback_control_bits { u8 reserved_at_20[0x60]; }; +struct mlx5_ifc_vport_tc_element_bits { + u8 traffic_class[0x4]; + u8 reserved_at_4[0xc]; + u8 vport_number[0x10]; +}; + +struct mlx5_ifc_vport_element_bits { + u8 reserved_at_0[0x10]; + u8 vport_number[0x10]; +}; + +enum { + TSAR_ELEMENT_TSAR_TYPE_DWRR = 0x0, + TSAR_ELEMENT_TSAR_TYPE_ROUND_ROBIN = 0x1, + TSAR_ELEMENT_TSAR_TYPE_ETS = 0x2, +}; + +struct mlx5_ifc_tsar_element_bits { + u8 reserved_at_0[0x8]; + u8 tsar_type[0x8]; + u8 reserved_at_10[0x10]; +}; + struct mlx5_ifc_teardown_hca_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; @@ -3540,6 +3607,39 @@ struct mlx5_ifc_query_special_contexts_in_bits { u8 reserved_at_40[0x40]; }; +struct mlx5_ifc_query_scheduling_element_out_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0xc0]; + + struct mlx5_ifc_scheduling_context_bits scheduling_context; + + u8 reserved_at_300[0x100]; +}; + +enum { + SCHEDULING_HIERARCHY_E_SWITCH = 0x2, +}; + +struct mlx5_ifc_query_scheduling_element_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 scheduling_hierarchy[0x8]; + u8 reserved_at_48[0x18]; + + u8 scheduling_element_id[0x20]; + + u8 reserved_at_80[0x180]; +}; + struct mlx5_ifc_query_rqt_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; @@ 
-4725,6 +4825,43 @@ struct mlx5_ifc_modify_sq_in_bits { struct mlx5_ifc_sqc_bits ctx; }; +struct mlx5_ifc_modify_scheduling_element_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x1c0]; +}; + +enum { + MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_BW_SHARE = 0x1, + MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW = 0x2, +}; + +struct mlx5_ifc_modify_scheduling_element_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 scheduling_hierarchy[0x8]; + u8 reserved_at_48[0x18]; + + u8 scheduling_element_id[0x20]; + + u8 reserved_at_80[0x20]; + + u8 modify_bitmask[0x20]; + + u8 reserved_at_c0[0x40]; + + struct mlx5_ifc_scheduling_context_bits scheduling_context; + + u8 reserved_at_300[0x100]; +}; + struct mlx5_ifc_modify_rqt_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; @@ -5390,6 +5527,30 @@ struct mlx5_ifc_destroy_sq_in_bits { u8 reserved_at_60[0x20]; }; +struct mlx5_ifc_destroy_scheduling_element_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x1c0]; +}; + +struct mlx5_ifc_destroy_scheduling_element_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 scheduling_hierarchy[0x8]; + u8 reserved_at_48[0x18]; + + u8 scheduling_element_id[0x20]; + + u8 reserved_at_80[0x180]; +}; + struct mlx5_ifc_destroy_rqt_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; @@ -6017,6 +6178,36 @@ struct mlx5_ifc_create_sq_in_bits { struct mlx5_ifc_sqc_bits ctx; }; +struct mlx5_ifc_create_scheduling_element_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; + + u8 scheduling_element_id[0x20]; + + u8 reserved_at_a0[0x160]; +}; + +struct mlx5_ifc_create_scheduling_element_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 
scheduling_hierarchy[0x8]; + u8 reserved_at_48[0x18]; + + u8 reserved_at_60[0xa0]; + + struct mlx5_ifc_scheduling_context_bits scheduling_context; + + u8 reserved_at_300[0x100]; +}; + struct mlx5_ifc_create_rqt_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; -- cgit v1.2.3 From 74491de937125d0c98c9b9c9208b4105717a3caa Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Wed, 31 Aug 2016 11:24:25 +0000 Subject: net/mlx5: Add multi dest support Currently when calling mlx5_add_flow_rule we accept only one flow destination; this commit allows passing multiple destinations. This change forces us to change the return structure to a more flexible one. We introduce a flow handle (struct mlx5_flow_handle); it holds internally the number of rules created and holds an array where each cell points to a flow rule. From the consumers (of mlx5_add_flow_rule) point of view this change is only cosmetic and requires only to change the type of the returned value they store. From the core point of view, we now need to use a loop when allocating and deleting rules (e.g. when given a flow handle). 
Signed-off-by: Mark Bloch Signed-off-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/main.c | 14 +- drivers/infiniband/hw/mlx5/mlx5_ib.h | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en.h | 14 +- drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c | 38 +-- drivers/net/ethernet/mellanox/mlx5/core/en_fs.c | 49 ++-- .../ethernet/mellanox/mlx5/core/en_fs_ethtool.c | 19 +- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 6 +- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 32 +-- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 68 ++--- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 22 +- .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 42 +-- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 289 ++++++++++++++------- drivers/net/ethernet/mellanox/mlx5/core/fs_core.h | 5 + include/linux/mlx5/fs.h | 28 +- 14 files changed, 374 insertions(+), 254 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index d02341eebddb..8e0dbd51944e 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1771,13 +1771,13 @@ static int mlx5_ib_destroy_flow(struct ib_flow *flow_id) mutex_lock(&dev->flow_db.lock); list_for_each_entry_safe(iter, tmp, &handler->list, list) { - mlx5_del_flow_rule(iter->rule); + mlx5_del_flow_rules(iter->rule); put_flow_table(dev, iter->prio, true); list_del(&iter->list); kfree(iter); } - mlx5_del_flow_rule(handler->rule); + mlx5_del_flow_rules(handler->rule); put_flow_table(dev, handler->prio, true); mutex_unlock(&dev->flow_db.lock); @@ -1907,10 +1907,10 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev, spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria); action = dst ? 
MLX5_FLOW_CONTEXT_ACTION_FWD_DEST : MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO; - handler->rule = mlx5_add_flow_rule(ft, spec, + handler->rule = mlx5_add_flow_rules(ft, spec, action, MLX5_FS_DEFAULT_FLOW_TAG, - dst); + dst, 1); if (IS_ERR(handler->rule)) { err = PTR_ERR(handler->rule); @@ -1941,7 +1941,7 @@ static struct mlx5_ib_flow_handler *create_dont_trap_rule(struct mlx5_ib_dev *de handler_dst = create_flow_rule(dev, ft_prio, flow_attr, dst); if (IS_ERR(handler_dst)) { - mlx5_del_flow_rule(handler->rule); + mlx5_del_flow_rules(handler->rule); ft_prio->refcount--; kfree(handler); handler = handler_dst; @@ -2004,7 +2004,7 @@ static struct mlx5_ib_flow_handler *create_leftovers_rule(struct mlx5_ib_dev *de &leftovers_specs[LEFTOVERS_UC].flow_attr, dst); if (IS_ERR(handler_ucast)) { - mlx5_del_flow_rule(handler->rule); + mlx5_del_flow_rules(handler->rule); ft_prio->refcount--; kfree(handler); handler = handler_ucast; @@ -2046,7 +2046,7 @@ static struct mlx5_ib_flow_handler *create_sniffer_rule(struct mlx5_ib_dev *dev, return handler_rx; err_tx: - mlx5_del_flow_rule(handler_rx->rule); + mlx5_del_flow_rules(handler_rx->rule); ft_rx->refcount--; kfree(handler_rx); err: diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index dcdcd195fe53..d5d007740159 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -153,7 +153,7 @@ struct mlx5_ib_flow_handler { struct list_head list; struct ib_flow ibflow; struct mlx5_ib_flow_prio *prio; - struct mlx5_flow_rule *rule; + struct mlx5_flow_handle *rule; }; struct mlx5_ib_flow_db { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 460363b66cb1..47ee8ffe987f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -520,7 +520,7 @@ struct mlx5e_vxlan_db { struct mlx5e_l2_rule { u8 addr[ETH_ALEN + 2]; - struct mlx5_flow_rule *rule; + struct 
mlx5_flow_handle *rule; }; struct mlx5e_flow_table { @@ -541,10 +541,10 @@ struct mlx5e_tc_table { struct mlx5e_vlan_table { struct mlx5e_flow_table ft; unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; - struct mlx5_flow_rule *active_vlans_rule[VLAN_N_VID]; - struct mlx5_flow_rule *untagged_rule; - struct mlx5_flow_rule *any_vlan_rule; - bool filter_disabled; + struct mlx5_flow_handle *active_vlans_rule[VLAN_N_VID]; + struct mlx5_flow_handle *untagged_rule; + struct mlx5_flow_handle *any_vlan_rule; + bool filter_disabled; }; struct mlx5e_l2_table { @@ -562,14 +562,14 @@ struct mlx5e_l2_table { /* L3/L4 traffic type classifier */ struct mlx5e_ttc_table { struct mlx5e_flow_table ft; - struct mlx5_flow_rule *rules[MLX5E_NUM_TT]; + struct mlx5_flow_handle *rules[MLX5E_NUM_TT]; }; #define ARFS_HASH_SHIFT BITS_PER_BYTE #define ARFS_HASH_SIZE BIT(BITS_PER_BYTE) struct arfs_table { struct mlx5e_flow_table ft; - struct mlx5_flow_rule *default_rule; + struct mlx5_flow_handle *default_rule; struct hlist_head rules_hash[ARFS_HASH_SIZE]; }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c index a8cb38789774..8ff22e83e1dd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c @@ -56,7 +56,7 @@ struct arfs_tuple { struct arfs_rule { struct mlx5e_priv *priv; struct work_struct arfs_work; - struct mlx5_flow_rule *rule; + struct mlx5_flow_handle *rule; struct hlist_node hlist; int rxq; /* Flow ID passed to ndo_rx_flow_steer */ @@ -104,7 +104,7 @@ static int arfs_disable(struct mlx5e_priv *priv) tt = arfs_get_tt(i); /* Modify ttc rules destination to bypass the aRFS tables*/ err = mlx5_modify_rule_destination(priv->fs.ttc.rules[tt], - &dest); + &dest, NULL); if (err) { netdev_err(priv->netdev, "%s: modify ttc destination failed\n", @@ -137,7 +137,7 @@ int mlx5e_arfs_enable(struct mlx5e_priv *priv) tt = arfs_get_tt(i); /* Modify ttc rules destination 
to point on the aRFS FTs */ err = mlx5_modify_rule_destination(priv->fs.ttc.rules[tt], - &dest); + &dest, NULL); if (err) { netdev_err(priv->netdev, "%s: modify ttc destination failed err=%d\n", @@ -151,7 +151,7 @@ int mlx5e_arfs_enable(struct mlx5e_priv *priv) static void arfs_destroy_table(struct arfs_table *arfs_t) { - mlx5_del_flow_rule(arfs_t->default_rule); + mlx5_del_flow_rules(arfs_t->default_rule); mlx5e_destroy_flow_table(&arfs_t->ft); } @@ -205,10 +205,10 @@ static int arfs_add_default_rule(struct mlx5e_priv *priv, goto out; } - arfs_t->default_rule = mlx5_add_flow_rule(arfs_t->ft.t, spec, - MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, - MLX5_FS_DEFAULT_FLOW_TAG, - &dest); + arfs_t->default_rule = mlx5_add_flow_rules(arfs_t->ft.t, spec, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + MLX5_FS_DEFAULT_FLOW_TAG, + &dest, 1); if (IS_ERR(arfs_t->default_rule)) { err = PTR_ERR(arfs_t->default_rule); arfs_t->default_rule = NULL; @@ -396,7 +396,7 @@ static void arfs_may_expire_flow(struct mlx5e_priv *priv) spin_unlock_bh(&priv->fs.arfs.arfs_lock); hlist_for_each_entry_safe(arfs_rule, htmp, &del_list, hlist) { if (arfs_rule->rule) - mlx5_del_flow_rule(arfs_rule->rule); + mlx5_del_flow_rules(arfs_rule->rule); hlist_del(&arfs_rule->hlist); kfree(arfs_rule); } @@ -420,7 +420,7 @@ static void arfs_del_rules(struct mlx5e_priv *priv) hlist_for_each_entry_safe(rule, htmp, &del_list, hlist) { cancel_work_sync(&rule->arfs_work); if (rule->rule) - mlx5_del_flow_rule(rule->rule); + mlx5_del_flow_rules(rule->rule); hlist_del(&rule->hlist); kfree(rule); } @@ -462,12 +462,12 @@ static struct arfs_table *arfs_get_table(struct mlx5e_arfs_tables *arfs, return NULL; } -static struct mlx5_flow_rule *arfs_add_rule(struct mlx5e_priv *priv, - struct arfs_rule *arfs_rule) +static struct mlx5_flow_handle *arfs_add_rule(struct mlx5e_priv *priv, + struct arfs_rule *arfs_rule) { struct mlx5e_arfs_tables *arfs = &priv->fs.arfs; struct arfs_tuple *tuple = &arfs_rule->tuple; - struct mlx5_flow_rule *rule = 
NULL; + struct mlx5_flow_handle *rule = NULL; struct mlx5_flow_destination dest; struct arfs_table *arfs_table; struct mlx5_flow_spec *spec; @@ -544,9 +544,9 @@ static struct mlx5_flow_rule *arfs_add_rule(struct mlx5e_priv *priv, } dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; dest.tir_num = priv->direct_tir[arfs_rule->rxq].tirn; - rule = mlx5_add_flow_rule(ft, spec, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, - MLX5_FS_DEFAULT_FLOW_TAG, - &dest); + rule = mlx5_add_flow_rules(ft, spec, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + MLX5_FS_DEFAULT_FLOW_TAG, + &dest, 1); if (IS_ERR(rule)) { err = PTR_ERR(rule); netdev_err(priv->netdev, "%s: add rule(filter id=%d, rq idx=%d) failed, err=%d\n", @@ -559,14 +559,14 @@ out: } static void arfs_modify_rule_rq(struct mlx5e_priv *priv, - struct mlx5_flow_rule *rule, u16 rxq) + struct mlx5_flow_handle *rule, u16 rxq) { struct mlx5_flow_destination dst; int err = 0; dst.type = MLX5_FLOW_DESTINATION_TYPE_TIR; dst.tir_num = priv->direct_tir[rxq].tirn; - err = mlx5_modify_rule_destination(rule, &dst); + err = mlx5_modify_rule_destination(rule, &dst, NULL); if (err) netdev_warn(priv->netdev, "Failed to modfiy aRFS rule destination to rq=%d\n", rxq); @@ -578,7 +578,7 @@ static void arfs_handle_work(struct work_struct *work) struct arfs_rule, arfs_work); struct mlx5e_priv *priv = arfs_rule->priv; - struct mlx5_flow_rule *rule; + struct mlx5_flow_handle *rule; mutex_lock(&priv->state_lock); if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index 36fbc6b21a33..bed544d47ba1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -160,7 +160,7 @@ static int __mlx5e_add_vlan_rule(struct mlx5e_priv *priv, { struct mlx5_flow_table *ft = priv->fs.vlan.ft.t; struct mlx5_flow_destination dest; - struct mlx5_flow_rule **rule_p; + struct mlx5_flow_handle **rule_p; int err = 0; dest.type = 
MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; @@ -187,10 +187,10 @@ static int __mlx5e_add_vlan_rule(struct mlx5e_priv *priv, break; } - *rule_p = mlx5_add_flow_rule(ft, spec, - MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, - MLX5_FS_DEFAULT_FLOW_TAG, - &dest); + *rule_p = mlx5_add_flow_rules(ft, spec, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + MLX5_FS_DEFAULT_FLOW_TAG, + &dest, 1); if (IS_ERR(*rule_p)) { err = PTR_ERR(*rule_p); @@ -229,20 +229,20 @@ static void mlx5e_del_vlan_rule(struct mlx5e_priv *priv, switch (rule_type) { case MLX5E_VLAN_RULE_TYPE_UNTAGGED: if (priv->fs.vlan.untagged_rule) { - mlx5_del_flow_rule(priv->fs.vlan.untagged_rule); + mlx5_del_flow_rules(priv->fs.vlan.untagged_rule); priv->fs.vlan.untagged_rule = NULL; } break; case MLX5E_VLAN_RULE_TYPE_ANY_VID: if (priv->fs.vlan.any_vlan_rule) { - mlx5_del_flow_rule(priv->fs.vlan.any_vlan_rule); + mlx5_del_flow_rules(priv->fs.vlan.any_vlan_rule); priv->fs.vlan.any_vlan_rule = NULL; } break; case MLX5E_VLAN_RULE_TYPE_MATCH_VID: mlx5e_vport_context_update_vlans(priv); if (priv->fs.vlan.active_vlans_rule[vid]) { - mlx5_del_flow_rule(priv->fs.vlan.active_vlans_rule[vid]); + mlx5_del_flow_rules(priv->fs.vlan.active_vlans_rule[vid]); priv->fs.vlan.active_vlans_rule[vid] = NULL; } mlx5e_vport_context_update_vlans(priv); @@ -560,7 +560,7 @@ static void mlx5e_cleanup_ttc_rules(struct mlx5e_ttc_table *ttc) for (i = 0; i < MLX5E_NUM_TT; i++) { if (!IS_ERR_OR_NULL(ttc->rules[i])) { - mlx5_del_flow_rule(ttc->rules[i]); + mlx5_del_flow_rules(ttc->rules[i]); ttc->rules[i] = NULL; } } @@ -616,13 +616,14 @@ static struct { }, }; -static struct mlx5_flow_rule *mlx5e_generate_ttc_rule(struct mlx5e_priv *priv, - struct mlx5_flow_table *ft, - struct mlx5_flow_destination *dest, - u16 etype, - u8 proto) +static struct mlx5_flow_handle * +mlx5e_generate_ttc_rule(struct mlx5e_priv *priv, + struct mlx5_flow_table *ft, + struct mlx5_flow_destination *dest, + u16 etype, + u8 proto) { - struct mlx5_flow_rule *rule; + struct mlx5_flow_handle *rule; 
struct mlx5_flow_spec *spec; int err = 0; @@ -643,10 +644,10 @@ static struct mlx5_flow_rule *mlx5e_generate_ttc_rule(struct mlx5e_priv *priv, MLX5_SET(fte_match_param, spec->match_value, outer_headers.ethertype, etype); } - rule = mlx5_add_flow_rule(ft, spec, - MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, - MLX5_FS_DEFAULT_FLOW_TAG, - dest); + rule = mlx5_add_flow_rules(ft, spec, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + MLX5_FS_DEFAULT_FLOW_TAG, + dest, 1); if (IS_ERR(rule)) { err = PTR_ERR(rule); netdev_err(priv->netdev, "%s: add rule failed\n", __func__); @@ -660,7 +661,7 @@ static int mlx5e_generate_ttc_table_rules(struct mlx5e_priv *priv) { struct mlx5_flow_destination dest; struct mlx5e_ttc_table *ttc; - struct mlx5_flow_rule **rules; + struct mlx5_flow_handle **rules; struct mlx5_flow_table *ft; int tt; int err; @@ -801,7 +802,7 @@ static void mlx5e_del_l2_flow_rule(struct mlx5e_priv *priv, struct mlx5e_l2_rule *ai) { if (!IS_ERR_OR_NULL(ai->rule)) { - mlx5_del_flow_rule(ai->rule); + mlx5_del_flow_rules(ai->rule); ai->rule = NULL; } } @@ -847,9 +848,9 @@ static int mlx5e_add_l2_flow_rule(struct mlx5e_priv *priv, break; } - ai->rule = mlx5_add_flow_rule(ft, spec, - MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, - MLX5_FS_DEFAULT_FLOW_TAG, &dest); + ai->rule = mlx5_add_flow_rules(ft, spec, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + MLX5_FS_DEFAULT_FLOW_TAG, &dest, 1); if (IS_ERR(ai->rule)) { netdev_err(priv->netdev, "%s: add l2 rule(mac:%pM) failed\n", __func__, mv_dmac); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c index d17c24227900..cf52c06377f2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c @@ -36,7 +36,7 @@ struct mlx5e_ethtool_rule { struct list_head list; struct ethtool_rx_flow_spec flow_spec; - struct mlx5_flow_rule *rule; + struct mlx5_flow_handle *rule; struct mlx5e_ethtool_table *eth_ft; }; @@ -284,13 +284,14 @@ 
static bool outer_header_zero(u32 *match_criteria) size - 1); } -static struct mlx5_flow_rule *add_ethtool_flow_rule(struct mlx5e_priv *priv, - struct mlx5_flow_table *ft, - struct ethtool_rx_flow_spec *fs) +static struct mlx5_flow_handle * +add_ethtool_flow_rule(struct mlx5e_priv *priv, + struct mlx5_flow_table *ft, + struct ethtool_rx_flow_spec *fs) { struct mlx5_flow_destination *dst = NULL; struct mlx5_flow_spec *spec; - struct mlx5_flow_rule *rule; + struct mlx5_flow_handle *rule; int err = 0; u32 action; @@ -317,8 +318,8 @@ static struct mlx5_flow_rule *add_ethtool_flow_rule(struct mlx5e_priv *priv, } spec->match_criteria_enable = (!outer_header_zero(spec->match_criteria)); - rule = mlx5_add_flow_rule(ft, spec, action, - MLX5_FS_DEFAULT_FLOW_TAG, dst); + rule = mlx5_add_flow_rules(ft, spec, action, + MLX5_FS_DEFAULT_FLOW_TAG, dst, 1); if (IS_ERR(rule)) { err = PTR_ERR(rule); netdev_err(priv->netdev, "%s: failed to add ethtool steering rule: %d\n", @@ -335,7 +336,7 @@ static void del_ethtool_rule(struct mlx5e_priv *priv, struct mlx5e_ethtool_rule *eth_rule) { if (eth_rule->rule) - mlx5_del_flow_rule(eth_rule->rule); + mlx5_del_flow_rules(eth_rule->rule); list_del(ð_rule->list); priv->fs.ethtool.tot_num_rules--; put_flow_table(eth_rule->eth_ft); @@ -475,7 +476,7 @@ int mlx5e_ethtool_flow_replace(struct mlx5e_priv *priv, { struct mlx5e_ethtool_table *eth_ft; struct mlx5e_ethtool_rule *eth_rule; - struct mlx5_flow_rule *rule; + struct mlx5_flow_handle *rule; int num_tuples; int err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 3c97da103d30..88d3fd132d63 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -328,7 +328,7 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv) struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5_eswitch_rep *rep = priv->ppriv; struct mlx5_core_dev *mdev = priv->mdev; - struct 
mlx5_flow_rule *flow_rule; + struct mlx5_flow_handle *flow_rule; int err; int i; @@ -360,7 +360,7 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv) return 0; err_del_flow_rule: - mlx5_del_flow_rule(rep->vport_rx_rule); + mlx5_del_flow_rules(rep->vport_rx_rule); err_destroy_direct_tirs: mlx5e_destroy_direct_tirs(priv); err_destroy_direct_rqts: @@ -375,7 +375,7 @@ static void mlx5e_cleanup_rep_rx(struct mlx5e_priv *priv) int i; mlx5e_tc_cleanup(priv); - mlx5_del_flow_rule(rep->vport_rx_rule); + mlx5_del_flow_rules(rep->vport_rx_rule); mlx5e_destroy_direct_tirs(priv); for (i = 0; i < priv->params.num_channels; i++) mlx5e_destroy_rqt(priv, &priv->direct_tir[i].rqt); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index ce8c54d18906..5d9ac0dbf3bf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -47,21 +47,22 @@ struct mlx5e_tc_flow { struct rhash_head node; u64 cookie; - struct mlx5_flow_rule *rule; + struct mlx5_flow_handle *rule; struct mlx5_esw_flow_attr *attr; }; #define MLX5E_TC_TABLE_NUM_ENTRIES 1024 #define MLX5E_TC_TABLE_NUM_GROUPS 4 -static struct mlx5_flow_rule *mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, - struct mlx5_flow_spec *spec, - u32 action, u32 flow_tag) +static struct mlx5_flow_handle * +mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + u32 action, u32 flow_tag) { struct mlx5_core_dev *dev = priv->mdev; struct mlx5_flow_destination dest = { 0 }; struct mlx5_fc *counter = NULL; - struct mlx5_flow_rule *rule; + struct mlx5_flow_handle *rule; bool table_created = false; if (action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { @@ -94,9 +95,9 @@ static struct mlx5_flow_rule *mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, } spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - rule = mlx5_add_flow_rule(priv->fs.tc.t, spec, - action, flow_tag, - &dest); + rule = 
mlx5_add_flow_rules(priv->fs.tc.t, spec, + action, flow_tag, + &dest, 1); if (IS_ERR(rule)) goto err_add_rule; @@ -114,9 +115,10 @@ err_create_ft: return rule; } -static struct mlx5_flow_rule *mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, - struct mlx5_flow_spec *spec, - struct mlx5_esw_flow_attr *attr) +static struct mlx5_flow_handle * +mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct mlx5_esw_flow_attr *attr) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; int err; @@ -129,7 +131,7 @@ static struct mlx5_flow_rule *mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, } static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, - struct mlx5_flow_rule *rule, + struct mlx5_flow_handle *rule, struct mlx5_esw_flow_attr *attr) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; @@ -140,7 +142,7 @@ static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, if (esw && esw->mode == SRIOV_OFFLOADS) mlx5_eswitch_del_vlan_action(esw, attr); - mlx5_del_flow_rule(rule); + mlx5_del_flow_rules(rule); mlx5_fc_destroy(priv->mdev, counter); @@ -450,7 +452,7 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol, u32 flow_tag, action; struct mlx5e_tc_flow *flow; struct mlx5_flow_spec *spec; - struct mlx5_flow_rule *old = NULL; + struct mlx5_flow_handle *old = NULL; struct mlx5_esw_flow_attr *old_attr = NULL; struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; @@ -511,7 +513,7 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol, goto out; err_del_rule: - mlx5_del_flow_rule(flow->rule); + mlx5_del_flow_rules(flow->rule); err_free: if (!old) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 9ef01d1bea06..fcd8b15f6625 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -56,7 +56,7 @@ struct esw_uc_addr { /* E-Switch MC FDB table hash node */ struct esw_mc_addr { /* SRIOV only */ 
struct l2addr_node node; - struct mlx5_flow_rule *uplink_rule; /* Forward to uplink rule */ + struct mlx5_flow_handle *uplink_rule; /* Forward to uplink rule */ u32 refcnt; }; @@ -65,7 +65,7 @@ struct vport_addr { struct l2addr_node node; u8 action; u32 vport; - struct mlx5_flow_rule *flow_rule; /* SRIOV only */ + struct mlx5_flow_handle *flow_rule; /* SRIOV only */ /* A flag indicating that mac was added due to mc promiscuous vport */ bool mc_promisc; }; @@ -237,13 +237,13 @@ static void del_l2_table_entry(struct mlx5_core_dev *dev, u32 index) } /* E-Switch FDB */ -static struct mlx5_flow_rule * +static struct mlx5_flow_handle * __esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule, u8 mac_c[ETH_ALEN], u8 mac_v[ETH_ALEN]) { int match_header = (is_zero_ether_addr(mac_c) ? 0 : MLX5_MATCH_OUTER_HEADERS); - struct mlx5_flow_rule *flow_rule = NULL; + struct mlx5_flow_handle *flow_rule = NULL; struct mlx5_flow_destination dest; struct mlx5_flow_spec *spec; void *mv_misc = NULL; @@ -286,9 +286,9 @@ __esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule, dmac_v, dmac_c, vport); spec->match_criteria_enable = match_header; flow_rule = - mlx5_add_flow_rule(esw->fdb_table.fdb, spec, - MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, - 0, &dest); + mlx5_add_flow_rules(esw->fdb_table.fdb, spec, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + 0, &dest, 1); if (IS_ERR(flow_rule)) { esw_warn(esw->dev, "FDB: Failed to add flow rule: dmac_v(%pM) dmac_c(%pM) -> vport(%d), err(%ld)\n", @@ -300,7 +300,7 @@ __esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule, return flow_rule; } -static struct mlx5_flow_rule * +static struct mlx5_flow_handle * esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u8 mac[ETH_ALEN], u32 vport) { u8 mac_c[ETH_ALEN]; @@ -309,7 +309,7 @@ esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u8 mac[ETH_ALEN], u32 vport) return __esw_fdb_set_vport_rule(esw, vport, false, mac_c, mac); } -static struct mlx5_flow_rule * +static 
struct mlx5_flow_handle * esw_fdb_set_vport_allmulti_rule(struct mlx5_eswitch *esw, u32 vport) { u8 mac_c[ETH_ALEN]; @@ -322,7 +322,7 @@ esw_fdb_set_vport_allmulti_rule(struct mlx5_eswitch *esw, u32 vport) return __esw_fdb_set_vport_rule(esw, vport, false, mac_c, mac_v); } -static struct mlx5_flow_rule * +static struct mlx5_flow_handle * esw_fdb_set_vport_promisc_rule(struct mlx5_eswitch *esw, u32 vport) { u8 mac_c[ETH_ALEN]; @@ -515,7 +515,7 @@ static int esw_del_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) del_l2_table_entry(esw->dev, esw_uc->table_index); if (vaddr->flow_rule) - mlx5_del_flow_rule(vaddr->flow_rule); + mlx5_del_flow_rules(vaddr->flow_rule); vaddr->flow_rule = NULL; l2addr_hash_del(esw_uc); @@ -562,7 +562,7 @@ static void update_allmulti_vports(struct mlx5_eswitch *esw, case MLX5_ACTION_DEL: if (!iter_vaddr) continue; - mlx5_del_flow_rule(iter_vaddr->flow_rule); + mlx5_del_flow_rules(iter_vaddr->flow_rule); l2addr_hash_del(iter_vaddr); break; } @@ -632,7 +632,7 @@ static int esw_del_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) esw_mc->uplink_rule); if (vaddr->flow_rule) - mlx5_del_flow_rule(vaddr->flow_rule); + mlx5_del_flow_rules(vaddr->flow_rule); vaddr->flow_rule = NULL; /* If the multicast mac is added as a result of mc promiscuous vport, @@ -645,7 +645,7 @@ static int esw_del_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) update_allmulti_vports(esw, vaddr, esw_mc); if (esw_mc->uplink_rule) - mlx5_del_flow_rule(esw_mc->uplink_rule); + mlx5_del_flow_rules(esw_mc->uplink_rule); l2addr_hash_del(esw_mc); return 0; @@ -828,14 +828,14 @@ static void esw_apply_vport_rx_mode(struct mlx5_eswitch *esw, u32 vport_num, UPLINK_VPORT); allmulti_addr->refcnt++; } else if (vport->allmulti_rule) { - mlx5_del_flow_rule(vport->allmulti_rule); + mlx5_del_flow_rules(vport->allmulti_rule); vport->allmulti_rule = NULL; if (--allmulti_addr->refcnt > 0) goto promisc; if (allmulti_addr->uplink_rule) - 
mlx5_del_flow_rule(allmulti_addr->uplink_rule); + mlx5_del_flow_rules(allmulti_addr->uplink_rule); allmulti_addr->uplink_rule = NULL; } @@ -847,7 +847,7 @@ promisc: vport->promisc_rule = esw_fdb_set_vport_promisc_rule(esw, vport_num); } else if (vport->promisc_rule) { - mlx5_del_flow_rule(vport->promisc_rule); + mlx5_del_flow_rules(vport->promisc_rule); vport->promisc_rule = NULL; } } @@ -1015,10 +1015,10 @@ static void esw_vport_cleanup_egress_rules(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { if (!IS_ERR_OR_NULL(vport->egress.allowed_vlan)) - mlx5_del_flow_rule(vport->egress.allowed_vlan); + mlx5_del_flow_rules(vport->egress.allowed_vlan); if (!IS_ERR_OR_NULL(vport->egress.drop_rule)) - mlx5_del_flow_rule(vport->egress.drop_rule); + mlx5_del_flow_rules(vport->egress.drop_rule); vport->egress.allowed_vlan = NULL; vport->egress.drop_rule = NULL; @@ -1173,10 +1173,10 @@ static void esw_vport_cleanup_ingress_rules(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { if (!IS_ERR_OR_NULL(vport->ingress.drop_rule)) - mlx5_del_flow_rule(vport->ingress.drop_rule); + mlx5_del_flow_rules(vport->ingress.drop_rule); if (!IS_ERR_OR_NULL(vport->ingress.allow_rule)) - mlx5_del_flow_rule(vport->ingress.allow_rule); + mlx5_del_flow_rules(vport->ingress.allow_rule); vport->ingress.drop_rule = NULL; vport->ingress.allow_rule = NULL; @@ -1253,9 +1253,9 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw, spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; vport->ingress.allow_rule = - mlx5_add_flow_rule(vport->ingress.acl, spec, - MLX5_FLOW_CONTEXT_ACTION_ALLOW, - 0, NULL); + mlx5_add_flow_rules(vport->ingress.acl, spec, + MLX5_FLOW_CONTEXT_ACTION_ALLOW, + 0, NULL, 0); if (IS_ERR(vport->ingress.allow_rule)) { err = PTR_ERR(vport->ingress.allow_rule); esw_warn(esw->dev, @@ -1267,9 +1267,9 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw, memset(spec, 0, sizeof(*spec)); vport->ingress.drop_rule = - mlx5_add_flow_rule(vport->ingress.acl, spec, - 
MLX5_FLOW_CONTEXT_ACTION_DROP, - 0, NULL); + mlx5_add_flow_rules(vport->ingress.acl, spec, + MLX5_FLOW_CONTEXT_ACTION_DROP, + 0, NULL, 0); if (IS_ERR(vport->ingress.drop_rule)) { err = PTR_ERR(vport->ingress.drop_rule); esw_warn(esw->dev, @@ -1321,9 +1321,9 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw, spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; vport->egress.allowed_vlan = - mlx5_add_flow_rule(vport->egress.acl, spec, - MLX5_FLOW_CONTEXT_ACTION_ALLOW, - 0, NULL); + mlx5_add_flow_rules(vport->egress.acl, spec, + MLX5_FLOW_CONTEXT_ACTION_ALLOW, + 0, NULL, 0); if (IS_ERR(vport->egress.allowed_vlan)) { err = PTR_ERR(vport->egress.allowed_vlan); esw_warn(esw->dev, @@ -1336,9 +1336,9 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw, /* Drop others rule (star rule) */ memset(spec, 0, sizeof(*spec)); vport->egress.drop_rule = - mlx5_add_flow_rule(vport->egress.acl, spec, - MLX5_FLOW_CONTEXT_ACTION_DROP, - 0, NULL); + mlx5_add_flow_rules(vport->egress.acl, spec, + MLX5_FLOW_CONTEXT_ACTION_DROP, + 0, NULL, 0); if (IS_ERR(vport->egress.drop_rule)) { err = PTR_ERR(vport->egress.drop_rule); esw_warn(esw->dev, @@ -1667,7 +1667,7 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) esw_disable_vport(esw, i); if (mc_promisc && mc_promisc->uplink_rule) - mlx5_del_flow_rule(mc_promisc->uplink_rule); + mlx5_del_flow_rules(mc_promisc->uplink_rule); esw_destroy_tsar(esw); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index ddae90c1f15b..6d414cb1b75f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -97,16 +97,16 @@ struct vport_ingress { struct mlx5_flow_group *allow_spoofchk_only_grp; struct mlx5_flow_group *allow_untagged_only_grp; struct mlx5_flow_group *drop_grp; - struct mlx5_flow_rule *allow_rule; - struct mlx5_flow_rule *drop_rule; + struct mlx5_flow_handle *allow_rule; + struct 
mlx5_flow_handle *drop_rule; }; struct vport_egress { struct mlx5_flow_table *acl; struct mlx5_flow_group *allowed_vlans_grp; struct mlx5_flow_group *drop_grp; - struct mlx5_flow_rule *allowed_vlan; - struct mlx5_flow_rule *drop_rule; + struct mlx5_flow_handle *allowed_vlan; + struct mlx5_flow_handle *drop_rule; }; struct mlx5_vport_info { @@ -125,8 +125,8 @@ struct mlx5_vport { int vport; struct hlist_head uc_list[MLX5_L2_ADDR_HASH_SIZE]; struct hlist_head mc_list[MLX5_L2_ADDR_HASH_SIZE]; - struct mlx5_flow_rule *promisc_rule; - struct mlx5_flow_rule *allmulti_rule; + struct mlx5_flow_handle *promisc_rule; + struct mlx5_flow_handle *allmulti_rule; struct work_struct vport_change_handler; struct vport_ingress ingress; @@ -162,7 +162,7 @@ struct mlx5_eswitch_fdb { struct mlx5_flow_table *fdb; struct mlx5_flow_group *send_to_vport_grp; struct mlx5_flow_group *miss_grp; - struct mlx5_flow_rule *miss_rule; + struct mlx5_flow_handle *miss_rule; int vlan_push_pop_refcount; } offloads; }; @@ -175,7 +175,7 @@ enum { }; struct mlx5_esw_sq { - struct mlx5_flow_rule *send_to_vport_rule; + struct mlx5_flow_handle *send_to_vport_rule; struct list_head list; }; @@ -188,7 +188,7 @@ struct mlx5_eswitch_rep { u8 hw_id[ETH_ALEN]; void *priv_data; - struct mlx5_flow_rule *vport_rx_rule; + struct mlx5_flow_handle *vport_rx_rule; struct list_head vport_sqs_list; u16 vlan; u32 vlan_refcount; @@ -257,11 +257,11 @@ int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw, struct mlx5_flow_spec; struct mlx5_esw_flow_attr; -struct mlx5_flow_rule * +struct mlx5_flow_handle * mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, struct mlx5_flow_spec *spec, struct mlx5_esw_flow_attr *attr); -struct mlx5_flow_rule * +struct mlx5_flow_handle * mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 tirn); enum { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 
c55ad8d00c05..8b2a3832cd0a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -43,14 +43,14 @@ enum { FDB_SLOW_PATH }; -struct mlx5_flow_rule * +struct mlx5_flow_handle * mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, struct mlx5_flow_spec *spec, struct mlx5_esw_flow_attr *attr) { struct mlx5_flow_destination dest = { 0 }; struct mlx5_fc *counter = NULL; - struct mlx5_flow_rule *rule; + struct mlx5_flow_handle *rule; void *misc; int action; @@ -80,8 +80,8 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS | MLX5_MATCH_MISC_PARAMETERS; - rule = mlx5_add_flow_rule((struct mlx5_flow_table *)esw->fdb_table.fdb, - spec, action, 0, &dest); + rule = mlx5_add_flow_rules((struct mlx5_flow_table *)esw->fdb_table.fdb, + spec, action, 0, &dest, 1); if (IS_ERR(rule)) mlx5_fc_destroy(esw->dev, counter); @@ -269,11 +269,11 @@ out: return err; } -static struct mlx5_flow_rule * +static struct mlx5_flow_handle * mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, u32 sqn) { struct mlx5_flow_destination dest; - struct mlx5_flow_rule *flow_rule; + struct mlx5_flow_handle *flow_rule; struct mlx5_flow_spec *spec; void *misc; @@ -296,9 +296,9 @@ mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, u32 sqn dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; dest.vport_num = vport; - flow_rule = mlx5_add_flow_rule(esw->fdb_table.offloads.fdb, spec, - MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, - 0, &dest); + flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.fdb, spec, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + 0, &dest, 1); if (IS_ERR(flow_rule)) esw_warn(esw->dev, "FDB: Failed to add send to vport rule err %ld\n", PTR_ERR(flow_rule)); out: @@ -315,7 +315,7 @@ void mlx5_eswitch_sqs2vport_stop(struct mlx5_eswitch *esw, return; list_for_each_entry_safe(esw_sq, tmp, &rep->vport_sqs_list, list) { - 
mlx5_del_flow_rule(esw_sq->send_to_vport_rule); + mlx5_del_flow_rules(esw_sq->send_to_vport_rule); list_del(&esw_sq->list); kfree(esw_sq); } @@ -325,7 +325,7 @@ int mlx5_eswitch_sqs2vport_start(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep, u16 *sqns_array, int sqns_num) { - struct mlx5_flow_rule *flow_rule; + struct mlx5_flow_handle *flow_rule; struct mlx5_esw_sq *esw_sq; int err; int i; @@ -362,7 +362,7 @@ out_err: static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw) { struct mlx5_flow_destination dest; - struct mlx5_flow_rule *flow_rule = NULL; + struct mlx5_flow_handle *flow_rule = NULL; struct mlx5_flow_spec *spec; int err = 0; @@ -376,9 +376,9 @@ static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw) dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; dest.vport_num = 0; - flow_rule = mlx5_add_flow_rule(esw->fdb_table.offloads.fdb, spec, - MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, - 0, &dest); + flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.fdb, spec, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + 0, &dest, 1); if (IS_ERR(flow_rule)) { err = PTR_ERR(flow_rule); esw_warn(esw->dev, "FDB: Failed to add miss flow rule err %d\n", err); @@ -501,7 +501,7 @@ static void esw_destroy_offloads_fdb_table(struct mlx5_eswitch *esw) return; esw_debug(esw->dev, "Destroy offloads FDB Table\n"); - mlx5_del_flow_rule(esw->fdb_table.offloads.miss_rule); + mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule); mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp); mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp); @@ -585,11 +585,11 @@ static void esw_destroy_vport_rx_group(struct mlx5_eswitch *esw) mlx5_destroy_flow_group(esw->offloads.vport_rx_group); } -struct mlx5_flow_rule * +struct mlx5_flow_handle * mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 tirn) { struct mlx5_flow_destination dest; - struct mlx5_flow_rule *flow_rule; + struct mlx5_flow_handle *flow_rule; struct mlx5_flow_spec *spec; void *misc; @@ -610,9 
+610,9 @@ mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 tirn) dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; dest.tir_num = tirn; - flow_rule = mlx5_add_flow_rule(esw->offloads.ft_offloads, spec, - MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, - 0, &dest); + flow_rule = mlx5_add_flow_rules(esw->offloads.ft_offloads, spec, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + 0, &dest, 1); if (IS_ERR(flow_rule)) { esw_warn(esw->dev, "fs offloads: Failed to add vport rx rule err %ld\n", PTR_ERR(flow_rule)); goto out; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 43d7052c76fc..6732287a98c8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -155,6 +155,9 @@ static void del_flow_group(struct fs_node *node); static void del_fte(struct fs_node *node); static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1, struct mlx5_flow_destination *d2); +static struct mlx5_flow_rule * +find_flow_rule(struct fs_fte *fte, + struct mlx5_flow_destination *dest); static void tree_init_node(struct fs_node *node, unsigned int refcount, @@ -640,8 +643,8 @@ static int update_root_ft_create(struct mlx5_flow_table *ft, struct fs_prio return err; } -int mlx5_modify_rule_destination(struct mlx5_flow_rule *rule, - struct mlx5_flow_destination *dest) +static int _mlx5_modify_rule_destination(struct mlx5_flow_rule *rule, + struct mlx5_flow_destination *dest) { struct mlx5_flow_table *ft; struct mlx5_flow_group *fg; @@ -666,6 +669,28 @@ int mlx5_modify_rule_destination(struct mlx5_flow_rule *rule, return err; } +int mlx5_modify_rule_destination(struct mlx5_flow_handle *handle, + struct mlx5_flow_destination *new_dest, + struct mlx5_flow_destination *old_dest) +{ + int i; + + if (!old_dest) { + if (handle->num_rules != 1) + return -EINVAL; + return _mlx5_modify_rule_destination(handle->rule[0], + new_dest); + } + + for (i = 0; i < handle->num_rules; 
i++) { + if (mlx5_flow_dests_cmp(new_dest, &handle->rule[i]->dest_attr)) + return _mlx5_modify_rule_destination(handle->rule[i], + new_dest); + } + + return -EINVAL; +} + /* Modify/set FWD rules that point on old_next_ft to point on new_next_ft */ static int connect_fwd_rules(struct mlx5_core_dev *dev, struct mlx5_flow_table *new_next_ft, @@ -688,7 +713,7 @@ static int connect_fwd_rules(struct mlx5_core_dev *dev, list_splice_init(&old_next_ft->fwd_rules, &new_next_ft->fwd_rules); mutex_unlock(&old_next_ft->lock); list_for_each_entry(iter, &new_next_ft->fwd_rules, next_ft) { - err = mlx5_modify_rule_destination(iter, &dest); + err = _mlx5_modify_rule_destination(iter, &dest); if (err) pr_err("mlx5_core: failed to modify rule to point on flow table %d\n", new_next_ft->id); @@ -917,41 +942,117 @@ static struct mlx5_flow_rule *alloc_rule(struct mlx5_flow_destination *dest) return rule; } -/* fte should not be deleted while calling this function */ -static struct mlx5_flow_rule *add_rule_fte(struct fs_fte *fte, - struct mlx5_flow_group *fg, - struct mlx5_flow_destination *dest, - bool update_action) +static struct mlx5_flow_handle *alloc_handle(int num_rules) { + struct mlx5_flow_handle *handle; + + handle = kzalloc(sizeof(*handle) + sizeof(handle->rule[0]) * + num_rules, GFP_KERNEL); + if (!handle) + return NULL; + + handle->num_rules = num_rules; + + return handle; +} + +static void destroy_flow_handle(struct fs_fte *fte, + struct mlx5_flow_handle *handle, + struct mlx5_flow_destination *dest, + int i) +{ + for (; --i >= 0;) { + if (atomic_dec_and_test(&handle->rule[i]->node.refcount)) { + fte->dests_size--; + list_del(&handle->rule[i]->node.list); + kfree(handle->rule[i]); + } + } + kfree(handle); +} + +static struct mlx5_flow_handle * +create_flow_handle(struct fs_fte *fte, + struct mlx5_flow_destination *dest, + int dest_num, + int *modify_mask, + bool *new_rule) +{ + struct mlx5_flow_handle *handle; + struct mlx5_flow_rule *rule = NULL; + static int count = 
BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_FLOW_COUNTERS); + static int dst = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST); + int type; + int i = 0; + + handle = alloc_handle((dest_num) ? dest_num : 1); + if (!handle) + return ERR_PTR(-ENOMEM); + + do { + if (dest) { + rule = find_flow_rule(fte, dest + i); + if (rule) { + atomic_inc(&rule->node.refcount); + goto rule_found; + } + } + + *new_rule = true; + rule = alloc_rule(dest + i); + if (!rule) + goto free_rules; + + /* Add dest to dests list- we need flow tables to be in the + * end of the list for forward to next prio rules. + */ + tree_init_node(&rule->node, 1, del_rule); + if (dest && + dest[i].type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) + list_add(&rule->node.list, &fte->node.children); + else + list_add_tail(&rule->node.list, &fte->node.children); + if (dest) { + fte->dests_size++; + + type = dest[i].type == + MLX5_FLOW_DESTINATION_TYPE_COUNTER; + *modify_mask |= type ? count : dst; + } +rule_found: + handle->rule[i] = rule; + } while (++i < dest_num); + + return handle; + +free_rules: + destroy_flow_handle(fte, handle, dest, i); + return ERR_PTR(-ENOMEM); +} + +/* fte should not be deleted while calling this function */ +static struct mlx5_flow_handle * +add_rule_fte(struct fs_fte *fte, + struct mlx5_flow_group *fg, + struct mlx5_flow_destination *dest, + int dest_num, + bool update_action) +{ + struct mlx5_flow_handle *handle; struct mlx5_flow_table *ft; - struct mlx5_flow_rule *rule; int modify_mask = 0; int err; + bool new_rule = false; - rule = alloc_rule(dest); - if (!rule) - return ERR_PTR(-ENOMEM); + handle = create_flow_handle(fte, dest, dest_num, &modify_mask, + &new_rule); + if (IS_ERR(handle) || !new_rule) + goto out; if (update_action) modify_mask |= BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION); fs_get_obj(ft, fg->node.parent); - /* Add dest to dests list- we need flow tables to be in the - * end of the list for forward to next prio rules. 
- */ - tree_init_node(&rule->node, 1, del_rule); - if (dest && dest->type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) - list_add(&rule->node.list, &fte->node.children); - else - list_add_tail(&rule->node.list, &fte->node.children); - if (dest) { - fte->dests_size++; - - modify_mask |= dest->type == MLX5_FLOW_DESTINATION_TYPE_COUNTER ? - BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_FLOW_COUNTERS) : - BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST); - } - if (!(fte->status & FS_FTE_STATUS_EXISTING)) err = mlx5_cmd_create_fte(get_dev(&ft->node), ft, fg->id, fte); @@ -959,17 +1060,15 @@ static struct mlx5_flow_rule *add_rule_fte(struct fs_fte *fte, err = mlx5_cmd_update_fte(get_dev(&ft->node), ft, fg->id, modify_mask, fte); if (err) - goto free_rule; + goto free_handle; fte->status |= FS_FTE_STATUS_EXISTING; - return rule; +out: + return handle; -free_rule: - list_del(&rule->node.list); - kfree(rule); - if (dest) - fte->dests_size--; +free_handle: + destroy_flow_handle(fte, handle, dest, handle->num_rules); return ERR_PTR(err); } @@ -1098,16 +1197,18 @@ static struct mlx5_flow_rule *find_flow_rule(struct fs_fte *fte, return NULL; } -static struct mlx5_flow_rule *add_rule_fg(struct mlx5_flow_group *fg, - u32 *match_value, - u8 action, - u32 flow_tag, - struct mlx5_flow_destination *dest) +static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg, + u32 *match_value, + u8 action, + u32 flow_tag, + struct mlx5_flow_destination *dest, + int dest_num) { - struct fs_fte *fte; - struct mlx5_flow_rule *rule; + struct mlx5_flow_handle *handle; struct mlx5_flow_table *ft; struct list_head *prev; + struct fs_fte *fte; + int i; nested_lock_ref_node(&fg->node, FS_MUTEX_PARENT); fs_for_each_fte(fte, fg) { @@ -1116,40 +1217,33 @@ static struct mlx5_flow_rule *add_rule_fg(struct mlx5_flow_group *fg, (action & fte->action) && flow_tag == fte->flow_tag) { int old_action = fte->action; - rule = find_flow_rule(fte, dest); - if (rule) { - atomic_inc(&rule->node.refcount); - 
unlock_ref_node(&fte->node); - unlock_ref_node(&fg->node); - return rule; - } fte->action |= action; - rule = add_rule_fte(fte, fg, dest, - old_action != action); - if (IS_ERR(rule)) { + handle = add_rule_fte(fte, fg, dest, dest_num, + old_action != action); + if (IS_ERR(handle)) { fte->action = old_action; goto unlock_fte; } else { - goto add_rule; + goto add_rules; } } unlock_ref_node(&fte->node); } fs_get_obj(ft, fg->node.parent); if (fg->num_ftes >= fg->max_ftes) { - rule = ERR_PTR(-ENOSPC); + handle = ERR_PTR(-ENOSPC); goto unlock_fg; } fte = create_fte(fg, match_value, action, flow_tag, &prev); if (IS_ERR(fte)) { - rule = (void *)fte; + handle = (void *)fte; goto unlock_fg; } tree_init_node(&fte->node, 0, del_fte); nested_lock_ref_node(&fte->node, FS_MUTEX_CHILD); - rule = add_rule_fte(fte, fg, dest, false); - if (IS_ERR(rule)) { + handle = add_rule_fte(fte, fg, dest, dest_num, false); + if (IS_ERR(handle)) { kfree(fte); goto unlock_fg; } @@ -1158,21 +1252,24 @@ static struct mlx5_flow_rule *add_rule_fg(struct mlx5_flow_group *fg, tree_add_node(&fte->node, &fg->node); list_add(&fte->node.list, prev); -add_rule: - tree_add_node(&rule->node, &fte->node); +add_rules: + for (i = 0; i < handle->num_rules; i++) { + if (atomic_read(&handle->rule[i]->node.refcount) == 1) + tree_add_node(&handle->rule[i]->node, &fte->node); + } unlock_fte: unlock_ref_node(&fte->node); unlock_fg: unlock_ref_node(&fg->node); - return rule; + return handle; } -struct mlx5_fc *mlx5_flow_rule_counter(struct mlx5_flow_rule *rule) +struct mlx5_fc *mlx5_flow_rule_counter(struct mlx5_flow_handle *handle) { struct mlx5_flow_rule *dst; struct fs_fte *fte; - fs_get_obj(fte, rule->node.parent); + fs_get_obj(fte, handle->rule[0]->node.parent); fs_for_each_dst(dst, fte) { if (dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_COUNTER) @@ -1211,18 +1308,22 @@ static bool dest_is_valid(struct mlx5_flow_destination *dest, return true; } -static struct mlx5_flow_rule * -_mlx5_add_flow_rule(struct 
mlx5_flow_table *ft, - struct mlx5_flow_spec *spec, - u32 action, - u32 flow_tag, - struct mlx5_flow_destination *dest) +static struct mlx5_flow_handle * +_mlx5_add_flow_rules(struct mlx5_flow_table *ft, + struct mlx5_flow_spec *spec, + u32 action, + u32 flow_tag, + struct mlx5_flow_destination *dest, + int dest_num) { struct mlx5_flow_group *g; - struct mlx5_flow_rule *rule; + struct mlx5_flow_handle *rule; + int i; - if (!dest_is_valid(dest, action, ft)) - return ERR_PTR(-EINVAL); + for (i = 0; i < dest_num; i++) { + if (!dest_is_valid(&dest[i], action, ft)) + return ERR_PTR(-EINVAL); + } nested_lock_ref_node(&ft->node, FS_MUTEX_GRANDPARENT); fs_for_each_fg(g, ft) @@ -1231,7 +1332,7 @@ _mlx5_add_flow_rule(struct mlx5_flow_table *ft, g->mask.match_criteria, spec->match_criteria)) { rule = add_rule_fg(g, spec->match_value, - action, flow_tag, dest); + action, flow_tag, dest, dest_num); if (!IS_ERR(rule) || PTR_ERR(rule) != -ENOSPC) goto unlock; } @@ -1244,7 +1345,7 @@ _mlx5_add_flow_rule(struct mlx5_flow_table *ft, } rule = add_rule_fg(g, spec->match_value, - action, flow_tag, dest); + action, flow_tag, dest, dest_num); if (IS_ERR(rule)) { /* Remove assumes refcount > 0 and autogroup creates a group * with a refcount = 0. 
@@ -1265,17 +1366,18 @@ static bool fwd_next_prio_supported(struct mlx5_flow_table *ft) (MLX5_CAP_FLOWTABLE(get_dev(&ft->node), nic_rx_multi_path_tirs))); } -struct mlx5_flow_rule * -mlx5_add_flow_rule(struct mlx5_flow_table *ft, - struct mlx5_flow_spec *spec, - u32 action, - u32 flow_tag, - struct mlx5_flow_destination *dest) +struct mlx5_flow_handle * +mlx5_add_flow_rules(struct mlx5_flow_table *ft, + struct mlx5_flow_spec *spec, + u32 action, + u32 flow_tag, + struct mlx5_flow_destination *dest, + int dest_num) { struct mlx5_flow_root_namespace *root = find_root(&ft->node); struct mlx5_flow_destination gen_dest; struct mlx5_flow_table *next_ft = NULL; - struct mlx5_flow_rule *rule = NULL; + struct mlx5_flow_handle *handle = NULL; u32 sw_action = action; struct fs_prio *prio; @@ -1291,6 +1393,7 @@ mlx5_add_flow_rule(struct mlx5_flow_table *ft, gen_dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; gen_dest.ft = next_ft; dest = &gen_dest; + dest_num = 1; action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; } else { mutex_unlock(&root->chain_lock); @@ -1298,27 +1401,33 @@ mlx5_add_flow_rule(struct mlx5_flow_table *ft, } } - rule = _mlx5_add_flow_rule(ft, spec, action, flow_tag, dest); + handle = _mlx5_add_flow_rules(ft, spec, action, flow_tag, dest, + dest_num); if (sw_action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) { - if (!IS_ERR_OR_NULL(rule) && - (list_empty(&rule->next_ft))) { + if (!IS_ERR_OR_NULL(handle) && + (list_empty(&handle->rule[0]->next_ft))) { mutex_lock(&next_ft->lock); - list_add(&rule->next_ft, &next_ft->fwd_rules); + list_add(&handle->rule[0]->next_ft, + &next_ft->fwd_rules); mutex_unlock(&next_ft->lock); - rule->sw_action = MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO; + handle->rule[0]->sw_action = MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO; } mutex_unlock(&root->chain_lock); } - return rule; + return handle; } -EXPORT_SYMBOL(mlx5_add_flow_rule); +EXPORT_SYMBOL(mlx5_add_flow_rules); -void mlx5_del_flow_rule(struct mlx5_flow_rule *rule) +void 
mlx5_del_flow_rules(struct mlx5_flow_handle *handle) { - tree_remove_node(&rule->node); + int i; + + for (i = handle->num_rules - 1; i >= 0; i--) + tree_remove_node(&handle->rule[i]->node); + kfree(handle); } -EXPORT_SYMBOL(mlx5_del_flow_rule); +EXPORT_SYMBOL(mlx5_del_flow_rules); /* Assuming prio->node.children(flow tables) is sorted by level */ static struct mlx5_flow_table *find_next_ft(struct mlx5_flow_table *ft) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index 71ff03bceabb..d5150888645c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -94,6 +94,11 @@ struct mlx5_flow_rule { u32 sw_action; }; +struct mlx5_flow_handle { + int num_rules; + struct mlx5_flow_rule *rule[]; +}; + /* Type of children is mlx5_flow_group */ struct mlx5_flow_table { struct fs_node node; diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 93ebc5e21334..0dcd287f4bd0 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -69,8 +69,8 @@ enum mlx5_flow_namespace_type { struct mlx5_flow_table; struct mlx5_flow_group; -struct mlx5_flow_rule; struct mlx5_flow_namespace; +struct mlx5_flow_handle; struct mlx5_flow_spec { u8 match_criteria_enable; @@ -127,18 +127,20 @@ void mlx5_destroy_flow_group(struct mlx5_flow_group *fg); /* Single destination per rule. * Group ID is implied by the match criteria. 
*/ -struct mlx5_flow_rule * -mlx5_add_flow_rule(struct mlx5_flow_table *ft, - struct mlx5_flow_spec *spec, - u32 action, - u32 flow_tag, - struct mlx5_flow_destination *dest); -void mlx5_del_flow_rule(struct mlx5_flow_rule *fr); - -int mlx5_modify_rule_destination(struct mlx5_flow_rule *rule, - struct mlx5_flow_destination *dest); - -struct mlx5_fc *mlx5_flow_rule_counter(struct mlx5_flow_rule *rule); +struct mlx5_flow_handle * +mlx5_add_flow_rules(struct mlx5_flow_table *ft, + struct mlx5_flow_spec *spec, + u32 action, + u32 flow_tag, + struct mlx5_flow_destination *dest, + int dest_num); +void mlx5_del_flow_rules(struct mlx5_flow_handle *fr); + +int mlx5_modify_rule_destination(struct mlx5_flow_handle *handler, + struct mlx5_flow_destination *new_dest, + struct mlx5_flow_destination *old_dest); + +struct mlx5_fc *mlx5_flow_rule_counter(struct mlx5_flow_handle *handler); struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging); void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter); void mlx5_fc_query_cached(struct mlx5_fc *counter, -- cgit v1.2.3 From e02737d5b82640497637d18428e2793bb7f02881 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Fri, 28 Oct 2016 15:04:46 -0700 Subject: x86/intel_rdt: Add tasks files The root directory all subdirectories are automatically populated with a read/write (mode 0644) file named "tasks". When read it will show all the task IDs assigned to the resource group. Tasks can be added (one at a time) to a group by writing the task ID to the file. E.g. Membership in a resource group is indicated by a new field in the task_struct "int closid" which holds the CLOSID for each task. The default resource group uses CLOSID=0 which means that all existing tasks when the resctrl file system is mounted belong to the default group. If a group is removed, tasks which are members of that group are moved to the default group. 
Signed-off-by: Fenghua Yu Cc: "Ravi V Shankar" Cc: "Tony Luck" Cc: "Shaohua Li" Cc: "Sai Prakhya" Cc: "Peter Zijlstra" Cc: "Stephane Eranian" Cc: "Dave Hansen" Cc: "David Carrillo-Cisneros" Cc: "Nilay Vaish" Cc: "Vikas Shivappa" Cc: "Ingo Molnar" Cc: "Borislav Petkov" Cc: "H. Peter Anvin" Link: http://lkml.kernel.org/r/1477692289-37412-8-git-send-email-fenghua.yu@intel.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 180 +++++++++++++++++++++++++++++++ include/linux/sched.h | 3 + 2 files changed, 183 insertions(+) (limited to 'include/linux') diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c index e05a18685fc8..5cc0865f2908 100644 --- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c +++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c @@ -28,6 +28,7 @@ #include #include #include +#include #include @@ -267,6 +268,162 @@ unlock: return ret ?: nbytes; } +struct task_move_callback { + struct callback_head work; + struct rdtgroup *rdtgrp; +}; + +static void move_myself(struct callback_head *head) +{ + struct task_move_callback *callback; + struct rdtgroup *rdtgrp; + + callback = container_of(head, struct task_move_callback, work); + rdtgrp = callback->rdtgrp; + + /* + * If resource group was deleted before this task work callback + * was invoked, then assign the task to root group and free the + * resource group. + */ + if (atomic_dec_and_test(&rdtgrp->waitcount) && + (rdtgrp->flags & RDT_DELETED)) { + current->closid = 0; + kfree(rdtgrp); + } + + kfree(callback); +} + +static int __rdtgroup_move_task(struct task_struct *tsk, + struct rdtgroup *rdtgrp) +{ + struct task_move_callback *callback; + int ret; + + callback = kzalloc(sizeof(*callback), GFP_KERNEL); + if (!callback) + return -ENOMEM; + callback->work.func = move_myself; + callback->rdtgrp = rdtgrp; + + /* + * Take a refcount, so rdtgrp cannot be freed before the + * callback has been invoked. 
+ */ + atomic_inc(&rdtgrp->waitcount); + ret = task_work_add(tsk, &callback->work, true); + if (ret) { + /* + * Task is exiting. Drop the refcount and free the callback. + * No need to check the refcount as the group cannot be + * deleted before the write function unlocks rdtgroup_mutex. + */ + atomic_dec(&rdtgrp->waitcount); + kfree(callback); + } else { + tsk->closid = rdtgrp->closid; + } + return ret; +} + +static int rdtgroup_task_write_permission(struct task_struct *task, + struct kernfs_open_file *of) +{ + const struct cred *tcred = get_task_cred(task); + const struct cred *cred = current_cred(); + int ret = 0; + + /* + * Even if we're attaching all tasks in the thread group, we only + * need to check permissions on one of them. + */ + if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) && + !uid_eq(cred->euid, tcred->uid) && + !uid_eq(cred->euid, tcred->suid)) + ret = -EPERM; + + put_cred(tcred); + return ret; +} + +static int rdtgroup_move_task(pid_t pid, struct rdtgroup *rdtgrp, + struct kernfs_open_file *of) +{ + struct task_struct *tsk; + int ret; + + rcu_read_lock(); + if (pid) { + tsk = find_task_by_vpid(pid); + if (!tsk) { + rcu_read_unlock(); + return -ESRCH; + } + } else { + tsk = current; + } + + get_task_struct(tsk); + rcu_read_unlock(); + + ret = rdtgroup_task_write_permission(tsk, of); + if (!ret) + ret = __rdtgroup_move_task(tsk, rdtgrp); + + put_task_struct(tsk); + return ret; +} + +static ssize_t rdtgroup_tasks_write(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) +{ + struct rdtgroup *rdtgrp; + int ret = 0; + pid_t pid; + + if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0) + return -EINVAL; + rdtgrp = rdtgroup_kn_lock_live(of->kn); + + if (rdtgrp) + ret = rdtgroup_move_task(pid, rdtgrp, of); + else + ret = -ENOENT; + + rdtgroup_kn_unlock(of->kn); + + return ret ?: nbytes; +} + +static void show_rdt_tasks(struct rdtgroup *r, struct seq_file *s) +{ + struct task_struct *p, *t; + + rcu_read_lock(); + for_each_process_thread(p, t) { + 
if (t->closid == r->closid) + seq_printf(s, "%d\n", t->pid); + } + rcu_read_unlock(); +} + +static int rdtgroup_tasks_show(struct kernfs_open_file *of, + struct seq_file *s, void *v) +{ + struct rdtgroup *rdtgrp; + int ret = 0; + + rdtgrp = rdtgroup_kn_lock_live(of->kn); + if (rdtgrp) + show_rdt_tasks(rdtgrp, s); + else + ret = -ENOENT; + rdtgroup_kn_unlock(of->kn); + + return ret; +} + /* Files in each rdtgroup */ static struct rftype rdtgroup_base_files[] = { { @@ -276,6 +433,13 @@ static struct rftype rdtgroup_base_files[] = { .write = rdtgroup_cpus_write, .seq_show = rdtgroup_cpus_show, }, + { + .name = "tasks", + .mode = 0644, + .kf_ops = &rdtgroup_kf_single_ops, + .write = rdtgroup_tasks_write, + .seq_show = rdtgroup_tasks_show, + }, }; static int rdt_num_closids_show(struct kernfs_open_file *of, @@ -592,6 +756,13 @@ static void rdt_reset_pqr_assoc_closid(void *v) static void rmdir_all_sub(void) { struct rdtgroup *rdtgrp, *tmp; + struct task_struct *p, *t; + + /* move all tasks to default resource group */ + read_lock(&tasklist_lock); + for_each_process_thread(p, t) + t->closid = 0; + read_unlock(&tasklist_lock); get_cpu(); /* Reset PQR_ASSOC MSR on this cpu. 
*/ @@ -712,6 +883,7 @@ out_unlock: static int rdtgroup_rmdir(struct kernfs_node *kn) { + struct task_struct *p, *t; struct rdtgroup *rdtgrp; int cpu, ret = 0; @@ -721,6 +893,14 @@ static int rdtgroup_rmdir(struct kernfs_node *kn) return -ENOENT; } + /* Give any tasks back to the default group */ + read_lock(&tasklist_lock); + for_each_process_thread(p, t) { + if (t->closid == rdtgrp->closid) + t->closid = 0; + } + read_unlock(&tasklist_lock); + /* Give any CPUs back to the default group */ cpumask_or(&rdtgroup_default.cpu_mask, &rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask); diff --git a/include/linux/sched.h b/include/linux/sched.h index 348f51b0ec92..c8f4152e7265 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1791,6 +1791,9 @@ struct task_struct { /* cg_list protected by css_set_lock and tsk->alloc_lock */ struct list_head cg_list; #endif +#ifdef CONFIG_INTEL_RDT_A + int closid; +#endif #ifdef CONFIG_FUTEX struct robust_list_head __user *robust_list; #ifdef CONFIG_COMPAT -- cgit v1.2.3 From 766763dbdc1dca11deabdb00077a1c19e2803f0a Mon Sep 17 00:00:00 2001 From: Alexandre Bailon Date: Thu, 27 Oct 2016 17:32:36 +0200 Subject: ARM: davinci: da8xx: Remove duplicated defines Some macro for DA8xx CFGCHIP are defined in usb-davinci.h, but da8xx-cfgchip.h intend to replace them. 
Remove duplicated defines between da8xx-cfgchip.h and usb-davinci.h Signed-off-by: Alexandre Bailon Signed-off-by: Sekhar Nori --- arch/arm/mach-davinci/board-da830-evm.c | 5 +++-- arch/arm/mach-davinci/board-omapl138-hawk.c | 3 ++- include/linux/platform_data/usb-davinci.h | 23 ----------------------- 3 files changed, 5 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-davinci/board-da830-evm.c b/arch/arm/mach-davinci/board-da830-evm.c index 3d8cf8cbd98a..df1f4091a8ae 100644 --- a/arch/arm/mach-davinci/board-da830-evm.c +++ b/arch/arm/mach-davinci/board-da830-evm.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -116,7 +117,7 @@ static __init void da830_evm_usb_init(void) cfgchip2 = __raw_readl(DA8XX_SYSCFG0_VIRT(DA8XX_CFGCHIP2_REG)); /* USB2.0 PHY reference clock is 24 MHz */ - cfgchip2 &= ~CFGCHIP2_REFFREQ; + cfgchip2 &= ~CFGCHIP2_REFFREQ_MASK; cfgchip2 |= CFGCHIP2_REFFREQ_24MHZ; /* @@ -133,7 +134,7 @@ static __init void da830_evm_usb_init(void) * controller won't be able to drive VBUS thinking that it's a B-device. * Otherwise, we want to use the OTG mode and enable VBUS comparators. */ - cfgchip2 &= ~CFGCHIP2_OTGMODE; + cfgchip2 &= ~CFGCHIP2_OTGMODE_MASK; #ifdef CONFIG_USB_MUSB_HOST cfgchip2 |= CFGCHIP2_FORCE_HOST; #else diff --git a/arch/arm/mach-davinci/board-omapl138-hawk.c b/arch/arm/mach-davinci/board-omapl138-hawk.c index ee624861ca66..e1efa1066c1e 100644 --- a/arch/arm/mach-davinci/board-omapl138-hawk.c +++ b/arch/arm/mach-davinci/board-omapl138-hawk.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -254,7 +255,7 @@ static __init void omapl138_hawk_usb_init(void) /* Setup the Ref. clock frequency for the HAWK at 24 MHz. 
*/ cfgchip2 = __raw_readl(DA8XX_SYSCFG0_VIRT(DA8XX_CFGCHIP2_REG)); - cfgchip2 &= ~CFGCHIP2_REFFREQ; + cfgchip2 &= ~CFGCHIP2_REFFREQ_MASK; cfgchip2 |= CFGCHIP2_REFFREQ_24MHZ; __raw_writel(cfgchip2, DA8XX_SYSCFG0_VIRT(DA8XX_CFGCHIP2_REG)); diff --git a/include/linux/platform_data/usb-davinci.h b/include/linux/platform_data/usb-davinci.h index e0bc4abe69c2..0926e99f2e8f 100644 --- a/include/linux/platform_data/usb-davinci.h +++ b/include/linux/platform_data/usb-davinci.h @@ -11,29 +11,6 @@ #ifndef __ASM_ARCH_USB_H #define __ASM_ARCH_USB_H -/* DA8xx CFGCHIP2 (USB 2.0 PHY Control) register bits */ -#define CFGCHIP2_PHYCLKGD (1 << 17) -#define CFGCHIP2_VBUSSENSE (1 << 16) -#define CFGCHIP2_RESET (1 << 15) -#define CFGCHIP2_OTGMODE (3 << 13) -#define CFGCHIP2_NO_OVERRIDE (0 << 13) -#define CFGCHIP2_FORCE_HOST (1 << 13) -#define CFGCHIP2_FORCE_DEVICE (2 << 13) -#define CFGCHIP2_FORCE_HOST_VBUS_LOW (3 << 13) -#define CFGCHIP2_USB1PHYCLKMUX (1 << 12) -#define CFGCHIP2_USB2PHYCLKMUX (1 << 11) -#define CFGCHIP2_PHYPWRDN (1 << 10) -#define CFGCHIP2_OTGPWRDN (1 << 9) -#define CFGCHIP2_DATPOL (1 << 8) -#define CFGCHIP2_USB1SUSPENDM (1 << 7) -#define CFGCHIP2_PHY_PLLON (1 << 6) /* override PLL suspend */ -#define CFGCHIP2_SESENDEN (1 << 5) /* Vsess_end comparator */ -#define CFGCHIP2_VBDTCTEN (1 << 4) /* Vbus comparator */ -#define CFGCHIP2_REFFREQ (0xf << 0) -#define CFGCHIP2_REFFREQ_12MHZ (1 << 0) -#define CFGCHIP2_REFFREQ_24MHZ (2 << 0) -#define CFGCHIP2_REFFREQ_48MHZ (3 << 0) - struct da8xx_ohci_root_hub; typedef void (*da8xx_ocic_handler_t)(struct da8xx_ohci_root_hub *hub, -- cgit v1.2.3 From c62cce2caee558e18aa05c01c2fd3b40f07174f2 Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Mon, 24 Oct 2016 18:29:13 -0700 Subject: net: add an ioctl to get a socket network namespace Each socket operates in a network namespace where it has been created, so if we want to dump and restore a socket, we have to know its network namespace. 
We have a socket_diag to get information about sockets, it doesn't report sockets which are not bound or connected. This patch introduces a new socket ioctl, which is called SIOCGSKNS and used to get a file descriptor for a socket network namespace. A task must have CAP_NET_ADMIN in a target network namespace to use this ioctl. Cc: "David S. Miller" Cc: Eric W. Biederman Signed-off-by: Andrei Vagin Signed-off-by: David S. Miller --- fs/nsfs.c | 2 +- include/linux/proc_fs.h | 4 ++++ include/uapi/linux/sockios.h | 1 + net/socket.c | 13 +++++++++++++ 4 files changed, 19 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/fs/nsfs.c b/fs/nsfs.c index 8718af895eab..8c9fb29c6673 100644 --- a/fs/nsfs.c +++ b/fs/nsfs.c @@ -118,7 +118,7 @@ again: return ret; } -static int open_related_ns(struct ns_common *ns, +int open_related_ns(struct ns_common *ns, struct ns_common *(*get_ns)(struct ns_common *ns)) { struct path path = {}; diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index b97bf2ef996e..368c7ad06ae5 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -82,4 +82,8 @@ static inline struct proc_dir_entry *proc_net_mkdir( return proc_mkdir_data(name, 0, parent, net); } +struct ns_common; +int open_related_ns(struct ns_common *ns, + struct ns_common *(*get_ns)(struct ns_common *ns)); + #endif /* _LINUX_PROC_FS_H */ diff --git a/include/uapi/linux/sockios.h b/include/uapi/linux/sockios.h index 8e7890b26d9a..83cc54ce6081 100644 --- a/include/uapi/linux/sockios.h +++ b/include/uapi/linux/sockios.h @@ -84,6 +84,7 @@ #define SIOCWANDEV 0x894A /* get/set netdev parameters */ #define SIOCOUTQNSD 0x894B /* output queue size (not sent only) */ +#define SIOCGSKNS 0x894C /* get socket network namespace */ /* ARP cache control calls. 
*/ /* 0x8950 - 0x8952 * obsolete calls, don't re-use */ diff --git a/net/socket.c b/net/socket.c index 5a9bf5ee2464..970a7ea3fc4a 100644 --- a/net/socket.c +++ b/net/socket.c @@ -877,6 +877,11 @@ static long sock_do_ioctl(struct net *net, struct socket *sock, * what to do with it - that's up to the protocol still. */ +static struct ns_common *get_net_ns(struct ns_common *ns) +{ + return &get_net(container_of(ns, struct net, ns))->ns; +} + static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) { struct socket *sock; @@ -945,6 +950,13 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) err = dlci_ioctl_hook(cmd, argp); mutex_unlock(&dlci_ioctl_mutex); break; + case SIOCGSKNS: + err = -EPERM; + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) + break; + + err = open_related_ns(&net->ns, get_net_ns); + break; default: err = sock_do_ioctl(net, sock, cmd, arg); break; @@ -3093,6 +3105,7 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock, case SIOCSIFVLAN: case SIOCADDDLCI: case SIOCDELDLCI: + case SIOCGSKNS: return sock_ioctl(file, cmd, arg); case SIOCGIFFLAGS: -- cgit v1.2.3 From 9ed9895370aedd6032af2a9181c62c394d08223b Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 30 Oct 2016 17:32:16 +0100 Subject: driver core: Functional dependencies tracking support Currently, there is a problem with taking functional dependencies between devices into account. What I mean by a "functional dependency" is when the driver of device B needs device A to be functional and (generally) its driver to be present in order to work properly. This has certain consequences for power management (suspend/resume and runtime PM ordering) and shutdown ordering of these devices. In general, it also implies that the driver of A needs to be working for B to be probed successfully and it cannot be unbound from the device before the B's driver. 
Support for representing those functional dependencies between devices is added here to allow the driver core to track them and act on them in certain cases where applicable. The argument for doing that in the driver core is that there are quite a few distinct use cases involving device dependencies, they are relatively hard to get right in a driver (if one wants to address all of them properly) and it only gets worse if multiplied by the number of drivers potentially needing to do it. Moreover, at least one case (asynchronous system suspend/resume) cannot be handled in a single driver at all, because it requires the driver of A to wait for B to suspend (during system suspend) and the driver of B to wait for A to resume (during system resume). For this reason, represent dependencies between devices as "links", with the help of struct device_link objects each containing pointers to the "linked" devices, a list node for each of them, status information, flags, and an RCU head for synchronization. Also add two new list heads, representing the lists of links to the devices that depend on the given one (consumers) and to the devices depended on by it (suppliers), and a "driver presence status" field (needed for figuring out initial states of device links) to struct device. The entire data structure consisting of all of the lists of link objects for all devices is protected by a mutex (for link object addition/removal and for list walks during device driver probing and removal) and by SRCU (for list walking in other cases that will be introduced by subsequent change sets). If CONFIG_SRCU is not selected, however, an rwsem is used for protecting the entire data structure. In addition, each link object has an internal status field whose value reflects whether or not drivers are bound to the devices pointed to by the link or probing/removal of their drivers is in progress etc.
That field is only modified under the device links mutex, but it may be read outside of it in some cases (introduced by subsequent change sets), so modifications of it are annotated with WRITE_ONCE(). New links are added by calling device_link_add() which takes three arguments: pointers to the devices in question and flags. In particular, if DL_FLAG_STATELESS is set in the flags, the link status is not to be taken into account for this link and the driver core will not manage it. In turn, if DL_FLAG_AUTOREMOVE is set in the flags, the driver core will remove the link automatically when the consumer device driver unbinds from it. One of the actions carried out by device_link_add() is to reorder the lists used for device shutdown and system suspend/resume to put the consumer device along with all of its children and all of its consumers (and so on, recursively) to the ends of those lists in order to ensure the right ordering between all of the supplier and consumer devices. For this reason, it is not possible to create a link between two devices if the would-be supplier device already depends on the would-be consumer device as either a direct descendant of it or a consumer of one of its direct descendants or one of its consumers and so on. There are two types of link objects, persistent and non-persistent. The persistent ones stay around until one of the target devices is deleted, while the non-persistent ones are removed automatically when the consumer driver unbinds from its device (ie. they are assumed to be valid only as long as the consumer device has a driver bound to it). Persistent links are created by default and non-persistent links are created when the DL_FLAG_AUTOREMOVE flag is passed to device_link_add(). Both persistent and non-persistent device links can be deleted with an explicit call to device_link_del(). Links created without the DL_FLAG_STATELESS flag set are managed by the driver core using a simple state machine. 
There are 5 states each link can be in: DORMANT (unused), AVAILABLE (the supplier driver is present and functional), CONSUMER_PROBE (the consumer driver is probing), ACTIVE (both supplier and consumer drivers are present and functional), and SUPPLIER_UNBIND (the supplier driver is unbinding). The driver core updates the link state automatically depending on what happens to the linked devices and for each link state specific actions are taken in addition to that. For example, if the supplier driver unbinds from its device, the driver core will also unbind the drivers of all of its consumers automatically under the assumption that they cannot function properly without the supplier. Analogously, the driver core will only allow the consumer driver to bind to its device if the supplier driver is present and functional (ie. the link is in the AVAILABLE state). If that's not the case, it will rely on the existing deferred probing mechanism to wait for the supplier driver to become available. Signed-off-by: Rafael J. 
Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/base/base.h | 13 ++ drivers/base/core.c | 540 +++++++++++++++++++++++++++++++++++++++++++++ drivers/base/dd.c | 41 +++- drivers/base/power/main.c | 2 + drivers/base/power/power.h | 10 + include/linux/device.h | 80 +++++++ include/linux/pm.h | 1 + 7 files changed, 682 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/base.h b/drivers/base/base.h index e05db388bd1c..e19b1008e5fb 100644 --- a/drivers/base/base.h +++ b/drivers/base/base.h @@ -107,6 +107,9 @@ extern void bus_remove_device(struct device *dev); extern int bus_add_driver(struct device_driver *drv); extern void bus_remove_driver(struct device_driver *drv); +extern void device_release_driver_internal(struct device *dev, + struct device_driver *drv, + struct device *parent); extern void driver_detach(struct device_driver *drv); extern int driver_probe_device(struct device_driver *drv, struct device *dev); @@ -152,3 +155,13 @@ extern int devtmpfs_init(void); #else static inline int devtmpfs_init(void) { return 0; } #endif + +/* Device links support */ +extern int device_links_read_lock(void); +extern void device_links_read_unlock(int idx); +extern int device_links_check_suppliers(struct device *dev); +extern void device_links_driver_bound(struct device *dev); +extern void device_links_driver_cleanup(struct device *dev); +extern void device_links_no_driver(struct device *dev); +extern bool device_links_busy(struct device *dev); +extern void device_links_unbind_consumers(struct device *dev); diff --git a/drivers/base/core.c b/drivers/base/core.c index ce057a568673..3c5ff17f578f 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -44,6 +44,541 @@ static int __init sysfs_deprecated_setup(char *arg) early_param("sysfs.deprecated", sysfs_deprecated_setup); #endif +/* Device links support. 
*/ + +#ifdef CONFIG_SRCU +static DEFINE_MUTEX(device_links_lock); +DEFINE_STATIC_SRCU(device_links_srcu); + +static inline void device_links_write_lock(void) +{ + mutex_lock(&device_links_lock); +} + +static inline void device_links_write_unlock(void) +{ + mutex_unlock(&device_links_lock); +} + +int device_links_read_lock(void) +{ + return srcu_read_lock(&device_links_srcu); +} + +void device_links_read_unlock(int idx) +{ + srcu_read_unlock(&device_links_srcu, idx); +} +#else /* !CONFIG_SRCU */ +static DECLARE_RWSEM(device_links_lock); + +static inline void device_links_write_lock(void) +{ + down_write(&device_links_lock); +} + +static inline void device_links_write_unlock(void) +{ + up_write(&device_links_lock); +} + +int device_links_read_lock(void) +{ + down_read(&device_links_lock); + return 0; +} + +void device_links_read_unlock(int not_used) +{ + up_read(&device_links_lock); +} +#endif /* !CONFIG_SRCU */ + +/** + * device_is_dependent - Check if one device depends on another one + * @dev: Device to check dependencies for. + * @target: Device to check against. + * + * Check if @target depends on @dev or any device dependent on it (its child or + * its consumer etc). Return 1 if that is the case or 0 otherwise. + */ +static int device_is_dependent(struct device *dev, void *target) +{ + struct device_link *link; + int ret; + + if (WARN_ON(dev == target)) + return 1; + + ret = device_for_each_child(dev, target, device_is_dependent); + if (ret) + return ret; + + list_for_each_entry(link, &dev->links.consumers, s_node) { + if (WARN_ON(link->consumer == target)) + return 1; + + ret = device_is_dependent(link->consumer, target); + if (ret) + break; + } + return ret; +} + +static int device_reorder_to_tail(struct device *dev, void *not_used) +{ + struct device_link *link; + + /* + * Devices that have not been registered yet will be put to the ends + * of the lists during the registration, so skip them here. 
+ */ + if (device_is_registered(dev)) + devices_kset_move_last(dev); + + if (device_pm_initialized(dev)) + device_pm_move_last(dev); + + device_for_each_child(dev, NULL, device_reorder_to_tail); + list_for_each_entry(link, &dev->links.consumers, s_node) + device_reorder_to_tail(link->consumer, NULL); + + return 0; +} + +/** + * device_link_add - Create a link between two devices. + * @consumer: Consumer end of the link. + * @supplier: Supplier end of the link. + * @flags: Link flags. + * + * If the DL_FLAG_AUTOREMOVE is set, the link will be removed automatically + * when the consumer device driver unbinds from it. The combination of both + * DL_FLAG_AUTOREMOVE and DL_FLAG_STATELESS set is invalid and will cause NULL + * to be returned. + * + * A side effect of the link creation is re-ordering of dpm_list and the + * devices_kset list by moving the consumer device and all devices depending + * on it to the ends of these lists (that does not happen to devices that have + * not been registered when this function is called). + * + * The supplier device is required to be registered when this function is called + * and NULL will be returned if that is not the case. The consumer device need + * not be registered, however. + */ +struct device_link *device_link_add(struct device *consumer, + struct device *supplier, u32 flags) +{ + struct device_link *link; + + if (!consumer || !supplier || + ((flags & DL_FLAG_STATELESS) && (flags & DL_FLAG_AUTOREMOVE))) + return NULL; + + device_links_write_lock(); + device_pm_lock(); + + /* + * If the supplier has not been fully registered yet or there is a + * reverse dependency between the consumer and the supplier already in + * the graph, return NULL.
+ */ + if (!device_pm_initialized(supplier) + || device_is_dependent(consumer, supplier)) { + link = NULL; + goto out; + } + + list_for_each_entry(link, &supplier->links.consumers, s_node) + if (link->consumer == consumer) + goto out; + + link = kmalloc(sizeof(*link), GFP_KERNEL); + if (!link) + goto out; + + get_device(supplier); + link->supplier = supplier; + INIT_LIST_HEAD(&link->s_node); + get_device(consumer); + link->consumer = consumer; + INIT_LIST_HEAD(&link->c_node); + link->flags = flags; + + /* Determine the initial link state. */ + if (flags & DL_FLAG_STATELESS) { + link->status = DL_STATE_NONE; + } else { + switch (supplier->links.status) { + case DL_DEV_DRIVER_BOUND: + switch (consumer->links.status) { + case DL_DEV_PROBING: + link->status = DL_STATE_CONSUMER_PROBE; + break; + case DL_DEV_DRIVER_BOUND: + link->status = DL_STATE_ACTIVE; + break; + default: + link->status = DL_STATE_AVAILABLE; + break; + } + break; + case DL_DEV_UNBINDING: + link->status = DL_STATE_SUPPLIER_UNBIND; + break; + default: + link->status = DL_STATE_DORMANT; + break; + } + } + + /* + * Move the consumer and all of the devices depending on it to the end + * of dpm_list and the devices_kset list. + * + * It is necessary to hold dpm_list locked throughout all that or else + * we may end up suspending with a wrong ordering of it.
+ */ + device_reorder_to_tail(consumer, NULL); + + list_add_tail_rcu(&link->s_node, &supplier->links.consumers); + list_add_tail_rcu(&link->c_node, &consumer->links.suppliers); + + dev_info(consumer, "Linked as a consumer to %s\n", dev_name(supplier)); + + out: + device_pm_unlock(); + device_links_write_unlock(); + return link; +} +EXPORT_SYMBOL_GPL(device_link_add); + +static void device_link_free(struct device_link *link) +{ + put_device(link->consumer); + put_device(link->supplier); + kfree(link); +} + +#ifdef CONFIG_SRCU +static void __device_link_free_srcu(struct rcu_head *rhead) +{ + device_link_free(container_of(rhead, struct device_link, rcu_head)); +} + +static void __device_link_del(struct device_link *link) +{ + dev_info(link->consumer, "Dropping the link to %s\n", + dev_name(link->supplier)); + + list_del_rcu(&link->s_node); + list_del_rcu(&link->c_node); + call_srcu(&device_links_srcu, &link->rcu_head, __device_link_free_srcu); +} +#else /* !CONFIG_SRCU */ +static void __device_link_del(struct device_link *link) +{ + dev_info(link->consumer, "Dropping the link to %s\n", + dev_name(link->supplier)); + + list_del(&link->s_node); + list_del(&link->c_node); + device_link_free(link); +} +#endif /* !CONFIG_SRCU */ + +/** + * device_link_del - Delete a link between two devices. + * @link: Device link to delete. + * + * The caller must ensure proper synchronization of this function with runtime + * PM. + */ +void device_link_del(struct device_link *link) +{ + device_links_write_lock(); + device_pm_lock(); + __device_link_del(link); + device_pm_unlock(); + device_links_write_unlock(); +} +EXPORT_SYMBOL_GPL(device_link_del); + +static void device_links_missing_supplier(struct device *dev) +{ + struct device_link *link; + + list_for_each_entry(link, &dev->links.suppliers, c_node) + if (link->status == DL_STATE_CONSUMER_PROBE) + WRITE_ONCE(link->status, DL_STATE_AVAILABLE); +} + +/** + * device_links_check_suppliers - Check presence of supplier drivers. 
+ * @dev: Consumer device. + * + * Check links from this device to any suppliers. Walk the list of the device's + * links to suppliers and see if all of them are available. If not, simply + * return -EPROBE_DEFER. + * + * We need to guarantee that the supplier will not go away after the check has + * been positive here. It only can go away in __device_release_driver() and + * that function checks the device's links to consumers. This means we need to + * mark the link as "consumer probe in progress" to make the supplier removal + * wait for us to complete (or bad things may happen). + * + * Links with the DL_FLAG_STATELESS flag set are ignored. + */ +int device_links_check_suppliers(struct device *dev) +{ + struct device_link *link; + int ret = 0; + + device_links_write_lock(); + + list_for_each_entry(link, &dev->links.suppliers, c_node) { + if (link->flags & DL_FLAG_STATELESS) + continue; + + if (link->status != DL_STATE_AVAILABLE) { + device_links_missing_supplier(dev); + ret = -EPROBE_DEFER; + break; + } + WRITE_ONCE(link->status, DL_STATE_CONSUMER_PROBE); + } + dev->links.status = DL_DEV_PROBING; + + device_links_write_unlock(); + return ret; +} + +/** + * device_links_driver_bound - Update device links after probing its driver. + * @dev: Device to update the links for. + * + * The probe has been successful, so update links from this device to any + * consumers by changing their status to "available". + * + * Also change the status of @dev's links to suppliers to "active". + * + * Links with the DL_FLAG_STATELESS flag set are ignored. 
+ */ +void device_links_driver_bound(struct device *dev) +{ + struct device_link *link; + + device_links_write_lock(); + + list_for_each_entry(link, &dev->links.consumers, s_node) { + if (link->flags & DL_FLAG_STATELESS) + continue; + + WARN_ON(link->status != DL_STATE_DORMANT); + WRITE_ONCE(link->status, DL_STATE_AVAILABLE); + } + + list_for_each_entry(link, &dev->links.suppliers, c_node) { + if (link->flags & DL_FLAG_STATELESS) + continue; + + WARN_ON(link->status != DL_STATE_CONSUMER_PROBE); + WRITE_ONCE(link->status, DL_STATE_ACTIVE); + } + + dev->links.status = DL_DEV_DRIVER_BOUND; + + device_links_write_unlock(); +} + +/** + * __device_links_no_driver - Update links of a device without a driver. + * @dev: Device without a driver. + * + * Delete all non-persistent links from this device to any suppliers. + * + * Persistent links stay around, but their status is changed to "available", + * unless they already are in the "supplier unbind in progress" state in which + * case they need not be updated. + * + * Links with the DL_FLAG_STATELESS flag set are ignored. + */ +static void __device_links_no_driver(struct device *dev) +{ + struct device_link *link, *ln; + + list_for_each_entry_safe_reverse(link, ln, &dev->links.suppliers, c_node) { + if (link->flags & DL_FLAG_STATELESS) + continue; + + if (link->flags & DL_FLAG_AUTOREMOVE) + __device_link_del(link); + else if (link->status != DL_STATE_SUPPLIER_UNBIND) + WRITE_ONCE(link->status, DL_STATE_AVAILABLE); + } + + dev->links.status = DL_DEV_NO_DRIVER; +} + +void device_links_no_driver(struct device *dev) +{ + device_links_write_lock(); + __device_links_no_driver(dev); + device_links_write_unlock(); +} + +/** + * device_links_driver_cleanup - Update links after driver removal. + * @dev: Device whose driver has just gone away. + * + * Update links to consumers for @dev by changing their status to "dormant" and + * invoke %__device_links_no_driver() to update links to suppliers for it as + * appropriate.
+ * + * Links with the DL_FLAG_STATELESS flag set are ignored. + */ +void device_links_driver_cleanup(struct device *dev) +{ + struct device_link *link; + + device_links_write_lock(); + + list_for_each_entry(link, &dev->links.consumers, s_node) { + if (link->flags & DL_FLAG_STATELESS) + continue; + + WARN_ON(link->flags & DL_FLAG_AUTOREMOVE); + WARN_ON(link->status != DL_STATE_SUPPLIER_UNBIND); + WRITE_ONCE(link->status, DL_STATE_DORMANT); + } + + __device_links_no_driver(dev); + + device_links_write_unlock(); +} + +/** + * device_links_busy - Check if there are any busy links to consumers. + * @dev: Device to check. + * + * Check each consumer of the device and return 'true' if its link's status + * is one of "consumer probe" or "active" (meaning that the given consumer is + * probing right now or its driver is present). Otherwise, change the link + * state to "supplier unbind" to prevent the consumer from being probed + * successfully going forward. + * + * Return 'false' if there are no probing or active consumers. + * + * Links with the DL_FLAG_STATELESS flag set are ignored. + */ +bool device_links_busy(struct device *dev) +{ + struct device_link *link; + bool ret = false; + + device_links_write_lock(); + + list_for_each_entry(link, &dev->links.consumers, s_node) { + if (link->flags & DL_FLAG_STATELESS) + continue; + + if (link->status == DL_STATE_CONSUMER_PROBE + || link->status == DL_STATE_ACTIVE) { + ret = true; + break; + } + WRITE_ONCE(link->status, DL_STATE_SUPPLIER_UNBIND); + } + + dev->links.status = DL_DEV_UNBINDING; + + device_links_write_unlock(); + return ret; +} + +/** + * device_links_unbind_consumers - Force unbind consumers of the given device. + * @dev: Device to unbind the consumers of. + * + * Walk the list of links to consumers for @dev and if any of them is in the + * "consumer probe" state, wait for all device probes in progress to complete + * and start over. 
+ * + * If that's not the case, change the status of the link to "supplier unbind" + * and check if the link was in the "active" state. If so, force the consumer + * driver to unbind and start over (the consumer will not re-probe as we have + * changed the state of the link already). + * + * Links with the DL_FLAG_STATELESS flag set are ignored. + */ +void device_links_unbind_consumers(struct device *dev) +{ + struct device_link *link; + + start: + device_links_write_lock(); + + list_for_each_entry(link, &dev->links.consumers, s_node) { + enum device_link_state status; + + if (link->flags & DL_FLAG_STATELESS) + continue; + + status = link->status; + if (status == DL_STATE_CONSUMER_PROBE) { + device_links_write_unlock(); + + wait_for_device_probe(); + goto start; + } + WRITE_ONCE(link->status, DL_STATE_SUPPLIER_UNBIND); + if (status == DL_STATE_ACTIVE) { + struct device *consumer = link->consumer; + + get_device(consumer); + + device_links_write_unlock(); + + device_release_driver_internal(consumer, NULL, + consumer->parent); + put_device(consumer); + goto start; + } + } + + device_links_write_unlock(); +} + +/** + * device_links_purge - Delete existing links to other devices. + * @dev: Target device. + */ +static void device_links_purge(struct device *dev) +{ + struct device_link *link, *ln; + + /* + * Delete all of the remaining links from this device to any other + * devices (either consumers or suppliers). + */ + device_links_write_lock(); + + list_for_each_entry_safe_reverse(link, ln, &dev->links.suppliers, c_node) { + WARN_ON(link->status == DL_STATE_ACTIVE); + __device_link_del(link); + } + + list_for_each_entry_safe_reverse(link, ln, &dev->links.consumers, s_node) { + WARN_ON(link->status != DL_STATE_DORMANT && + link->status != DL_STATE_NONE); + __device_link_del(link); + } + + device_links_write_unlock(); +} + +/* Device links support end. 
*/ + int (*platform_notify)(struct device *dev) = NULL; int (*platform_notify_remove)(struct device *dev) = NULL; static struct kobject *dev_kobj; @@ -711,6 +1246,9 @@ void device_initialize(struct device *dev) #ifdef CONFIG_GENERIC_MSI_IRQ INIT_LIST_HEAD(&dev->msi_list); #endif + INIT_LIST_HEAD(&dev->links.consumers); + INIT_LIST_HEAD(&dev->links.suppliers); + dev->links.status = DL_DEV_NO_DRIVER; } EXPORT_SYMBOL_GPL(device_initialize); @@ -1258,6 +1796,8 @@ void device_del(struct device *dev) if (dev->bus) blocking_notifier_call_chain(&dev->bus->p->bus_notifier, BUS_NOTIFY_DEL_DEVICE, dev); + + device_links_purge(dev); dpm_sysfs_remove(dev); if (parent) klist_del(&dev->p->knode_parent); diff --git a/drivers/base/dd.c b/drivers/base/dd.c index df4ab5509c04..b2bca3cf0dd2 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -244,6 +244,7 @@ static void driver_bound(struct device *dev) __func__, dev_name(dev)); klist_add_tail(&dev->p->knode_driver, &dev->driver->p->klist_devices); + device_links_driver_bound(dev); device_pm_check_callbacks(dev); @@ -337,6 +338,10 @@ static int really_probe(struct device *dev, struct device_driver *drv) return ret; } + ret = device_links_check_suppliers(dev); + if (ret) + return ret; + atomic_inc(&probe_count); pr_debug("bus: '%s': %s: probing driver %s with device %s\n", drv->bus->name, __func__, drv->name, dev_name(dev)); @@ -415,6 +420,7 @@ probe_failed: blocking_notifier_call_chain(&dev->bus->p->bus_notifier, BUS_NOTIFY_DRIVER_NOT_BOUND, dev); pinctrl_bind_failed: + device_links_no_driver(dev); devres_release_all(dev); driver_sysfs_remove(dev); dev->driver = NULL; @@ -771,7 +777,7 @@ EXPORT_SYMBOL_GPL(driver_attach); * __device_release_driver() must be called with @dev lock held. * When called for a USB interface, @dev->parent lock must be held as well. 
*/ -static void __device_release_driver(struct device *dev) +static void __device_release_driver(struct device *dev, struct device *parent) { struct device_driver *drv; @@ -780,6 +786,25 @@ static void __device_release_driver(struct device *dev) if (driver_allows_async_probing(drv)) async_synchronize_full(); + while (device_links_busy(dev)) { + device_unlock(dev); + if (parent) + device_unlock(parent); + + device_links_unbind_consumers(dev); + if (parent) + device_lock(parent); + + device_lock(dev); + /* + * A concurrent invocation of the same function might + * have released the driver successfully while this one + * was waiting, so check for that. + */ + if (dev->driver != drv) + return; + } + pm_runtime_get_sync(dev); driver_sysfs_remove(dev); @@ -795,6 +820,8 @@ static void __device_release_driver(struct device *dev) dev->bus->remove(dev); else if (drv->remove) drv->remove(dev); + + device_links_driver_cleanup(dev); devres_release_all(dev); dev->driver = NULL; dev_set_drvdata(dev, NULL); @@ -811,16 +838,16 @@ static void __device_release_driver(struct device *dev) } } -static void device_release_driver_internal(struct device *dev, - struct device_driver *drv, - struct device *parent) +void device_release_driver_internal(struct device *dev, + struct device_driver *drv, + struct device *parent) { if (parent) device_lock(parent); device_lock(dev); if (!drv || drv == dev->driver) - __device_release_driver(dev); + __device_release_driver(dev, parent); device_unlock(dev); if (parent) @@ -833,6 +860,10 @@ static void device_release_driver_internal(struct device *dev, * * Manually detach device from driver. * When called for a USB interface, @dev->parent lock must be held. + * + * If this function is to be called with @dev->parent lock held, ensure that + * the device's consumers are unbound in advance or that their locks can be + * acquired under the @dev->parent lock. 
*/ void device_release_driver(struct device *dev) { diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index e44944f4be77..420914061405 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -131,6 +131,7 @@ void device_pm_add(struct device *dev) dev_warn(dev, "parent %s should not be sleeping\n", dev_name(dev->parent)); list_add_tail(&dev->power.entry, &dpm_list); + dev->power.in_dpm_list = true; mutex_unlock(&dpm_list_mtx); } @@ -145,6 +146,7 @@ void device_pm_remove(struct device *dev) complete_all(&dev->power.completion); mutex_lock(&dpm_list_mtx); list_del_init(&dev->power.entry); + dev->power.in_dpm_list = false; mutex_unlock(&dpm_list_mtx); device_wakeup_disable(dev); pm_runtime_remove(dev); diff --git a/drivers/base/power/power.h b/drivers/base/power/power.h index 50e30e7b059d..0ba7842d665b 100644 --- a/drivers/base/power/power.h +++ b/drivers/base/power/power.h @@ -127,6 +127,11 @@ extern void device_pm_move_after(struct device *, struct device *); extern void device_pm_move_last(struct device *); extern void device_pm_check_callbacks(struct device *dev); +static inline bool device_pm_initialized(struct device *dev) +{ + return dev->power.in_dpm_list; +} + #else /* !CONFIG_PM_SLEEP */ static inline void device_pm_sleep_init(struct device *dev) {} @@ -146,6 +151,11 @@ static inline void device_pm_move_last(struct device *dev) {} static inline void device_pm_check_callbacks(struct device *dev) {} +static inline bool device_pm_initialized(struct device *dev) +{ + return device_is_registered(dev); +} + #endif /* !CONFIG_PM_SLEEP */ static inline void device_pm_init(struct device *dev) diff --git a/include/linux/device.h b/include/linux/device.h index bc41e87a969b..9cae2feaf5cb 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -707,6 +707,81 @@ struct device_dma_parameters { unsigned long segment_boundary_mask; }; +/** + * enum device_link_state - Device link states. 
+ * @DL_STATE_NONE: The presence of the drivers is not being tracked. + * @DL_STATE_DORMANT: None of the supplier/consumer drivers is present. + * @DL_STATE_AVAILABLE: The supplier driver is present, but the consumer is not. + * @DL_STATE_CONSUMER_PROBE: The consumer is probing (supplier driver present). + * @DL_STATE_ACTIVE: Both the supplier and consumer drivers are present. + * @DL_STATE_SUPPLIER_UNBIND: The supplier driver is unbinding. + */ +enum device_link_state { + DL_STATE_NONE = -1, + DL_STATE_DORMANT = 0, + DL_STATE_AVAILABLE, + DL_STATE_CONSUMER_PROBE, + DL_STATE_ACTIVE, + DL_STATE_SUPPLIER_UNBIND, +}; + +/* + * Device link flags. + * + * STATELESS: The core won't track the presence of supplier/consumer drivers. + * AUTOREMOVE: Remove this link automatically on consumer driver unbind. + */ +#define DL_FLAG_STATELESS BIT(0) +#define DL_FLAG_AUTOREMOVE BIT(1) + +/** + * struct device_link - Device link representation. + * @supplier: The device on the supplier end of the link. + * @s_node: Hook to the supplier device's list of links to consumers. + * @consumer: The device on the consumer end of the link. + * @c_node: Hook to the consumer device's list of links to suppliers. + * @status: The state of the link (with respect to the presence of drivers). + * @flags: Link flags. + * @rcu_head: An RCU head to use for deferred execution of SRCU callbacks. + */ +struct device_link { + struct device *supplier; + struct list_head s_node; + struct device *consumer; + struct list_head c_node; + enum device_link_state status; + u32 flags; +#ifdef CONFIG_SRCU + struct rcu_head rcu_head; +#endif +}; + +/** + * enum dl_dev_state - Device driver presence tracking information. + * @DL_DEV_NO_DRIVER: There is no driver attached to the device. + * @DL_DEV_PROBING: A driver is probing. + * @DL_DEV_DRIVER_BOUND: The driver has been bound to the device. + * @DL_DEV_UNBINDING: The driver is unbinding from the device. 
+ */ +enum dl_dev_state { + DL_DEV_NO_DRIVER = 0, + DL_DEV_PROBING, + DL_DEV_DRIVER_BOUND, + DL_DEV_UNBINDING, +}; + +/** + * struct dev_links_info - Device data related to device links. + * @suppliers: List of links to supplier devices. + * @consumers: List of links to consumer devices. + * @status: Driver status information. + */ +struct dev_links_info { + struct list_head suppliers; + struct list_head consumers; + enum dl_dev_state status; +}; + /** * struct device - The basic device structure * @parent: The device's "parent" device, the device to which it is attached. @@ -799,6 +874,7 @@ struct device { core doesn't touch it */ void *driver_data; /* Driver data, set and get with dev_set/get_drvdata */ + struct dev_links_info links; struct dev_pm_info power; struct dev_pm_domain *pm_domain; @@ -1116,6 +1192,10 @@ extern void device_shutdown(void); /* debugging and troubleshooting/diagnostic helpers. */ extern const char *dev_driver_string(const struct device *dev); +/* Device links interface. */ +struct device_link *device_link_add(struct device *consumer, + struct device *supplier, u32 flags); +void device_link_del(struct device_link *link); #ifdef CONFIG_PRINTK diff --git a/include/linux/pm.h b/include/linux/pm.h index 06eb353182ab..721a70241fcd 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -559,6 +559,7 @@ struct dev_pm_info { pm_message_t power_state; unsigned int can_wakeup:1; unsigned int async_suspend:1; + bool in_dpm_list:1; /* Owned by the PM core */ bool is_prepared:1; /* Owned by the PM core */ bool is_suspended:1; /* Ditto */ bool is_noirq_suspended:1; -- cgit v1.2.3 From 21d5c57b3726166421251e94dabab047baaf8ce4 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 30 Oct 2016 17:32:31 +0100 Subject: PM / runtime: Use device links Modify the runtime PM framework to use device links to ensure that supplier devices will not be suspended if any of their consumer devices are active. 
The idea is to reference count suppliers on the consumer's resume and drop references to them on its suspend. The information on whether or not the supplier has been reference counted by the consumer's (runtime) resume is stored in a new field (rpm_active) in the link object for each link. It may be necessary to clean up those references when the supplier is unbinding and that's why the links whose status is DL_STATE_SUPPLIER_UNBIND are skipped by the runtime suspend and resume code. The above means that if the consumer device is probed in the runtime-active state, the supplier has to be resumed and reference counted by device_link_add() so the code works as expected on its (runtime) suspend. There is a new flag, DL_FLAG_RPM_ACTIVE, to tell device_link_add() about that (in which case the caller is responsible for making sure that the consumer really will be runtime-active when runtime PM is enabled for it). The other new link flag, DL_FLAG_PM_RUNTIME, tells the core whether or not the link should be used for runtime PM at all. Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 27 +++++++- drivers/base/dd.c | 3 + drivers/base/power/runtime.c | 157 +++++++++++++++++++++++++++++++++++++++++-- include/linux/device.h | 6 ++ include/linux/pm_runtime.h | 6 ++ 5 files changed, 193 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/core.c b/drivers/base/core.c index 3c5ff17f578f..876c62b12462 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -152,6 +152,14 @@ static int device_reorder_to_tail(struct device *dev, void *not_used) * @supplier: Supplier end of the link. * @flags: Link flags. * + * The caller is responsible for the proper synchronization of the link creation + * with runtime PM. First, setting the DL_FLAG_PM_RUNTIME flag will cause the + * runtime PM framework to take the link into account.
Second, if the + * DL_FLAG_RPM_ACTIVE flag is set in addition to it, the supplier devices will + * be forced into the active metastate and reference-counted upon the creation + * of the link. If DL_FLAG_PM_RUNTIME is not set, DL_FLAG_RPM_ACTIVE will be + * ignored. + * * If the DL_FLAG_AUTOREMOVE is set, the link will be removed automatically * when the consumer device driver unbinds from it. The combination of both * DL_FLAG_AUTOREMOVE and DL_FLAG_STATELESS set is invalid and will cause NULL @@ -193,10 +201,19 @@ struct device_link *device_link_add(struct device *consumer, if (link->consumer == consumer) goto out; - link = kmalloc(sizeof(*link), GFP_KERNEL); + link = kzalloc(sizeof(*link), GFP_KERNEL); if (!link) goto out; + if ((flags & DL_FLAG_PM_RUNTIME) && (flags & DL_FLAG_RPM_ACTIVE)) { + if (pm_runtime_get_sync(supplier) < 0) { + pm_runtime_put_noidle(supplier); + kfree(link); + link = NULL; + goto out; + } + link->rpm_active = true; + } get_device(supplier); link->supplier = supplier; INIT_LIST_HEAD(&link->s_node); @@ -213,6 +230,14 @@ struct device_link *device_link_add(struct device *consumer, case DL_DEV_DRIVER_BOUND: switch (consumer->links.status) { case DL_DEV_PROBING: + /* + * Balance the decrementation of the supplier's + * runtime PM usage counter after consumer probe + * in driver_probe_device(). 
+ */ + if (flags & DL_FLAG_PM_RUNTIME) + pm_runtime_get_sync(supplier); + link->status = DL_STATE_CONSUMER_PROBE; break; case DL_DEV_DRIVER_BOUND: diff --git a/drivers/base/dd.c b/drivers/base/dd.c index b2bca3cf0dd2..43be1cc751a4 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -513,6 +513,7 @@ int driver_probe_device(struct device_driver *drv, struct device *dev) pr_debug("bus: '%s': %s: matched device %s with driver %s\n", drv->bus->name, __func__, dev_name(dev), drv->name); + pm_runtime_get_suppliers(dev); if (dev->parent) pm_runtime_get_sync(dev->parent); @@ -523,6 +524,7 @@ int driver_probe_device(struct device_driver *drv, struct device *dev) if (dev->parent) pm_runtime_put(dev->parent); + pm_runtime_put_suppliers(dev); return ret; } @@ -806,6 +808,7 @@ static void __device_release_driver(struct device *dev, struct device *parent) } pm_runtime_get_sync(dev); + pm_runtime_clean_up_links(dev); driver_sysfs_remove(dev); diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index 82a081ea4317..462f90e952f8 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -12,6 +12,8 @@ #include #include #include + +#include "../base.h" #include "power.h" typedef int (*pm_callback_t)(struct device *); @@ -258,6 +260,42 @@ static int rpm_check_suspend_allowed(struct device *dev) return retval; } +static int rpm_get_suppliers(struct device *dev) +{ + struct device_link *link; + + list_for_each_entry_rcu(link, &dev->links.suppliers, c_node) { + int retval; + + if (!(link->flags & DL_FLAG_PM_RUNTIME)) + continue; + + if (READ_ONCE(link->status) == DL_STATE_SUPPLIER_UNBIND || + link->rpm_active) + continue; + + retval = pm_runtime_get_sync(link->supplier); + if (retval < 0) { + pm_runtime_put_noidle(link->supplier); + return retval; + } + link->rpm_active = true; + } + return 0; +} + +static void rpm_put_suppliers(struct device *dev) +{ + struct device_link *link; + + list_for_each_entry_rcu(link, &dev->links.suppliers, 
c_node) + if (link->rpm_active && + READ_ONCE(link->status) != DL_STATE_SUPPLIER_UNBIND) { + pm_runtime_put(link->supplier); + link->rpm_active = false; + } +} + /** * __rpm_callback - Run a given runtime PM callback for a given device. * @cb: Runtime PM callback to run. @@ -266,19 +304,55 @@ static int rpm_check_suspend_allowed(struct device *dev) static int __rpm_callback(int (*cb)(struct device *), struct device *dev) __releases(&dev->power.lock) __acquires(&dev->power.lock) { - int retval; + int retval, idx; - if (dev->power.irq_safe) + if (dev->power.irq_safe) { spin_unlock(&dev->power.lock); - else + } else { spin_unlock_irq(&dev->power.lock); + /* + * Resume suppliers if necessary. + * + * The device's runtime PM status cannot change until this + * routine returns, so it is safe to read the status outside of + * the lock. + */ + if (dev->power.runtime_status == RPM_RESUMING) { + idx = device_links_read_lock(); + + retval = rpm_get_suppliers(dev); + if (retval) + goto fail; + + device_links_read_unlock(idx); + } + } + retval = cb(dev); - if (dev->power.irq_safe) + if (dev->power.irq_safe) { spin_lock(&dev->power.lock); - else + } else { + /* + * If the device is suspending and the callback has returned + * success, drop the usage counters of the suppliers that have + * been reference counted on its resume. + * + * Do that if resume fails too. + */ + if ((dev->power.runtime_status == RPM_SUSPENDING && !retval) + || (dev->power.runtime_status == RPM_RESUMING && retval)) { + idx = device_links_read_lock(); + + fail: + rpm_put_suppliers(dev); + + device_links_read_unlock(idx); + } + spin_lock_irq(&dev->power.lock); + } return retval; } @@ -1446,6 +1520,79 @@ void pm_runtime_remove(struct device *dev) pm_runtime_reinit(dev); } +/** + * pm_runtime_clean_up_links - Prepare links to consumers for driver removal. + * @dev: Device whose driver is going to be removed. 
+ * + * Check links from this device to any consumers and if any of them have active + * runtime PM references to the device, drop the usage counter of the device + * (once per link). + * + * Links with the DL_FLAG_STATELESS flag set are ignored. + * + * Since the device is guaranteed to be runtime-active at the point this is + * called, nothing else needs to be done here. + * + * Moreover, this is called after device_links_busy() has returned 'false', so + * the status of each link is guaranteed to be DL_STATE_SUPPLIER_UNBIND and + * therefore rpm_active can't be manipulated concurrently. + */ +void pm_runtime_clean_up_links(struct device *dev) +{ + struct device_link *link; + int idx; + + idx = device_links_read_lock(); + + list_for_each_entry_rcu(link, &dev->links.consumers, s_node) { + if (link->flags & DL_FLAG_STATELESS) + continue; + + if (link->rpm_active) { + pm_runtime_put_noidle(dev); + link->rpm_active = false; + } + } + + device_links_read_unlock(idx); +} + +/** + * pm_runtime_get_suppliers - Resume and reference-count supplier devices. + * @dev: Consumer device. + */ +void pm_runtime_get_suppliers(struct device *dev) +{ + struct device_link *link; + int idx; + + idx = device_links_read_lock(); + + list_for_each_entry_rcu(link, &dev->links.suppliers, c_node) + if (link->flags & DL_FLAG_PM_RUNTIME) + pm_runtime_get_sync(link->supplier); + + device_links_read_unlock(idx); +} + +/** + * pm_runtime_put_suppliers - Drop references to supplier devices. + * @dev: Consumer device. + */ +void pm_runtime_put_suppliers(struct device *dev) +{ + struct device_link *link; + int idx; + + idx = device_links_read_lock(); + + list_for_each_entry_rcu(link, &dev->links.suppliers, c_node) + if (link->flags & DL_FLAG_PM_RUNTIME) + pm_runtime_put(link->supplier); + + device_links_read_unlock(idx); +} + /** * pm_runtime_force_suspend - Force a device into suspend state if needed. * @dev: Device to suspend. 
diff --git a/include/linux/device.h b/include/linux/device.h index 9cae2feaf5cb..49f453892ca5 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -730,9 +730,13 @@ enum device_link_state { * * STATELESS: The core won't track the presence of supplier/consumer drivers. * AUTOREMOVE: Remove this link automatically on consumer driver unbind. + * PM_RUNTIME: If set, the runtime PM framework will use this link. + * RPM_ACTIVE: Run pm_runtime_get_sync() on the supplier during link creation. */ #define DL_FLAG_STATELESS BIT(0) #define DL_FLAG_AUTOREMOVE BIT(1) +#define DL_FLAG_PM_RUNTIME BIT(2) +#define DL_FLAG_RPM_ACTIVE BIT(3) /** * struct device_link - Device link representation. @@ -742,6 +746,7 @@ enum device_link_state { * @c_node: Hook to the consumer device's list of links to suppliers. * @status: The state of the link (with respect to the presence of drivers). * @flags: Link flags. + * @rpm_active: Whether or not the consumer device is runtime-PM-active. * @rcu_head: An RCU head to use for deferred execution of SRCU callbacks. 
*/ struct device_link { @@ -751,6 +756,7 @@ struct device_link { struct list_head c_node; enum device_link_state status; u32 flags; + bool rpm_active; #ifdef CONFIG_SRCU struct rcu_head rcu_head; #endif diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 2e14d2667b6c..c2ee87138e4a 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -55,6 +55,9 @@ extern unsigned long pm_runtime_autosuspend_expiration(struct device *dev); extern void pm_runtime_update_max_time_suspended(struct device *dev, s64 delta_ns); extern void pm_runtime_set_memalloc_noio(struct device *dev, bool enable); +extern void pm_runtime_clean_up_links(struct device *dev); +extern void pm_runtime_get_suppliers(struct device *dev); +extern void pm_runtime_put_suppliers(struct device *dev); static inline void pm_suspend_ignore_children(struct device *dev, bool enable) { @@ -186,6 +189,9 @@ static inline unsigned long pm_runtime_autosuspend_expiration( struct device *dev) { return 0; } static inline void pm_runtime_set_memalloc_noio(struct device *dev, bool enable){} +static inline void pm_runtime_clean_up_links(struct device *dev) {} +static inline void pm_runtime_get_suppliers(struct device *dev) {} +static inline void pm_runtime_put_suppliers(struct device *dev) {} #endif /* !CONFIG_PM */ -- cgit v1.2.3 From baa8809f60971d10220dfe79248f54b2b265f003 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 30 Oct 2016 17:32:43 +0100 Subject: PM / runtime: Optimize the use of device links If the device has no links to suppliers that should be used for runtime PM (links with DL_FLAG_PM_RUNTIME set), there is no reason to walk the list of suppliers for that device during runtime suspend and resume. Add a simple mechanism to detect that case and possibly avoid the extra unnecessary overhead. Signed-off-by: Rafael J. 
Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 20 +++++++++++++------- drivers/base/power/runtime.c | 23 ++++++++++++++++++++--- include/linux/pm.h | 1 + include/linux/pm_runtime.h | 4 ++++ 4 files changed, 38 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/core.c b/drivers/base/core.c index 876c62b12462..d0c9df5cdd9e 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -205,14 +205,17 @@ struct device_link *device_link_add(struct device *consumer, if (!link) goto out; - if ((flags & DL_FLAG_PM_RUNTIME) && (flags & DL_FLAG_RPM_ACTIVE)) { - if (pm_runtime_get_sync(supplier) < 0) { - pm_runtime_put_noidle(supplier); - kfree(link); - link = NULL; - goto out; + if (flags & DL_FLAG_PM_RUNTIME) { + if (flags & DL_FLAG_RPM_ACTIVE) { + if (pm_runtime_get_sync(supplier) < 0) { + pm_runtime_put_noidle(supplier); + kfree(link); + link = NULL; + goto out; + } + link->rpm_active = true; } - link->rpm_active = true; + pm_runtime_new_link(consumer); } get_device(supplier); link->supplier = supplier; @@ -296,6 +299,9 @@ static void __device_link_del(struct device_link *link) dev_info(link->consumer, "Dropping the link to %s\n", dev_name(link->supplier)); + if (link->flags & DL_FLAG_PM_RUNTIME) + pm_runtime_drop_link(link->consumer); + list_del_rcu(&link->s_node); list_del_rcu(&link->c_node); call_srcu(&device_links_srcu, &link->rcu_head, __device_link_free_srcu); diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index 462f90e952f8..ba7b4a8c07e5 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -305,6 +305,7 @@ static int __rpm_callback(int (*cb)(struct device *), struct device *dev) __releases(&dev->power.lock) __acquires(&dev->power.lock) { int retval, idx; + bool use_links = dev->power.links_count > 0; if (dev->power.irq_safe) { spin_unlock(&dev->power.lock); @@ -318,7 +319,7 @@ static int __rpm_callback(int (*cb)(struct device *), struct device *dev) * 
routine returns, so it is safe to read the status outside of * the lock. */ - if (dev->power.runtime_status == RPM_RESUMING) { + if (use_links && dev->power.runtime_status == RPM_RESUMING) { idx = device_links_read_lock(); retval = rpm_get_suppliers(dev); @@ -341,8 +342,9 @@ static int __rpm_callback(int (*cb)(struct device *), struct device *dev) * * Do that if resume fails too. */ - if ((dev->power.runtime_status == RPM_SUSPENDING && !retval) - || (dev->power.runtime_status == RPM_RESUMING && retval)) { + if (use_links + && ((dev->power.runtime_status == RPM_SUSPENDING && !retval) + || (dev->power.runtime_status == RPM_RESUMING && retval))) { idx = device_links_read_lock(); fail: @@ -1593,6 +1595,21 @@ void pm_runtime_put_suppliers(struct device *dev) device_links_read_unlock(idx); } +void pm_runtime_new_link(struct device *dev) +{ + spin_lock_irq(&dev->power.lock); + dev->power.links_count++; + spin_unlock_irq(&dev->power.lock); +} + +void pm_runtime_drop_link(struct device *dev) +{ + spin_lock_irq(&dev->power.lock); + WARN_ON(dev->power.links_count == 0); + dev->power.links_count--; + spin_unlock_irq(&dev->power.lock); +} + /** * pm_runtime_force_suspend - Force a device into suspend state if needed. * @dev: Device to suspend. 
diff --git a/include/linux/pm.h b/include/linux/pm.h index 721a70241fcd..ccfe00ecc7e6 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -597,6 +597,7 @@ struct dev_pm_info { unsigned int use_autosuspend:1; unsigned int timer_autosuspends:1; unsigned int memalloc_noio:1; + unsigned int links_count; enum rpm_request request; enum rpm_status runtime_status; int runtime_error; diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index c2ee87138e4a..73814877537d 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -58,6 +58,8 @@ extern void pm_runtime_set_memalloc_noio(struct device *dev, bool enable); extern void pm_runtime_clean_up_links(struct device *dev); extern void pm_runtime_get_suppliers(struct device *dev); extern void pm_runtime_put_suppliers(struct device *dev); +extern void pm_runtime_new_link(struct device *dev); +extern void pm_runtime_drop_link(struct device *dev); static inline void pm_suspend_ignore_children(struct device *dev, bool enable) { @@ -192,6 +194,8 @@ static inline void pm_runtime_set_memalloc_noio(struct device *dev, static inline void pm_runtime_clean_up_links(struct device *dev) {} static inline void pm_runtime_get_suppliers(struct device *dev) {} static inline void pm_runtime_put_suppliers(struct device *dev) {} +static inline void pm_runtime_new_link(struct device *dev) {} +static inline void pm_runtime_drop_link(struct device *dev) {} #endif /* !CONFIG_PM */ -- cgit v1.2.3 From 9cf1f6a8c4cbb7836b838b51b3b02ddf32c6c6a0 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 28 Oct 2016 11:43:20 -0400 Subject: net: Move functions for configuring traffic classes out of inline headers The functions for configuring the traffic class to queue mappings have other effects that need to be addressed. Instead of trying to export a bunch of new functions just relocate the functions so that we can instrument them directly with the functionality they will need. 
Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- include/linux/netdevice.h | 31 +++---------------------------- net/core/dev.c | 29 +++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 20ce8df115ac..e05ab3bd48d2 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1920,34 +1920,9 @@ int netdev_set_prio_tc_map(struct net_device *dev, u8 prio, u8 tc) return 0; } -static inline -void netdev_reset_tc(struct net_device *dev) -{ - dev->num_tc = 0; - memset(dev->tc_to_txq, 0, sizeof(dev->tc_to_txq)); - memset(dev->prio_tc_map, 0, sizeof(dev->prio_tc_map)); -} - -static inline -int netdev_set_tc_queue(struct net_device *dev, u8 tc, u16 count, u16 offset) -{ - if (tc >= dev->num_tc) - return -EINVAL; - - dev->tc_to_txq[tc].count = count; - dev->tc_to_txq[tc].offset = offset; - return 0; -} - -static inline -int netdev_set_num_tc(struct net_device *dev, u8 num_tc) -{ - if (num_tc > TC_MAX_QUEUE) - return -EINVAL; - - dev->num_tc = num_tc; - return 0; -} +void netdev_reset_tc(struct net_device *dev); +int netdev_set_tc_queue(struct net_device *dev, u8 tc, u16 count, u16 offset); +int netdev_set_num_tc(struct net_device *dev, u8 num_tc); static inline int netdev_get_num_tc(struct net_device *dev) diff --git a/net/core/dev.c b/net/core/dev.c index 8341dadf5e94..2d54be912136 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2173,6 +2173,35 @@ error: EXPORT_SYMBOL(netif_set_xps_queue); #endif +void netdev_reset_tc(struct net_device *dev) +{ + dev->num_tc = 0; + memset(dev->tc_to_txq, 0, sizeof(dev->tc_to_txq)); + memset(dev->prio_tc_map, 0, sizeof(dev->prio_tc_map)); +} +EXPORT_SYMBOL(netdev_reset_tc); + +int netdev_set_tc_queue(struct net_device *dev, u8 tc, u16 count, u16 offset) +{ + if (tc >= dev->num_tc) + return -EINVAL; + + dev->tc_to_txq[tc].count = count; + dev->tc_to_txq[tc].offset = offset; + return 
0; +} +EXPORT_SYMBOL(netdev_set_tc_queue); + +int netdev_set_num_tc(struct net_device *dev, u8 num_tc) +{ + if (num_tc > TC_MAX_QUEUE) + return -EINVAL; + + dev->num_tc = num_tc; + return 0; +} +EXPORT_SYMBOL(netdev_set_num_tc); + /* * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues * greater then real_num_tx_queues stale skbs on the qdisc must be flushed. -- cgit v1.2.3 From 8d059b0f6f5b1d3acf829454e1087818ad660058 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 28 Oct 2016 11:43:49 -0400 Subject: net: Add sysfs value to determine queue traffic class Add a sysfs attribute for a Tx queue that allows us to determine the traffic class for a given queue. This will allow us to more easily determine this in the future. It is needed as XPS will take the traffic class for a group of queues into account in order to avoid pulling traffic from one traffic class into another. Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + net/core/dev.c | 17 +++++++++++++++++ net/core/net-sysfs.c | 20 +++++++++++++++++++- 3 files changed, 37 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e05ab3bd48d2..d91a41860614 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1920,6 +1920,7 @@ int netdev_set_prio_tc_map(struct net_device *dev, u8 prio, u8 tc) return 0; } +int netdev_txq_to_tc(struct net_device *dev, unsigned int txq); void netdev_reset_tc(struct net_device *dev); int netdev_set_tc_queue(struct net_device *dev, u8 tc, u16 count, u16 offset); int netdev_set_num_tc(struct net_device *dev, u8 num_tc); diff --git a/net/core/dev.c b/net/core/dev.c index 2d54be912136..db0fdbbcd9b8 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1948,6 +1948,23 @@ static void netif_setup_tc(struct net_device *dev, unsigned int txq) } } +int netdev_txq_to_tc(struct net_device *dev, unsigned int txq) +{ + if 
(dev->num_tc) { + struct netdev_tc_txq *tc = &dev->tc_to_txq[0]; + int i; + + for (i = 0; i < TC_MAX_QUEUE; i++, tc++) { + if ((txq - tc->offset) < tc->count) + return i; + } + + return -1; + } + + return 0; +} + #ifdef CONFIG_XPS static DEFINE_MUTEX(xps_map_mutex); #define xmap_dereference(P) \ diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index d4fe28606ff5..38bd9b933195 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -1024,7 +1024,6 @@ static ssize_t show_trans_timeout(struct netdev_queue *queue, return sprintf(buf, "%lu", trans_timeout); } -#ifdef CONFIG_XPS static unsigned int get_netdev_queue_index(struct netdev_queue *queue) { struct net_device *dev = queue->dev; @@ -1036,6 +1035,21 @@ static unsigned int get_netdev_queue_index(struct netdev_queue *queue) return i; } +static ssize_t show_traffic_class(struct netdev_queue *queue, + struct netdev_queue_attribute *attribute, + char *buf) +{ + struct net_device *dev = queue->dev; + int index = get_netdev_queue_index(queue); + int tc = netdev_txq_to_tc(dev, index); + + if (tc < 0) + return -EINVAL; + + return sprintf(buf, "%u\n", tc); +} + +#ifdef CONFIG_XPS static ssize_t show_tx_maxrate(struct netdev_queue *queue, struct netdev_queue_attribute *attribute, char *buf) @@ -1078,6 +1092,9 @@ static struct netdev_queue_attribute queue_tx_maxrate = static struct netdev_queue_attribute queue_trans_timeout = __ATTR(tx_timeout, S_IRUGO, show_trans_timeout, NULL); +static struct netdev_queue_attribute queue_traffic_class = + __ATTR(traffic_class, S_IRUGO, show_traffic_class, NULL); + #ifdef CONFIG_BQL /* * Byte queue limits sysfs structures and functions. 
@@ -1263,6 +1280,7 @@ static struct netdev_queue_attribute xps_cpus_attribute = static struct attribute *netdev_queue_default_attrs[] = { &queue_trans_timeout.attr, + &queue_traffic_class.attr, #ifdef CONFIG_XPS &xps_cpus_attribute.attr, &queue_tx_maxrate.attr, -- cgit v1.2.3 From 184c449f91fef521042970cca46bd5cdfc0e3a37 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 28 Oct 2016 11:50:13 -0400 Subject: net: Add support for XPS with QoS via traffic classes This patch adds support for setting and using XPS when QoS via traffic classes is enabled. With this change we will factor in the priority and traffic class mapping of the packet and use that information to correctly select the queue. This allows us to define a set of queues for a given traffic class via mqprio and then configure the XPS mapping for those queues so that the traffic flows can avoid head-of-line blocking between the individual CPUs if so desired. Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- include/linux/netdevice.h | 4 +- net/core/dev.c | 117 ++++++++++++++++++++++++++++++++-------------- net/core/net-sysfs.c | 31 +++++++----- 3 files changed, 105 insertions(+), 47 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d91a41860614..66fd61c681d9 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -732,8 +732,8 @@ struct xps_dev_maps { struct rcu_head rcu; struct xps_map __rcu *cpu_map[0]; }; -#define XPS_DEV_MAPS_SIZE (sizeof(struct xps_dev_maps) + \ - (nr_cpu_ids * sizeof(struct xps_map *))) +#define XPS_DEV_MAPS_SIZE(_tcs) (sizeof(struct xps_dev_maps) + \ + (nr_cpu_ids * (_tcs) * sizeof(struct xps_map *))) #endif /* CONFIG_XPS */ #define TC_MAX_QUEUE 16 diff --git a/net/core/dev.c b/net/core/dev.c index 108a6adce185..f23e28668f32 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2002,14 +2002,22 @@ static bool remove_xps_queue_cpu(struct net_device *dev, struct xps_dev_maps 
*dev_maps, int cpu, u16 offset, u16 count) { - int i, j; + int num_tc = dev->num_tc ? : 1; + bool active = false; + int tci; - for (i = count, j = offset; i--; j++) { - if (!remove_xps_queue(dev_maps, cpu, j)) - break; + for (tci = cpu * num_tc; num_tc--; tci++) { + int i, j; + + for (i = count, j = offset; i--; j++) { + if (!remove_xps_queue(dev_maps, cpu, j)) + break; + } + + active |= i < 0; } - return i < 0; + return active; } static void netif_reset_xps_queues(struct net_device *dev, u16 offset, @@ -2086,20 +2094,28 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask, u16 index) { struct xps_dev_maps *dev_maps, *new_dev_maps = NULL; + int i, cpu, tci, numa_node_id = -2; + int maps_sz, num_tc = 1, tc = 0; struct xps_map *map, *new_map; - int maps_sz = max_t(unsigned int, XPS_DEV_MAPS_SIZE, L1_CACHE_BYTES); - int cpu, numa_node_id = -2; bool active = false; + if (dev->num_tc) { + num_tc = dev->num_tc; + tc = netdev_txq_to_tc(dev, index); + if (tc < 0) + return -EINVAL; + } + + maps_sz = XPS_DEV_MAPS_SIZE(num_tc); + if (maps_sz < L1_CACHE_BYTES) + maps_sz = L1_CACHE_BYTES; + mutex_lock(&xps_map_mutex); dev_maps = xmap_dereference(dev->xps_maps); /* allocate memory for queue storage */ - for_each_online_cpu(cpu) { - if (!cpumask_test_cpu(cpu, mask)) - continue; - + for_each_cpu_and(cpu, cpu_online_mask, mask) { if (!new_dev_maps) new_dev_maps = kzalloc(maps_sz, GFP_KERNEL); if (!new_dev_maps) { @@ -2107,25 +2123,38 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask, return -ENOMEM; } - map = dev_maps ? xmap_dereference(dev_maps->cpu_map[cpu]) : + tci = cpu * num_tc + tc; + map = dev_maps ? 
xmap_dereference(dev_maps->cpu_map[tci]) : NULL; map = expand_xps_map(map, cpu, index); if (!map) goto error; - RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], map); + RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map); } if (!new_dev_maps) goto out_no_new_maps; for_each_possible_cpu(cpu) { + /* copy maps belonging to foreign traffic classes */ + for (i = tc, tci = cpu * num_tc; dev_maps && i--; tci++) { + /* fill in the new device map from the old device map */ + map = xmap_dereference(dev_maps->cpu_map[tci]); + RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map); + } + + /* We need to explicitly update tci as prevous loop + * could break out early if dev_maps is NULL. + */ + tci = cpu * num_tc + tc; + if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu)) { /* add queue to CPU maps */ int pos = 0; - map = xmap_dereference(new_dev_maps->cpu_map[cpu]); + map = xmap_dereference(new_dev_maps->cpu_map[tci]); while ((pos < map->len) && (map->queues[pos] != index)) pos++; @@ -2139,26 +2168,36 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask, #endif } else if (dev_maps) { /* fill in the new device map from the old device map */ - map = xmap_dereference(dev_maps->cpu_map[cpu]); - RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], map); + map = xmap_dereference(dev_maps->cpu_map[tci]); + RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map); } + /* copy maps belonging to foreign traffic classes */ + for (i = num_tc - tc, tci++; dev_maps && --i; tci++) { + /* fill in the new device map from the old device map */ + map = xmap_dereference(dev_maps->cpu_map[tci]); + RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map); + } } rcu_assign_pointer(dev->xps_maps, new_dev_maps); /* Cleanup old maps */ - if (dev_maps) { - for_each_possible_cpu(cpu) { - new_map = xmap_dereference(new_dev_maps->cpu_map[cpu]); - map = xmap_dereference(dev_maps->cpu_map[cpu]); + if (!dev_maps) + goto out_no_old_maps; + + for_each_possible_cpu(cpu) { + for (i = num_tc, tci = cpu * num_tc; i--; 
tci++) { + new_map = xmap_dereference(new_dev_maps->cpu_map[tci]); + map = xmap_dereference(dev_maps->cpu_map[tci]); if (map && map != new_map) kfree_rcu(map, rcu); } - - kfree_rcu(dev_maps, rcu); } + kfree_rcu(dev_maps, rcu); + +out_no_old_maps: dev_maps = new_dev_maps; active = true; @@ -2173,11 +2212,12 @@ out_no_new_maps: /* removes queue from unused CPUs */ for_each_possible_cpu(cpu) { - if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu)) - continue; - - if (remove_xps_queue(dev_maps, cpu, index)) - active = true; + for (i = tc, tci = cpu * num_tc; i--; tci++) + active |= remove_xps_queue(dev_maps, tci, index); + if (!cpumask_test_cpu(cpu, mask) || !cpu_online(cpu)) + active |= remove_xps_queue(dev_maps, tci, index); + for (i = num_tc - tc, tci++; --i; tci++) + active |= remove_xps_queue(dev_maps, tci, index); } /* free map if not active */ @@ -2193,11 +2233,14 @@ out_no_maps: error: /* remove any maps that we added */ for_each_possible_cpu(cpu) { - new_map = xmap_dereference(new_dev_maps->cpu_map[cpu]); - map = dev_maps ? xmap_dereference(dev_maps->cpu_map[cpu]) : - NULL; - if (new_map && new_map != map) - kfree(new_map); + for (i = num_tc, tci = cpu * num_tc; i--; tci++) { + new_map = xmap_dereference(new_dev_maps->cpu_map[tci]); + map = dev_maps ? 
+ xmap_dereference(dev_maps->cpu_map[tci]) : + NULL; + if (new_map && new_map != map) + kfree(new_map); + } } mutex_unlock(&xps_map_mutex); @@ -3158,8 +3201,14 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb) rcu_read_lock(); dev_maps = rcu_dereference(dev->xps_maps); if (dev_maps) { - map = rcu_dereference( - dev_maps->cpu_map[skb->sender_cpu - 1]); + unsigned int tci = skb->sender_cpu - 1; + + if (dev->num_tc) { + tci *= dev->num_tc; + tci += netdev_get_prio_tc_map(dev, skb->priority); + } + + map = rcu_dereference(dev_maps->cpu_map[tci]); if (map) { if (map->len == 1) queue_index = map->queues[0]; diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 38bd9b933195..b0c04cf4851d 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -1210,29 +1210,38 @@ static ssize_t show_xps_map(struct netdev_queue *queue, struct netdev_queue_attribute *attribute, char *buf) { struct net_device *dev = queue->dev; + int cpu, len, num_tc = 1, tc = 0; struct xps_dev_maps *dev_maps; cpumask_var_t mask; unsigned long index; - int i, len; if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) return -ENOMEM; index = get_netdev_queue_index(queue); + if (dev->num_tc) { + num_tc = dev->num_tc; + tc = netdev_txq_to_tc(dev, index); + if (tc < 0) + return -EINVAL; + } + rcu_read_lock(); dev_maps = rcu_dereference(dev->xps_maps); if (dev_maps) { - for_each_possible_cpu(i) { - struct xps_map *map = - rcu_dereference(dev_maps->cpu_map[i]); - if (map) { - int j; - for (j = 0; j < map->len; j++) { - if (map->queues[j] == index) { - cpumask_set_cpu(i, mask); - break; - } + for_each_possible_cpu(cpu) { + int i, tci = cpu * num_tc + tc; + struct xps_map *map; + + map = rcu_dereference(dev_maps->cpu_map[tci]); + if (!map) + continue; + + for (i = map->len; i--;) { + if (map->queues[i] == index) { + cpumask_set_cpu(cpu, mask); + break; } } } -- cgit v1.2.3 From 0fefbfbaad298162737d5418eb85065879f99b3e Mon Sep 17 00:00:00 2001 From: Sudarsana Kalluru Date: Mon, 31 
Oct 2016 07:14:21 +0200 Subject: qed*: Management firmware - notifications and defaults Management firmware is interested in various tidbits about the driver - including the driver state & several configuration related fields [MTU, primary MAC, etc.]. This adds the necessary logic to update MFW with such configurations, some of which are passed directly via qed while for others APIs are provided so that qede would be able to later configure if needed. This also introduces a new default configuration for MTU which would replace the default inherited by being an ethernet device. Signed-off-by: Sudarsana Kalluru Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed.h | 1 + drivers/net/ethernet/qlogic/qed/qed_dev.c | 52 +++++++- drivers/net/ethernet/qlogic/qed/qed_hsi.h | 59 ++++++++- drivers/net/ethernet/qlogic/qed/qed_main.c | 75 +++++++++++ drivers/net/ethernet/qlogic/qed/qed_mcp.c | 163 ++++++++++++++++++++++++ drivers/net/ethernet/qlogic/qed/qed_mcp.h | 102 +++++++++++++++ drivers/net/ethernet/qlogic/qede/qede_ethtool.c | 2 + drivers/net/ethernet/qlogic/qede/qede_main.c | 8 ++ include/linux/qed/qed_if.h | 28 ++++ 9 files changed, 487 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h index 653bb5735f0c..f20243c1fb0b 100644 --- a/drivers/net/ethernet/qlogic/qed/qed.h +++ b/drivers/net/ethernet/qlogic/qed/qed.h @@ -226,6 +226,7 @@ struct qed_hw_info { u32 port_mode; u32 hw_mode; unsigned long device_capabilities; + u16 mtu; }; struct qed_hw_cid_data { diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c index edae5fc5fccd..33fd69e24bae 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -1057,8 +1057,10 @@ int qed_hw_init(struct qed_dev *cdev, bool allow_npar_tx_switch, const u8 *bin_fw_data) { - u32 load_code, param; - int rc, 
mfw_rc, i; + u32 load_code, param, drv_mb_param; + bool b_default_mtu = true; + struct qed_hwfn *p_hwfn; + int rc = 0, mfw_rc, i; if ((int_mode == QED_INT_MODE_MSI) && (cdev->num_hwfns > 1)) { DP_NOTICE(cdev, "MSI mode is not supported for CMT devices\n"); @@ -1074,6 +1076,12 @@ int qed_hw_init(struct qed_dev *cdev, for_each_hwfn(cdev, i) { struct qed_hwfn *p_hwfn = &cdev->hwfns[i]; + /* If management didn't provide a default, set one of our own */ + if (!p_hwfn->hw_info.mtu) { + p_hwfn->hw_info.mtu = 1500; + b_default_mtu = false; + } + if (IS_VF(cdev)) { p_hwfn->b_int_enabled = 1; continue; @@ -1157,6 +1165,38 @@ int qed_hw_init(struct qed_dev *cdev, p_hwfn->hw_init_done = true; } + if (IS_PF(cdev)) { + p_hwfn = QED_LEADING_HWFN(cdev); + drv_mb_param = (FW_MAJOR_VERSION << 24) | + (FW_MINOR_VERSION << 16) | + (FW_REVISION_VERSION << 8) | + (FW_ENGINEERING_VERSION); + rc = qed_mcp_cmd(p_hwfn, p_hwfn->p_main_ptt, + DRV_MSG_CODE_OV_UPDATE_STORM_FW_VER, + drv_mb_param, &load_code, &param); + if (rc) + DP_INFO(p_hwfn, "Failed to update firmware version\n"); + + if (!b_default_mtu) { + rc = qed_mcp_ov_update_mtu(p_hwfn, p_hwfn->p_main_ptt, + p_hwfn->hw_info.mtu); + if (rc) + DP_INFO(p_hwfn, + "Failed to update default mtu\n"); + } + + rc = qed_mcp_ov_update_driver_state(p_hwfn, + p_hwfn->p_main_ptt, + QED_OV_DRIVER_STATE_DISABLED); + if (rc) + DP_INFO(p_hwfn, "Failed to update driver state\n"); + + rc = qed_mcp_ov_update_eswitch(p_hwfn, p_hwfn->p_main_ptt, + QED_OV_ESWITCH_VEB); + if (rc) + DP_INFO(p_hwfn, "Failed to update eswitch mode\n"); + } + return 0; } @@ -1801,6 +1841,9 @@ qed_get_hw_info(struct qed_hwfn *p_hwfn, qed_get_num_funcs(p_hwfn, p_ptt); + if (qed_mcp_is_init(p_hwfn)) + p_hwfn->hw_info.mtu = p_hwfn->mcp_info->func_info.mtu; + return qed_hw_get_resc(p_hwfn); } @@ -1975,8 +2018,13 @@ int qed_hw_prepare(struct qed_dev *cdev, void qed_hw_remove(struct qed_dev *cdev) { + struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev); int i; + if (IS_PF(cdev)) + 
qed_mcp_ov_update_driver_state(p_hwfn, p_hwfn->p_main_ptt, + QED_OV_DRIVER_STATE_NOT_LOADED); + for_each_hwfn(cdev, i) { struct qed_hwfn *p_hwfn = &cdev->hwfns[i]; diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h index 72eee29c677f..36de87a1befa 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h +++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h @@ -8564,6 +8564,15 @@ struct public_drv_mb { #define DRV_MSG_CODE_INIT_PHY 0x22000000 #define DRV_MSG_CODE_LINK_RESET 0x23000000 #define DRV_MSG_CODE_SET_DCBX 0x25000000 +#define DRV_MSG_CODE_OV_UPDATE_CURR_CFG 0x26000000 +#define DRV_MSG_CODE_OV_UPDATE_BUS_NUM 0x27000000 +#define DRV_MSG_CODE_OV_UPDATE_BOOT_PROGRESS 0x28000000 +#define DRV_MSG_CODE_OV_UPDATE_STORM_FW_VER 0x29000000 +#define DRV_MSG_CODE_OV_UPDATE_DRIVER_STATE 0x31000000 +#define DRV_MSG_CODE_BW_UPDATE_ACK 0x32000000 +#define DRV_MSG_CODE_OV_UPDATE_MTU 0x33000000 +#define DRV_MSG_CODE_OV_UPDATE_WOL 0x38000000 +#define DRV_MSG_CODE_OV_UPDATE_ESWITCH_MODE 0x39000000 #define DRV_MSG_CODE_BW_UPDATE_ACK 0x32000000 #define DRV_MSG_CODE_NIG_DRAIN 0x30000000 @@ -8574,6 +8583,13 @@ struct public_drv_mb { #define DRV_MSG_CODE_MCP_RESET 0x00090000 #define DRV_MSG_CODE_SET_VERSION 0x000f0000 #define DRV_MSG_CODE_MCP_HALT 0x00100000 +#define DRV_MSG_CODE_SET_VMAC 0x00110000 +#define DRV_MSG_CODE_GET_VMAC 0x00120000 +#define DRV_MSG_CODE_VMAC_TYPE_SHIFT 4 +#define DRV_MSG_CODE_VMAC_TYPE_MASK 0x30 +#define DRV_MSG_CODE_VMAC_TYPE_MAC 1 +#define DRV_MSG_CODE_VMAC_TYPE_WWNN 2 +#define DRV_MSG_CODE_VMAC_TYPE_WWPN 3 #define DRV_MSG_CODE_GET_STATS 0x00130000 #define DRV_MSG_CODE_STATS_TYPE_LAN 1 @@ -8589,7 +8605,10 @@ struct public_drv_mb { #define DRV_MSG_SEQ_NUMBER_MASK 0x0000ffff u32 drv_mb_param; -#define DRV_MB_PARAM_UNLOAD_WOL_MCP 0x00000001 +#define DRV_MB_PARAM_UNLOAD_WOL_UNKNOWN 0x00000000 +#define DRV_MB_PARAM_UNLOAD_WOL_MCP 0x00000001 +#define DRV_MB_PARAM_UNLOAD_WOL_DISABLED 0x00000002 +#define 
DRV_MB_PARAM_UNLOAD_WOL_ENABLED 0x00000003 #define DRV_MB_PARAM_DCBX_NOTIFY_MASK 0x000000FF #define DRV_MB_PARAM_DCBX_NOTIFY_SHIFT 3 @@ -8602,6 +8621,44 @@ struct public_drv_mb { #define DRV_MB_PARAM_LLDP_SEND_MASK 0x00000001 #define DRV_MB_PARAM_LLDP_SEND_SHIFT 0 +#define DRV_MB_PARAM_OV_CURR_CFG_SHIFT 0 +#define DRV_MB_PARAM_OV_CURR_CFG_MASK 0x0000000F +#define DRV_MB_PARAM_OV_CURR_CFG_NONE 0 +#define DRV_MB_PARAM_OV_CURR_CFG_OS 1 +#define DRV_MB_PARAM_OV_CURR_CFG_VENDOR_SPEC 2 +#define DRV_MB_PARAM_OV_CURR_CFG_OTHER 3 + +#define DRV_MB_PARAM_OV_STORM_FW_VER_SHIFT 0 +#define DRV_MB_PARAM_OV_STORM_FW_VER_MASK 0xFFFFFFFF +#define DRV_MB_PARAM_OV_STORM_FW_VER_MAJOR_MASK 0xFF000000 +#define DRV_MB_PARAM_OV_STORM_FW_VER_MINOR_MASK 0x00FF0000 +#define DRV_MB_PARAM_OV_STORM_FW_VER_BUILD_MASK 0x0000FF00 +#define DRV_MB_PARAM_OV_STORM_FW_VER_DROP_MASK 0x000000FF + +#define DRV_MSG_CODE_OV_UPDATE_DRIVER_STATE_SHIFT 0 +#define DRV_MSG_CODE_OV_UPDATE_DRIVER_STATE_MASK 0xF +#define DRV_MSG_CODE_OV_UPDATE_DRIVER_STATE_UNKNOWN 0x1 +#define DRV_MSG_CODE_OV_UPDATE_DRIVER_STATE_NOT_LOADED 0x2 +#define DRV_MSG_CODE_OV_UPDATE_DRIVER_STATE_LOADING 0x3 +#define DRV_MSG_CODE_OV_UPDATE_DRIVER_STATE_DISABLED 0x4 +#define DRV_MSG_CODE_OV_UPDATE_DRIVER_STATE_ACTIVE 0x5 + +#define DRV_MB_PARAM_OV_MTU_SIZE_SHIFT 0 +#define DRV_MB_PARAM_OV_MTU_SIZE_MASK 0xFFFFFFFF + +#define DRV_MB_PARAM_WOL_MASK (DRV_MB_PARAM_WOL_DEFAULT | \ + DRV_MB_PARAM_WOL_DISABLED | \ + DRV_MB_PARAM_WOL_ENABLED) +#define DRV_MB_PARAM_WOL_DEFAULT DRV_MB_PARAM_UNLOAD_WOL_MCP +#define DRV_MB_PARAM_WOL_DISABLED DRV_MB_PARAM_UNLOAD_WOL_DISABLED +#define DRV_MB_PARAM_WOL_ENABLED DRV_MB_PARAM_UNLOAD_WOL_ENABLED + +#define DRV_MB_PARAM_ESWITCH_MODE_MASK (DRV_MB_PARAM_ESWITCH_MODE_NONE | \ + DRV_MB_PARAM_ESWITCH_MODE_VEB | \ + DRV_MB_PARAM_ESWITCH_MODE_VEPA) +#define DRV_MB_PARAM_ESWITCH_MODE_NONE 0x0 +#define DRV_MB_PARAM_ESWITCH_MODE_VEB 0x1 +#define DRV_MB_PARAM_ESWITCH_MODE_VEPA 0x2 #define DRV_MB_PARAM_SET_LED_MODE_OPER 0x0 
#define DRV_MB_PARAM_SET_LED_MODE_ON 0x1 diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index c418360ba02a..d9fa52a22667 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -243,6 +243,8 @@ int qed_fill_dev_info(struct qed_dev *cdev, &dev_info->mfw_rev, NULL); } + dev_info->mtu = QED_LEADING_HWFN(cdev)->hw_info.mtu; + return 0; } @@ -1431,6 +1433,76 @@ static int qed_set_led(struct qed_dev *cdev, enum qed_led_mode mode) return status; } +static int qed_update_drv_state(struct qed_dev *cdev, bool active) +{ + struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev); + struct qed_ptt *ptt; + int status = 0; + + if (IS_VF(cdev)) + return 0; + + ptt = qed_ptt_acquire(hwfn); + if (!ptt) + return -EAGAIN; + + status = qed_mcp_ov_update_driver_state(hwfn, ptt, active ? + QED_OV_DRIVER_STATE_ACTIVE : + QED_OV_DRIVER_STATE_DISABLED); + + qed_ptt_release(hwfn, ptt); + + return status; +} + +static int qed_update_mac(struct qed_dev *cdev, u8 *mac) +{ + struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev); + struct qed_ptt *ptt; + int status = 0; + + if (IS_VF(cdev)) + return 0; + + ptt = qed_ptt_acquire(hwfn); + if (!ptt) + return -EAGAIN; + + status = qed_mcp_ov_update_mac(hwfn, ptt, mac); + if (status) + goto out; + + status = qed_mcp_ov_update_current_config(hwfn, ptt, QED_OV_CLIENT_DRV); + +out: + qed_ptt_release(hwfn, ptt); + return status; +} + +static int qed_update_mtu(struct qed_dev *cdev, u16 mtu) +{ + struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev); + struct qed_ptt *ptt; + int status = 0; + + if (IS_VF(cdev)) + return 0; + + ptt = qed_ptt_acquire(hwfn); + if (!ptt) + return -EAGAIN; + + status = qed_mcp_ov_update_mtu(hwfn, ptt, mtu); + if (status) + goto out; + + status = qed_mcp_ov_update_current_config(hwfn, ptt, QED_OV_CLIENT_DRV); + +out: + qed_ptt_release(hwfn, ptt); + return status; +} + static struct qed_selftest_ops qed_selftest_ops_pass = { .selftest_memory = 
&qed_selftest_memory, .selftest_interrupt = &qed_selftest_interrupt, @@ -1465,6 +1537,9 @@ const struct qed_common_ops qed_common_ops_pass = { .get_coalesce = &qed_get_coalesce, .set_coalesce = &qed_set_coalesce, .set_led = &qed_set_led, + .update_drv_state = &qed_update_drv_state, + .update_mac = &qed_update_mac, + .update_mtu = &qed_update_mtu, }; void qed_get_protocol_stats(struct qed_dev *cdev, diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index bdc9ba92f6d4..98dc913fd76d 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -14,6 +14,7 @@ #include #include #include +#include #include "qed.h" #include "qed_dcbx.h" #include "qed_hsi.h" @@ -1068,6 +1069,8 @@ int qed_mcp_fill_shmem_func_info(struct qed_hwfn *p_hwfn, info->ovlan = (u16)(shmem_info.ovlan_stag & FUNC_MF_CFG_OV_STAG_MASK); + info->mtu = (u16)shmem_info.mtu_size; + DP_VERBOSE(p_hwfn, (QED_MSG_SP | NETIF_MSG_IFUP), "Read configuration from shmem: pause_on_host %02x protocol %02x BW [%02x - %02x] MAC %02x:%02x:%02x:%02x:%02x:%02x wwn port %llx node %llx ovlan %04x\n", info->pause_on_host, info->protocol, @@ -1223,6 +1226,166 @@ int qed_mcp_resume(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) return (cpu_mode & MCP_REG_CPU_MODE_SOFT_HALT) ? 
-EAGAIN : 0; } +int qed_mcp_ov_update_current_config(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + enum qed_ov_client client) +{ + u32 resp = 0, param = 0; + u32 drv_mb_param; + int rc; + + switch (client) { + case QED_OV_CLIENT_DRV: + drv_mb_param = DRV_MB_PARAM_OV_CURR_CFG_OS; + break; + case QED_OV_CLIENT_USER: + drv_mb_param = DRV_MB_PARAM_OV_CURR_CFG_OTHER; + break; + case QED_OV_CLIENT_VENDOR_SPEC: + drv_mb_param = DRV_MB_PARAM_OV_CURR_CFG_VENDOR_SPEC; + break; + default: + DP_NOTICE(p_hwfn, "Invalid client type %d\n", client); + return -EINVAL; + } + + rc = qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_OV_UPDATE_CURR_CFG, + drv_mb_param, &resp, &param); + if (rc) + DP_ERR(p_hwfn, "MCP response failure, aborting\n"); + + return rc; +} + +int qed_mcp_ov_update_driver_state(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + enum qed_ov_driver_state drv_state) +{ + u32 resp = 0, param = 0; + u32 drv_mb_param; + int rc; + + switch (drv_state) { + case QED_OV_DRIVER_STATE_NOT_LOADED: + drv_mb_param = DRV_MSG_CODE_OV_UPDATE_DRIVER_STATE_NOT_LOADED; + break; + case QED_OV_DRIVER_STATE_DISABLED: + drv_mb_param = DRV_MSG_CODE_OV_UPDATE_DRIVER_STATE_DISABLED; + break; + case QED_OV_DRIVER_STATE_ACTIVE: + drv_mb_param = DRV_MSG_CODE_OV_UPDATE_DRIVER_STATE_ACTIVE; + break; + default: + DP_NOTICE(p_hwfn, "Invalid driver state %d\n", drv_state); + return -EINVAL; + } + + rc = qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_OV_UPDATE_DRIVER_STATE, + drv_mb_param, &resp, &param); + if (rc) + DP_ERR(p_hwfn, "Failed to send driver state\n"); + + return rc; +} + +int qed_mcp_ov_update_mtu(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u16 mtu) +{ + u32 resp = 0, param = 0; + u32 drv_mb_param; + int rc; + + drv_mb_param = (u32)mtu << DRV_MB_PARAM_OV_MTU_SIZE_SHIFT; + rc = qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_OV_UPDATE_MTU, + drv_mb_param, &resp, &param); + if (rc) + DP_ERR(p_hwfn, "Failed to send mtu value, rc = %d\n", rc); + + return rc; +} + +int qed_mcp_ov_update_mac(struct qed_hwfn 
*p_hwfn, + struct qed_ptt *p_ptt, u8 *mac) +{ + struct qed_mcp_mb_params mb_params; + union drv_union_data union_data; + int rc; + + memset(&mb_params, 0, sizeof(mb_params)); + mb_params.cmd = DRV_MSG_CODE_SET_VMAC; + mb_params.param = DRV_MSG_CODE_VMAC_TYPE_MAC << + DRV_MSG_CODE_VMAC_TYPE_SHIFT; + mb_params.param |= MCP_PF_ID(p_hwfn); + ether_addr_copy(&union_data.raw_data[0], mac); + mb_params.p_data_src = &union_data; + rc = qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params); + if (rc) + DP_ERR(p_hwfn, "Failed to send mac address, rc = %d\n", rc); + + return rc; +} + +int qed_mcp_ov_update_wol(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, enum qed_ov_wol wol) +{ + u32 resp = 0, param = 0; + u32 drv_mb_param; + int rc; + + switch (wol) { + case QED_OV_WOL_DEFAULT: + drv_mb_param = DRV_MB_PARAM_WOL_DEFAULT; + break; + case QED_OV_WOL_DISABLED: + drv_mb_param = DRV_MB_PARAM_WOL_DISABLED; + break; + case QED_OV_WOL_ENABLED: + drv_mb_param = DRV_MB_PARAM_WOL_ENABLED; + break; + default: + DP_ERR(p_hwfn, "Invalid wol state %d\n", wol); + return -EINVAL; + } + + rc = qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_OV_UPDATE_WOL, + drv_mb_param, &resp, &param); + if (rc) + DP_ERR(p_hwfn, "Failed to send wol mode, rc = %d\n", rc); + + return rc; +} + +int qed_mcp_ov_update_eswitch(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + enum qed_ov_eswitch eswitch) +{ + u32 resp = 0, param = 0; + u32 drv_mb_param; + int rc; + + switch (eswitch) { + case QED_OV_ESWITCH_NONE: + drv_mb_param = DRV_MB_PARAM_ESWITCH_MODE_NONE; + break; + case QED_OV_ESWITCH_VEB: + drv_mb_param = DRV_MB_PARAM_ESWITCH_MODE_VEB; + break; + case QED_OV_ESWITCH_VEPA: + drv_mb_param = DRV_MB_PARAM_ESWITCH_MODE_VEPA; + break; + default: + DP_ERR(p_hwfn, "Invalid eswitch mode %d\n", eswitch); + return -EINVAL; + } + + rc = qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_OV_UPDATE_ESWITCH_MODE, + drv_mb_param, &resp, &param); + if (rc) + DP_ERR(p_hwfn, "Failed to send eswitch mode, rc = %d\n", rc); + + return rc; +} + int 
qed_mcp_set_led(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, enum qed_led_mode mode) { diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.h b/drivers/net/ethernet/qlogic/qed/qed_mcp.h index dff520ed069b..89507190628d 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.h +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.h @@ -92,6 +92,8 @@ struct qed_mcp_function_info { #define QED_MCP_VLAN_UNSET (0xffff) u16 ovlan; + + u16 mtu; }; struct qed_mcp_nvm_common { @@ -147,6 +149,30 @@ union qed_mcp_protocol_stats { struct qed_mcp_rdma_stats rdma_stats; }; +enum qed_ov_eswitch { + QED_OV_ESWITCH_NONE, + QED_OV_ESWITCH_VEB, + QED_OV_ESWITCH_VEPA +}; + +enum qed_ov_client { + QED_OV_CLIENT_DRV, + QED_OV_CLIENT_USER, + QED_OV_CLIENT_VENDOR_SPEC +}; + +enum qed_ov_driver_state { + QED_OV_DRIVER_STATE_NOT_LOADED, + QED_OV_DRIVER_STATE_DISABLED, + QED_OV_DRIVER_STATE_ACTIVE +}; + +enum qed_ov_wol { + QED_OV_WOL_DEFAULT, + QED_OV_WOL_DISABLED, + QED_OV_WOL_ENABLED +}; + /** * @brief - returns the link params of the hw function * @@ -277,6 +303,69 @@ qed_mcp_send_drv_version(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, struct qed_mcp_drv_version *p_ver); +/** + * @brief Notify MFW about the change in base device properties + * + * @param p_hwfn + * @param p_ptt + * @param client - qed client type + * + * @return int - 0 - operation was successful. + */ +int qed_mcp_ov_update_current_config(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + enum qed_ov_client client); + +/** + * @brief Notify MFW about the driver state + * + * @param p_hwfn + * @param p_ptt + * @param drv_state - Driver state + * + * @return int - 0 - operation was successful. + */ +int qed_mcp_ov_update_driver_state(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + enum qed_ov_driver_state drv_state); + +/** + * @brief Send MTU size to MFW + * + * @param p_hwfn + * @param p_ptt + * @param mtu - MTU size + * + * @return int - 0 - operation was successful. 
+ */ +int qed_mcp_ov_update_mtu(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u16 mtu); + +/** + * @brief Send MAC address to MFW + * + * @param p_hwfn + * @param p_ptt + * @param mac - MAC address + * + * @return int - 0 - operation was successful. + */ +int qed_mcp_ov_update_mac(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u8 *mac); + +/** + * @brief Send WOL mode to MFW + * + * @param p_hwfn + * @param p_ptt + * @param wol - WOL mode + * + * @return int - 0 - operation was successful. + */ +int qed_mcp_ov_update_wol(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + enum qed_ov_wol wol); + /** * @brief Set LED status * @@ -546,4 +635,17 @@ int __qed_configure_pf_min_bandwidth(struct qed_hwfn *p_hwfn, int qed_mcp_mask_parities(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, u32 mask_parities); +/** + * @brief Send eswitch mode to MFW + * + * @param p_hwfn + * @param p_ptt + * @param eswitch - eswitch mode + * + * @return int - 0 - operation was successful. + */ +int qed_mcp_ov_update_eswitch(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + enum qed_ov_eswitch eswitch); + #endif diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c index 0100f5c0a4ec..775fdaafd24d 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c +++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c @@ -739,6 +739,8 @@ int qede_change_mtu(struct net_device *ndev, int new_mtu) qede_update_mtu(edev, &args); + edev->ops->common->update_mtu(edev->cdev, args.mtu); + return 0; } diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 8488ad36a2b8..df0bd0ce2b18 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -2396,6 +2396,8 @@ static void qede_init_ndev(struct qede_dev *edev) /* Set network device HW mac */ ether_addr_copy(edev->ndev->dev_addr, edev->dev_info.common.hw_mac); + + ndev->mtu = 
edev->dev_info.common.mtu; } /* This function converts from 32b param to two params of level and module @@ -3751,6 +3753,8 @@ static int qede_open(struct net_device *ndev) udp_tunnel_get_rx_info(ndev); + edev->ops->common->update_drv_state(edev->cdev, true); + return 0; } @@ -3760,6 +3764,8 @@ static int qede_close(struct net_device *ndev) qede_unload(edev, QEDE_UNLOAD_NORMAL); + edev->ops->common->update_drv_state(edev->cdev, false); + return 0; } @@ -3820,6 +3826,8 @@ static int qede_set_mac_addr(struct net_device *ndev, void *p) if (rc) return rc; + edev->ops->common->update_mac(edev->cdev, addr->sa_data); + /* Add MAC filter according to the new unicast HW MAC address */ ether_addr_copy(edev->primary_mac, ndev->dev_addr); return qede_set_ucast_rx_mac(edev, QED_FILTER_XCAST_TYPE_ADD, diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index 8978a60371f4..5c909cd02764 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -267,6 +267,7 @@ struct qed_dev_info { u8 mf_mode; bool tx_switching; bool rdma_supported; + u16 mtu; }; enum qed_sb_type { @@ -554,6 +555,33 @@ struct qed_common_ops { */ int (*set_led)(struct qed_dev *cdev, enum qed_led_mode mode); + +/** + * @brief update_drv_state - API to inform the change in the driver state. 
+ * + * @param cdev + * @param active + * + */ + int (*update_drv_state)(struct qed_dev *cdev, bool active); + +/** + * @brief update_mac - API to inform the change in the mac address + * + * @param cdev + * @param mac + * + */ + int (*update_mac)(struct qed_dev *cdev, u8 *mac); + +/** + * @brief update_mtu - API to inform the change in the mtu + * + * @param cdev + * @param mtu + * + */ + int (*update_mtu)(struct qed_dev *cdev, u16 mtu); }; #define MASK_FIELD(_name, _value) \ -- cgit v1.2.3 From 7a4b21b7d1f0644456501e33d3917c9aaee76a75 Mon Sep 17 00:00:00 2001 From: "Mintz, Yuval" Date: Mon, 31 Oct 2016 07:14:22 +0200 Subject: qed: Add nvram selftest Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_hsi.h | 4 + drivers/net/ethernet/qlogic/qed/qed_main.c | 1 + drivers/net/ethernet/qlogic/qed/qed_mcp.c | 94 ++++++++++++++++++++++ drivers/net/ethernet/qlogic/qed/qed_mcp.h | 41 ++++++++++ drivers/net/ethernet/qlogic/qed/qed_selftest.c | 101 ++++++++++++++++++++++++ drivers/net/ethernet/qlogic/qed/qed_selftest.h | 10 +++ drivers/net/ethernet/qlogic/qede/qede_ethtool.c | 7 ++ include/linux/qed/qed_if.h | 9 +++ 8 files changed, 267 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h index 36de87a1befa..f7dfa2ec2d19 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h +++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h @@ -8666,6 +8666,8 @@ struct public_drv_mb { #define DRV_MB_PARAM_BIST_REGISTER_TEST 1 #define DRV_MB_PARAM_BIST_CLOCK_TEST 2 +#define DRV_MB_PARAM_BIST_NVM_TEST_NUM_IMAGES 3 +#define DRV_MB_PARAM_BIST_NVM_TEST_IMAGE_BY_INDEX 4 #define DRV_MB_PARAM_BIST_RC_UNKNOWN 0 #define DRV_MB_PARAM_BIST_RC_PASSED 1 @@ -8674,6 +8676,8 @@ struct public_drv_mb { #define DRV_MB_PARAM_BIST_TEST_INDEX_SHIFT 0 #define DRV_MB_PARAM_BIST_TEST_INDEX_MASK 0x000000FF +#define DRV_MB_PARAM_BIST_TEST_IMAGE_INDEX_SHIFT 8 +#define 
DRV_MB_PARAM_BIST_TEST_IMAGE_INDEX_MASK 0x0000FF00 u32 fw_mb_header; #define FW_MSG_CODE_MASK 0xffff0000 diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index d9fa52a22667..31f8e420c830 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -1508,6 +1508,7 @@ static struct qed_selftest_ops qed_selftest_ops_pass = { .selftest_interrupt = &qed_selftest_interrupt, .selftest_register = &qed_selftest_register, .selftest_clock = &qed_selftest_clock, + .selftest_nvram = &qed_selftest_nvram, }; const struct qed_common_ops qed_common_ops_pass = { diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index 98dc913fd76d..8be61570ce6b 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -1434,6 +1434,52 @@ int qed_mcp_mask_parities(struct qed_hwfn *p_hwfn, return rc; } +int qed_mcp_nvm_read(struct qed_dev *cdev, u32 addr, u8 *p_buf, u32 len) +{ + u32 bytes_left = len, offset = 0, bytes_to_copy, read_len = 0; + struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev); + u32 resp = 0, resp_param = 0; + struct qed_ptt *p_ptt; + int rc = 0; + + p_ptt = qed_ptt_acquire(p_hwfn); + if (!p_ptt) + return -EBUSY; + + while (bytes_left > 0) { + bytes_to_copy = min_t(u32, bytes_left, MCP_DRV_NVM_BUF_LEN); + + rc = qed_mcp_nvm_rd_cmd(p_hwfn, p_ptt, + DRV_MSG_CODE_NVM_READ_NVRAM, + addr + offset + + (bytes_to_copy << + DRV_MB_PARAM_NVM_LEN_SHIFT), + &resp, &resp_param, + &read_len, + (u32 *)(p_buf + offset)); + + if (rc || (resp != FW_MSG_CODE_NVM_OK)) { + DP_NOTICE(cdev, "MCP command rc = %d\n", rc); + break; + } + + /* This can be a lengthy process, and it's possible scheduler + * isn't preemptable. Sleep a bit to prevent CPU hogging. 
+ */ + if (bytes_left % 0x1000 < + (bytes_left - read_len) % 0x1000) + usleep_range(1000, 2000); + + offset += read_len; + bytes_left -= read_len; + } + + cdev->mcp_nvm_resp = resp; + qed_ptt_release(p_hwfn, p_ptt); + + return rc; +} + int qed_mcp_bist_register_test(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { u32 drv_mb_param = 0, rsp, param; @@ -1475,3 +1521,51 @@ int qed_mcp_bist_clock_test(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) return rc; } + +int qed_mcp_bist_nvm_test_get_num_images(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *num_images) +{ + u32 drv_mb_param = 0, rsp; + int rc = 0; + + drv_mb_param = (DRV_MB_PARAM_BIST_NVM_TEST_NUM_IMAGES << + DRV_MB_PARAM_BIST_TEST_INDEX_SHIFT); + + rc = qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_BIST_TEST, + drv_mb_param, &rsp, num_images); + if (rc) + return rc; + + if (((rsp & FW_MSG_CODE_MASK) != FW_MSG_CODE_OK)) + rc = -EINVAL; + + return rc; +} + +int qed_mcp_bist_nvm_test_get_image_att(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + struct bist_nvm_image_att *p_image_att, + u32 image_index) +{ + u32 buf_size = 0, param, resp = 0, resp_param = 0; + int rc; + + param = DRV_MB_PARAM_BIST_NVM_TEST_IMAGE_BY_INDEX << + DRV_MB_PARAM_BIST_TEST_INDEX_SHIFT; + param |= image_index << DRV_MB_PARAM_BIST_TEST_IMAGE_INDEX_SHIFT; + + rc = qed_mcp_nvm_rd_cmd(p_hwfn, p_ptt, + DRV_MSG_CODE_BIST_TEST, param, + &resp, &resp_param, + &buf_size, + (u32 *)p_image_att); + if (rc) + return rc; + + if (((resp & FW_MSG_CODE_MASK) != FW_MSG_CODE_OK) || + (p_image_att->return_code != 1)) + rc = -EINVAL; + + return rc; +} diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.h b/drivers/net/ethernet/qlogic/qed/qed_mcp.h index 89507190628d..be8152d49de2 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.h +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.h @@ -379,6 +379,18 @@ int qed_mcp_set_led(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, enum qed_led_mode mode); +/** + * @brief Read from nvm + * + * @param cdev + * 
@param addr - nvm offset + * @param p_buf - nvm read buffer + * @param len - buffer len + * + * @return int - 0 - operation was successful. + */ +int qed_mcp_nvm_read(struct qed_dev *cdev, u32 addr, u8 *p_buf, u32 len); + /** * @brief Bist register test * @@ -401,6 +413,35 @@ int qed_mcp_bist_register_test(struct qed_hwfn *p_hwfn, int qed_mcp_bist_clock_test(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt); +/** + * @brief Bist nvm test - get number of images + * + * @param p_hwfn - hw function + * @param p_ptt - PTT required for register access + * @param num_images - number of images if operation was + * successful. 0 if not. + * + * @return int - 0 - operation was successful. + */ +int qed_mcp_bist_nvm_test_get_num_images(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *num_images); + +/** + * @brief Bist nvm test - get image attributes by index + * + * @param p_hwfn - hw function + * @param p_ptt - PTT required for register access + * @param p_image_att - Attributes of image + * @param image_index - Index of image to get information for + * + * @return int - 0 - operation was successful. 
+ */ +int qed_mcp_bist_nvm_test_get_image_att(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + struct bist_nvm_image_att *p_image_att, + u32 image_index); + /* Using hwfn number (and not pf_num) is required since in CMT mode, * same pf_num may be used by two different hwfn * TODO - this shouldn't really be in .h file, but until all fields diff --git a/drivers/net/ethernet/qlogic/qed/qed_selftest.c b/drivers/net/ethernet/qlogic/qed/qed_selftest.c index 9b7678f26909..48bfaecaf6dc 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_selftest.c +++ b/drivers/net/ethernet/qlogic/qed/qed_selftest.c @@ -1,3 +1,4 @@ +#include #include "qed.h" #include "qed_dev_api.h" #include "qed_mcp.h" @@ -75,3 +76,103 @@ int qed_selftest_clock(struct qed_dev *cdev) return rc; } + +int qed_selftest_nvram(struct qed_dev *cdev) +{ + struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev); + struct qed_ptt *p_ptt = qed_ptt_acquire(p_hwfn); + u32 num_images, i, j, nvm_crc, calc_crc; + struct bist_nvm_image_att image_att; + u8 *buf = NULL; + __be32 val; + int rc; + + if (!p_ptt) { + DP_ERR(p_hwfn, "failed to acquire ptt\n"); + return -EBUSY; + } + + /* Acquire from MFW the amount of available images */ + rc = qed_mcp_bist_nvm_test_get_num_images(p_hwfn, p_ptt, &num_images); + if (rc || !num_images) { + DP_ERR(p_hwfn, "Failed getting number of images\n"); + return -EINVAL; + } + + /* Iterate over images and validate CRC */ + for (i = 0; i < num_images; i++) { + /* This mailbox returns information about the image required for + * reading it. + */ + rc = qed_mcp_bist_nvm_test_get_image_att(p_hwfn, p_ptt, + &image_att, i); + if (rc) { + DP_ERR(p_hwfn, + "Failed getting image index %d attributes\n", + i); + goto err0; + } + + /* After MFW crash dump is collected - the image's CRC stops + * being valid. 
+ */ + if (image_att.image_type == NVM_TYPE_MDUMP) + continue; + + DP_VERBOSE(p_hwfn, QED_MSG_SP, "image index %d, size %x\n", + i, image_att.len); + + /* Allocate a buffer for holding the nvram image */ + buf = kzalloc(image_att.len, GFP_KERNEL); + if (!buf) { + rc = -ENOMEM; + goto err0; + } + + /* Read image into buffer */ + rc = qed_mcp_nvm_read(p_hwfn->cdev, image_att.nvm_start_addr, + buf, image_att.len); + if (rc) { + DP_ERR(p_hwfn, + "Failed reading image index %d from nvm.\n", i); + goto err1; + } + + /* Convert the buffer into big-endian format (excluding the + * closing 4 bytes of CRC). + */ + for (j = 0; j < image_att.len - 4; j += 4) { + val = cpu_to_be32(*(u32 *)&buf[j]); + *(u32 *)&buf[j] = (__force u32)val; + } + + /* Calc CRC for the "actual" image buffer, i.e. not including + * the last 4 CRC bytes. + */ + nvm_crc = *(u32 *)(buf + image_att.len - 4); + calc_crc = crc32(0xffffffff, buf, image_att.len - 4); + calc_crc = (__force u32)~cpu_to_be32(calc_crc); + DP_VERBOSE(p_hwfn, QED_MSG_SP, + "nvm crc 0x%x, calc_crc 0x%x\n", nvm_crc, calc_crc); + + if (calc_crc != nvm_crc) { + rc = -EINVAL; + goto err1; + } + + /* Done with this image; Free to prevent double release + * on subsequent failure. 
+ */ + kfree(buf); + buf = NULL; + } + + qed_ptt_release(p_hwfn, p_ptt); + return 0; + +err1: + kfree(buf); +err0: + qed_ptt_release(p_hwfn, p_ptt); + return rc; +} diff --git a/drivers/net/ethernet/qlogic/qed/qed_selftest.h b/drivers/net/ethernet/qlogic/qed/qed_selftest.h index 50eb0b49950f..739ddb730967 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_selftest.h +++ b/drivers/net/ethernet/qlogic/qed/qed_selftest.h @@ -37,4 +37,14 @@ int qed_selftest_register(struct qed_dev *cdev); * @return int */ int qed_selftest_clock(struct qed_dev *cdev); + +/** + * @brief qed_selftest_nvram - Perform nvram test + * + * @param cdev + * + * @return int + */ +int qed_selftest_nvram(struct qed_dev *cdev); + #endif diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c index 775fdaafd24d..a8094088b9ac 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c +++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c @@ -157,6 +157,7 @@ enum qede_ethtool_tests { QEDE_ETHTOOL_MEMORY_TEST, QEDE_ETHTOOL_REGISTER_TEST, QEDE_ETHTOOL_CLOCK_TEST, + QEDE_ETHTOOL_NVRAM_TEST, QEDE_ETHTOOL_TEST_MAX }; @@ -166,6 +167,7 @@ static const char qede_tests_str_arr[QEDE_ETHTOOL_TEST_MAX][ETH_GSTRING_LEN] = { "Memory (online)\t\t", "Register (online)\t", "Clock (online)\t\t", + "Nvram (online)\t\t", }; static void qede_get_strings_stats(struct qede_dev *edev, u8 *buf) @@ -1392,6 +1394,11 @@ static void qede_self_test(struct net_device *dev, buf[QEDE_ETHTOOL_CLOCK_TEST] = 1; etest->flags |= ETH_TEST_FL_FAILED; } + + if (edev->ops->common->selftest->selftest_nvram(edev->cdev)) { + buf[QEDE_ETHTOOL_NVRAM_TEST] = 1; + etest->flags |= ETH_TEST_FL_FAILED; + } } static int qede_set_tunable(struct net_device *dev, diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index 5c909cd02764..ffc2d2f5e88f 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -402,6 +402,15 @@ struct qed_selftest_ops { * @return 0 on success, 
error otherwise. */ int (*selftest_clock)(struct qed_dev *cdev); + +/** + * @brief selftest_nvram - Perform nvram test + * + * @param cdev + * + * @return 0 on success, error otherwise. + */ + int (*selftest_nvram) (struct qed_dev *cdev); }; struct qed_common_ops { -- cgit v1.2.3 From 14d39648cbfc6289e3f873d30f282b9517ebe860 Mon Sep 17 00:00:00 2001 From: "Mintz, Yuval" Date: Mon, 31 Oct 2016 07:14:23 +0200 Subject: qed*: Add support for WoL Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed.h | 11 ++++- drivers/net/ethernet/qlogic/qed/qed_dev.c | 19 ++++++++- drivers/net/ethernet/qlogic/qed/qed_hsi.h | 4 ++ drivers/net/ethernet/qlogic/qed/qed_main.c | 29 +++++++++++++ drivers/net/ethernet/qlogic/qed/qed_mcp.c | 56 ++++++++++++++++++++++++- drivers/net/ethernet/qlogic/qede/qede.h | 2 + drivers/net/ethernet/qlogic/qede/qede_ethtool.c | 41 ++++++++++++++++++ drivers/net/ethernet/qlogic/qede/qede_main.c | 9 ++++ include/linux/qed/qed_if.h | 10 +++++ 9 files changed, 176 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h index f20243c1fb0b..8828ffac4b23 100644 --- a/drivers/net/ethernet/qlogic/qed/qed.h +++ b/drivers/net/ethernet/qlogic/qed/qed.h @@ -195,6 +195,11 @@ enum qed_dev_cap { QED_DEV_CAP_ROCE, }; +enum qed_wol_support { + QED_WOL_SUPPORT_NONE, + QED_WOL_SUPPORT_PME, +}; + struct qed_hw_info { /* PCI personality */ enum qed_pci_personality personality; @@ -227,6 +232,8 @@ struct qed_hw_info { u32 hw_mode; unsigned long device_capabilities; u16 mtu; + + enum qed_wol_support b_wol_support; }; struct qed_hw_cid_data { @@ -539,7 +546,9 @@ struct qed_dev { u8 mcp_rev; u8 boot_mode; - u8 wol; + /* WoL related configurations */ + u8 wol_config; + u8 wol_mac[ETH_ALEN]; u32 int_mode; enum qed_coalescing_mode int_coalescing_mode; diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c 
index 33fd69e24bae..127ed5f27d8d 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -1364,8 +1364,24 @@ int qed_hw_reset(struct qed_dev *cdev) { int rc = 0; u32 unload_resp, unload_param; + u32 wol_param; int i; + switch (cdev->wol_config) { + case QED_OV_WOL_DISABLED: + wol_param = DRV_MB_PARAM_UNLOAD_WOL_DISABLED; + break; + case QED_OV_WOL_ENABLED: + wol_param = DRV_MB_PARAM_UNLOAD_WOL_ENABLED; + break; + default: + DP_NOTICE(cdev, + "Unknown WoL configuration %02x\n", cdev->wol_config); + /* Fallthrough */ + case QED_OV_WOL_DEFAULT: + wol_param = DRV_MB_PARAM_UNLOAD_WOL_MCP; + } + for_each_hwfn(cdev, i) { struct qed_hwfn *p_hwfn = &cdev->hwfns[i]; @@ -1394,8 +1410,7 @@ int qed_hw_reset(struct qed_dev *cdev) /* Send unload command to MCP */ rc = qed_mcp_cmd(p_hwfn, p_hwfn->p_main_ptt, - DRV_MSG_CODE_UNLOAD_REQ, - DRV_MB_PARAM_UNLOAD_WOL_MCP, + DRV_MSG_CODE_UNLOAD_REQ, wol_param, &unload_resp, &unload_param); if (rc) { DP_NOTICE(p_hwfn, "qed_hw_reset: UNLOAD_REQ failed\n"); diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h index f7dfa2ec2d19..fdb7a099955b 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h +++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h @@ -8601,6 +8601,7 @@ struct public_drv_mb { #define DRV_MSG_CODE_BIST_TEST 0x001e0000 #define DRV_MSG_CODE_SET_LED_MODE 0x00200000 +#define DRV_MSG_CODE_OS_WOL 0x002e0000 #define DRV_MSG_SEQ_NUMBER_MASK 0x0000ffff @@ -8697,6 +8698,9 @@ struct public_drv_mb { #define FW_MSG_CODE_NVM_OK 0x00010000 #define FW_MSG_CODE_OK 0x00160000 +#define FW_MSG_CODE_OS_WOL_SUPPORTED 0x00800000 +#define FW_MSG_CODE_OS_WOL_NOT_SUPPORTED 0x00810000 + #define FW_MSG_SEQ_NUMBER_MASK 0x0000ffff u32 fw_mb_param; diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index 31f8e420c830..b71d73a41b10 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ 
b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -221,6 +221,10 @@ int qed_fill_dev_info(struct qed_dev *cdev, dev_info->fw_eng = FW_ENGINEERING_VERSION; dev_info->mf_mode = cdev->mf_mode; dev_info->tx_switching = true; + + if (QED_LEADING_HWFN(cdev)->hw_info.b_wol_support == + QED_WOL_SUPPORT_PME) + dev_info->wol_support = true; } else { qed_vf_get_fw_version(&cdev->hwfns[0], &dev_info->fw_major, &dev_info->fw_minor, &dev_info->fw_rev, @@ -1433,6 +1437,30 @@ static int qed_set_led(struct qed_dev *cdev, enum qed_led_mode mode) return status; } +static int qed_update_wol(struct qed_dev *cdev, bool enabled) +{ + struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev); + struct qed_ptt *ptt; + int rc = 0; + + if (IS_VF(cdev)) + return 0; + + ptt = qed_ptt_acquire(hwfn); + if (!ptt) + return -EAGAIN; + + rc = qed_mcp_ov_update_wol(hwfn, ptt, enabled ? QED_OV_WOL_ENABLED + : QED_OV_WOL_DISABLED); + if (rc) + goto out; + rc = qed_mcp_ov_update_current_config(hwfn, ptt, QED_OV_CLIENT_DRV); + +out: + qed_ptt_release(hwfn, ptt); + return rc; +} + static int qed_update_drv_state(struct qed_dev *cdev, bool active) { struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev); @@ -1541,6 +1569,7 @@ const struct qed_common_ops qed_common_ops_pass = { .update_drv_state = &qed_update_drv_state, .update_mac = &qed_update_mac, .update_mtu = &qed_update_mtu, + .update_wol = &qed_update_wol, }; void qed_get_protocol_stats(struct qed_dev *cdev, diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index 8be61570ce6b..768b35b1dca0 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -330,6 +330,7 @@ static int qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn, struct qed_mcp_mb_params *p_mb_params) { u32 union_data_addr; + int rc; /* MCP not initialized */ @@ -375,11 +376,32 @@ int qed_mcp_cmd(struct qed_hwfn *p_hwfn, u32 *o_mcp_param) { struct qed_mcp_mb_params mb_params; + union drv_union_data data_src; int rc; 
memset(&mb_params, 0, sizeof(mb_params)); + memset(&data_src, 0, sizeof(data_src)); mb_params.cmd = cmd; mb_params.param = param; + + /* In case of UNLOAD_DONE, set the primary MAC */ + if ((cmd == DRV_MSG_CODE_UNLOAD_DONE) && + (p_hwfn->cdev->wol_config == QED_OV_WOL_ENABLED)) { + u8 *p_mac = p_hwfn->cdev->wol_mac; + + data_src.wol_mac.mac_upper = p_mac[0] << 8 | p_mac[1]; + data_src.wol_mac.mac_lower = p_mac[2] << 24 | p_mac[3] << 16 | + p_mac[4] << 8 | p_mac[5]; + + DP_VERBOSE(p_hwfn, + (QED_MSG_SP | NETIF_MSG_IFDOWN), + "Setting WoL MAC: %pM --> [%08x,%08x]\n", + p_mac, data_src.wol_mac.mac_upper, + data_src.wol_mac.mac_lower); + + mb_params.p_data_src = &data_src; + } + rc = qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params); if (rc) return rc; @@ -1058,6 +1080,9 @@ int qed_mcp_fill_shmem_func_info(struct qed_hwfn *p_hwfn, info->mac[3] = (u8)(shmem_info.mac_lower >> 16); info->mac[4] = (u8)(shmem_info.mac_lower >> 8); info->mac[5] = (u8)(shmem_info.mac_lower); + + /* Store primary MAC for later possible WoL */ + memcpy(&p_hwfn->cdev->wol_mac, info->mac, ETH_ALEN); } else { DP_NOTICE(p_hwfn, "MAC is 0 in shmem\n"); } @@ -1071,13 +1096,28 @@ int qed_mcp_fill_shmem_func_info(struct qed_hwfn *p_hwfn, info->mtu = (u16)shmem_info.mtu_size; + p_hwfn->hw_info.b_wol_support = QED_WOL_SUPPORT_NONE; + p_hwfn->cdev->wol_config = (u8)QED_OV_WOL_DEFAULT; + if (qed_mcp_is_init(p_hwfn)) { + u32 resp = 0, param = 0; + int rc; + + rc = qed_mcp_cmd(p_hwfn, p_ptt, + DRV_MSG_CODE_OS_WOL, 0, &resp, &param); + if (rc) + return rc; + if (resp == FW_MSG_CODE_OS_WOL_SUPPORTED) + p_hwfn->hw_info.b_wol_support = QED_WOL_SUPPORT_PME; + } + DP_VERBOSE(p_hwfn, (QED_MSG_SP | NETIF_MSG_IFUP), - "Read configuration from shmem: pause_on_host %02x protocol %02x BW [%02x - %02x] MAC %02x:%02x:%02x:%02x:%02x:%02x wwn port %llx node %llx ovlan %04x\n", + "Read configuration from shmem: pause_on_host %02x protocol %02x BW [%02x - %02x] MAC %02x:%02x:%02x:%02x:%02x:%02x wwn port %llx node %llx ovlan %04x
wol %02x\n", info->pause_on_host, info->protocol, info->bandwidth_min, info->bandwidth_max, info->mac[0], info->mac[1], info->mac[2], info->mac[3], info->mac[4], info->mac[5], - info->wwn_port, info->wwn_node, info->ovlan); + info->wwn_port, info->wwn_node, + info->ovlan, (u8)p_hwfn->hw_info.b_wol_support); return 0; } @@ -1322,6 +1362,9 @@ int qed_mcp_ov_update_mac(struct qed_hwfn *p_hwfn, if (rc) DP_ERR(p_hwfn, "Failed to send mac address, rc = %d\n", rc); + /* Store primary MAC for later possible WoL */ + memcpy(p_hwfn->cdev->wol_mac, mac, ETH_ALEN); + return rc; } @@ -1332,6 +1375,12 @@ int qed_mcp_ov_update_wol(struct qed_hwfn *p_hwfn, u32 drv_mb_param; int rc; + if (p_hwfn->hw_info.b_wol_support == QED_WOL_SUPPORT_NONE) { + DP_VERBOSE(p_hwfn, QED_MSG_SP, + "Can't change WoL configuration when WoL isn't supported\n"); + return -EINVAL; + } + switch (wol) { case QED_OV_WOL_DEFAULT: drv_mb_param = DRV_MB_PARAM_WOL_DEFAULT; @@ -1352,6 +1401,9 @@ int qed_mcp_ov_update_wol(struct qed_hwfn *p_hwfn, if (rc) DP_ERR(p_hwfn, "Failed to send wol mode, rc = %d\n", rc); + /* Store the WoL update for a future unload */ + p_hwfn->cdev->wol_config = (u8)wol; + return rc; } diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h index cf8d3547aecf..0cba21bf9d5f 100644 --- a/drivers/net/ethernet/qlogic/qede/qede.h +++ b/drivers/net/ethernet/qlogic/qede/qede.h @@ -193,6 +193,8 @@ struct qede_dev { u16 vxlan_dst_port; u16 geneve_dst_port; + bool wol_enabled; + struct qede_rdma_dev rdma_info; }; diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c index a8094088b9ac..327c614e76aa 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c +++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c @@ -483,6 +483,45 @@ static void qede_get_drvinfo(struct net_device *ndev, strlcpy(info->bus_info, pci_name(edev->pdev), sizeof(info->bus_info)); } +static void qede_get_wol(struct net_device 
*ndev, struct ethtool_wolinfo *wol) +{ + struct qede_dev *edev = netdev_priv(ndev); + + if (edev->dev_info.common.wol_support) { + wol->supported = WAKE_MAGIC; + wol->wolopts = edev->wol_enabled ? WAKE_MAGIC : 0; + } +} + +static int qede_set_wol(struct net_device *ndev, struct ethtool_wolinfo *wol) +{ + struct qede_dev *edev = netdev_priv(ndev); + bool wol_requested; + int rc; + + if (wol->wolopts & ~WAKE_MAGIC) { + DP_INFO(edev, + "Can't support WoL options other than magic-packet\n"); + return -EINVAL; + } + + wol_requested = !!(wol->wolopts & WAKE_MAGIC); + if (wol_requested == edev->wol_enabled) + return 0; + + /* Need to actually change configuration */ + if (!edev->dev_info.common.wol_support) { + DP_INFO(edev, "Device doesn't support WoL\n"); + return -EINVAL; + } + + rc = edev->ops->common->update_wol(edev->cdev, wol_requested); + if (!rc) + edev->wol_enabled = wol_requested; + + return rc; +} + static u32 qede_get_msglevel(struct net_device *ndev) { struct qede_dev *edev = netdev_priv(ndev); @@ -1449,6 +1488,8 @@ static const struct ethtool_ops qede_ethtool_ops = { .get_drvinfo = qede_get_drvinfo, .get_regs_len = qede_get_regs_len, .get_regs = qede_get_regs, + .get_wol = qede_get_wol, + .set_wol = qede_set_wol, .get_msglevel = qede_get_msglevel, .set_msglevel = qede_set_msglevel, .nway_reset = qede_nway_reset, diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index df0bd0ce2b18..873f2ebe249e 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -95,6 +95,7 @@ static int qede_probe(struct pci_dev *pdev, const struct pci_device_id *id); #define TX_TIMEOUT (5 * HZ) static void qede_remove(struct pci_dev *pdev); +static void qede_shutdown(struct pci_dev *pdev); static int qede_alloc_rx_buffer(struct qede_dev *edev, struct qede_rx_queue *rxq); static void qede_link_update(void *dev, struct qed_link_output *link); @@ -166,6 +167,7 @@ static struct 
pci_driver qede_pci_driver = { .id_table = qede_pci_tbl, .probe = qede_probe, .remove = qede_remove, + .shutdown = qede_shutdown, #ifdef CONFIG_QED_SRIOV .sriov_configure = qede_sriov_configure, #endif @@ -2705,6 +2707,8 @@ static void __qede_remove(struct pci_dev *pdev, enum qede_remove_mode mode) /* Use global ops since we've freed edev */ qed_ops->common->slowpath_stop(cdev); + if (system_state == SYSTEM_POWER_OFF) + return; qed_ops->common->remove(cdev); dev_info(&pdev->dev, "Ending qede_remove successfully\n"); @@ -2715,6 +2719,11 @@ static void qede_remove(struct pci_dev *pdev) __qede_remove(pdev, QEDE_REMOVE_NORMAL); } +static void qede_shutdown(struct pci_dev *pdev) +{ + __qede_remove(pdev, QEDE_REMOVE_NORMAL); +} + /* ------------------------------------------------------------------------- * START OF LOAD / UNLOAD * ------------------------------------------------------------------------- diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index ffc2d2f5e88f..ea095b4893aa 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -268,6 +268,8 @@ struct qed_dev_info { bool tx_switching; bool rdma_supported; u16 mtu; + + bool wol_support; }; enum qed_sb_type { @@ -591,6 +593,14 @@ struct qed_common_ops { * */ int (*update_mtu)(struct qed_dev *cdev, u16 mtu); + +/** + * @brief update_wol - update of changes in the WoL configuration + * + * @param cdev + * @param enabled - true iff WoL should be enabled. + */ + int (*update_wol) (struct qed_dev *cdev, bool enabled); }; #define MASK_FIELD(_name, _value) \ -- cgit v1.2.3 From 2edbff8dcb5da324fd4c4fe953629e4f6ca73c99 Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Mon, 31 Oct 2016 07:14:27 +0200 Subject: qed: Learn resources from management firmware Currently, each interface assumes it receives an equal portion of HW/FW resources, but this is wasteful - different partitions [and specifically, partitions exposing different protocol support] might require different resources.
Implement a new resource learning scheme where the information is received directly from the management firmware [which has knowledge of all of the functions and can serve as arbiter]. Signed-off-by: Tomer Tayar Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed.h | 6 +- drivers/net/ethernet/qlogic/qed/qed_dev.c | 291 ++++++++++++++++++++++++------ drivers/net/ethernet/qlogic/qed/qed_hsi.h | 46 +++++ drivers/net/ethernet/qlogic/qed/qed_l2.c | 2 +- drivers/net/ethernet/qlogic/qed/qed_mcp.c | 42 +++++ drivers/net/ethernet/qlogic/qed/qed_mcp.h | 15 ++ include/linux/qed/qed_eth_if.h | 2 +- 7 files changed, 341 insertions(+), 63 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h index 6d3013fe6987..50b8a01ff512 100644 --- a/drivers/net/ethernet/qlogic/qed/qed.h +++ b/drivers/net/ethernet/qlogic/qed/qed.h @@ -154,7 +154,10 @@ struct qed_qm_iids { u32 tids; }; -enum QED_RESOURCES { +/* HW / FW resources, output of features supported below, most information + * is received from MFW. 
+ */ +enum qed_resources { QED_SB, QED_L2_QUEUE, QED_VPORT, @@ -166,6 +169,7 @@ enum QED_RESOURCES { QED_RDMA_CNQ_RAM, QED_ILT, QED_LL2_QUEUE, + QED_CMDQS_CQS, QED_RDMA_STATS_QUEUE, QED_MAX_RESC, }; diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c index d996afe833ee..5be7b8a25425 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -1512,47 +1512,240 @@ static void qed_hw_set_feat(struct qed_hwfn *p_hwfn) RESC_NUM(p_hwfn, QED_SB), num_features); } -static int qed_hw_get_resc(struct qed_hwfn *p_hwfn) +static enum resource_id_enum qed_hw_get_mfw_res_id(enum qed_resources res_id) +{ + enum resource_id_enum mfw_res_id = RESOURCE_NUM_INVALID; + + switch (res_id) { + case QED_SB: + mfw_res_id = RESOURCE_NUM_SB_E; + break; + case QED_L2_QUEUE: + mfw_res_id = RESOURCE_NUM_L2_QUEUE_E; + break; + case QED_VPORT: + mfw_res_id = RESOURCE_NUM_VPORT_E; + break; + case QED_RSS_ENG: + mfw_res_id = RESOURCE_NUM_RSS_ENGINES_E; + break; + case QED_PQ: + mfw_res_id = RESOURCE_NUM_PQ_E; + break; + case QED_RL: + mfw_res_id = RESOURCE_NUM_RL_E; + break; + case QED_MAC: + case QED_VLAN: + /* Each VFC resource can accommodate both a MAC and a VLAN */ + mfw_res_id = RESOURCE_VFC_FILTER_E; + break; + case QED_ILT: + mfw_res_id = RESOURCE_ILT_E; + break; + case QED_LL2_QUEUE: + mfw_res_id = RESOURCE_LL2_QUEUE_E; + break; + case QED_RDMA_CNQ_RAM: + case QED_CMDQS_CQS: + /* CNQ/CMDQS are the same resource */ + mfw_res_id = RESOURCE_CQS_E; + break; + case QED_RDMA_STATS_QUEUE: + mfw_res_id = RESOURCE_RDMA_STATS_QUEUE_E; + break; + default: + break; + } + + return mfw_res_id; +} + +static u32 qed_hw_get_dflt_resc_num(struct qed_hwfn *p_hwfn, + enum qed_resources res_id) { - u8 enabled_func_idx = p_hwfn->enabled_func_idx; - u32 *resc_start = p_hwfn->hw_info.resc_start; u8 num_funcs = p_hwfn->num_funcs_on_engine; - u32 *resc_num = p_hwfn->hw_info.resc_num; struct qed_sb_cnt_info sb_cnt_info; - 
int i, max_vf_vlan_filters; + u32 dflt_resc_num = 0; - memset(&sb_cnt_info, 0, sizeof(sb_cnt_info)); + switch (res_id) { + case QED_SB: + memset(&sb_cnt_info, 0, sizeof(sb_cnt_info)); + qed_int_get_num_sbs(p_hwfn, &sb_cnt_info); + dflt_resc_num = sb_cnt_info.sb_cnt; + break; + case QED_L2_QUEUE: + dflt_resc_num = MAX_NUM_L2_QUEUES_BB / num_funcs; + break; + case QED_VPORT: + dflt_resc_num = MAX_NUM_VPORTS_BB / num_funcs; + break; + case QED_RSS_ENG: + dflt_resc_num = ETH_RSS_ENGINE_NUM_BB / num_funcs; + break; + case QED_PQ: + /* The granularity of the PQs is 8 */ + dflt_resc_num = MAX_QM_TX_QUEUES_BB / num_funcs; + dflt_resc_num &= ~0x7; + break; + case QED_RL: + dflt_resc_num = MAX_QM_GLOBAL_RLS / num_funcs; + break; + case QED_MAC: + case QED_VLAN: + /* Each VFC resource can accommodate both a MAC and a VLAN */ + dflt_resc_num = ETH_NUM_MAC_FILTERS / num_funcs; + break; + case QED_ILT: + dflt_resc_num = PXP_NUM_ILT_RECORDS_BB / num_funcs; + break; + case QED_LL2_QUEUE: + dflt_resc_num = MAX_NUM_LL2_RX_QUEUES / num_funcs; + break; + case QED_RDMA_CNQ_RAM: + case QED_CMDQS_CQS: + /* CNQ/CMDQS are the same resource */ + dflt_resc_num = NUM_OF_CMDQS_CQS / num_funcs; + break; + case QED_RDMA_STATS_QUEUE: + dflt_resc_num = RDMA_NUM_STATISTIC_COUNTERS_BB / num_funcs; + break; + default: + break; + } -#ifdef CONFIG_QED_SRIOV - max_vf_vlan_filters = QED_ETH_MAX_VF_NUM_VLAN_FILTERS; -#else - max_vf_vlan_filters = 0; -#endif + return dflt_resc_num; +} + +static const char *qed_hw_get_resc_name(enum qed_resources res_id) +{ + switch (res_id) { + case QED_SB: + return "SB"; + case QED_L2_QUEUE: + return "L2_QUEUE"; + case QED_VPORT: + return "VPORT"; + case QED_RSS_ENG: + return "RSS_ENG"; + case QED_PQ: + return "PQ"; + case QED_RL: + return "RL"; + case QED_MAC: + return "MAC"; + case QED_VLAN: + return "VLAN"; + case QED_RDMA_CNQ_RAM: + return "RDMA_CNQ_RAM"; + case QED_ILT: + return "ILT"; + case QED_LL2_QUEUE: + return "LL2_QUEUE"; + case QED_CMDQS_CQS: + return 
"CMDQS_CQS"; + case QED_RDMA_STATS_QUEUE: + return "RDMA_STATS_QUEUE"; + default: + return "UNKNOWN_RESOURCE"; + } +} - qed_int_get_num_sbs(p_hwfn, &sb_cnt_info); +static int qed_hw_set_resc_info(struct qed_hwfn *p_hwfn, + enum qed_resources res_id) +{ + u32 dflt_resc_num = 0, dflt_resc_start = 0, mcp_resp, mcp_param; + u32 *p_resc_num, *p_resc_start; + struct resource_info resc_info; + int rc; + + p_resc_num = &RESC_NUM(p_hwfn, res_id); + p_resc_start = &RESC_START(p_hwfn, res_id); + + /* Default values assumes that each function received equal share */ + dflt_resc_num = qed_hw_get_dflt_resc_num(p_hwfn, res_id); + if (!dflt_resc_num) { + DP_ERR(p_hwfn, + "Failed to get default amount for resource %d [%s]\n", + res_id, qed_hw_get_resc_name(res_id)); + return -EINVAL; + } + dflt_resc_start = dflt_resc_num * p_hwfn->enabled_func_idx; + + memset(&resc_info, 0, sizeof(resc_info)); + resc_info.res_id = qed_hw_get_mfw_res_id(res_id); + if (resc_info.res_id == RESOURCE_NUM_INVALID) { + DP_ERR(p_hwfn, + "Failed to match resource %d [%s] with the MFW resources\n", + res_id, qed_hw_get_resc_name(res_id)); + return -EINVAL; + } + + rc = qed_mcp_get_resc_info(p_hwfn, p_hwfn->p_main_ptt, &resc_info, + &mcp_resp, &mcp_param); + if (rc) { + DP_NOTICE(p_hwfn, + "MFW response failure for an allocation request for resource %d [%s]\n", + res_id, qed_hw_get_resc_name(res_id)); + return rc; + } + + /* Default driver values are applied in the following cases: + * - The resource allocation MB command is not supported by the MFW + * - There is an internal error in the MFW while processing the request + * - The resource ID is unknown to the MFW + */ + if (mcp_resp != FW_MSG_CODE_RESOURCE_ALLOC_OK && + mcp_resp != FW_MSG_CODE_RESOURCE_ALLOC_DEPRECATED) { + DP_NOTICE(p_hwfn, + "Resource %d [%s]: No allocation info was received [mcp_resp 0x%x]. 
Applying default values [num %d, start %d].\n", + res_id, + qed_hw_get_resc_name(res_id), + mcp_resp, dflt_resc_num, dflt_resc_start); + *p_resc_num = dflt_resc_num; + *p_resc_start = dflt_resc_start; + goto out; + } + + /* Special handling for status blocks; Would be revised in future */ + if (res_id == QED_SB) { + resc_info.size -= 1; + resc_info.offset -= p_hwfn->enabled_func_idx; + } + + *p_resc_num = resc_info.size; + *p_resc_start = resc_info.offset; + +out: + /* PQs have to divide by 8 [that's the HW granularity]. + * Reduce number so it would fit. + */ + if ((res_id == QED_PQ) && ((*p_resc_num % 8) || (*p_resc_start % 8))) { + DP_INFO(p_hwfn, + "PQs need to align by 8; Number %08x --> %08x, Start %08x --> %08x\n", + *p_resc_num, + (*p_resc_num) & ~0x7, + *p_resc_start, (*p_resc_start) & ~0x7); + *p_resc_num &= ~0x7; + *p_resc_start &= ~0x7; + } - resc_num[QED_SB] = min_t(u32, - (MAX_SB_PER_PATH_BB / num_funcs), - sb_cnt_info.sb_cnt); - resc_num[QED_L2_QUEUE] = MAX_NUM_L2_QUEUES_BB / num_funcs; - resc_num[QED_VPORT] = MAX_NUM_VPORTS_BB / num_funcs; - resc_num[QED_RSS_ENG] = ETH_RSS_ENGINE_NUM_BB / num_funcs; - resc_num[QED_PQ] = MAX_QM_TX_QUEUES_BB / num_funcs; - resc_num[QED_RL] = min_t(u32, 64, resc_num[QED_VPORT]); - resc_num[QED_MAC] = ETH_NUM_MAC_FILTERS / num_funcs; - resc_num[QED_VLAN] = (ETH_NUM_VLAN_FILTERS - 1 /*For vlan0*/) / - num_funcs; - resc_num[QED_ILT] = PXP_NUM_ILT_RECORDS_BB / num_funcs; - resc_num[QED_LL2_QUEUE] = MAX_NUM_LL2_RX_QUEUES / num_funcs; - resc_num[QED_RDMA_CNQ_RAM] = NUM_OF_CMDQS_CQS / num_funcs; - resc_num[QED_RDMA_STATS_QUEUE] = RDMA_NUM_STATISTIC_COUNTERS_BB / - num_funcs; - - for (i = 0; i < QED_MAX_RESC; i++) - resc_start[i] = resc_num[i] * enabled_func_idx; + return 0; +} + +static int qed_hw_get_resc(struct qed_hwfn *p_hwfn) +{ + u8 res_id; + int rc; + + for (res_id = 0; res_id < QED_MAX_RESC; res_id++) { + rc = qed_hw_set_resc_info(p_hwfn, res_id); + if (rc) + return rc; + } /* Sanity for ILT */ - if (RESC_END(p_hwfn, 
QED_ILT) > PXP_NUM_ILT_RECORDS_BB) { + if ((RESC_END(p_hwfn, QED_ILT) > PXP_NUM_ILT_RECORDS_BB)) { DP_NOTICE(p_hwfn, "Can't assign ILT pages [%08x,...,%08x]\n", RESC_START(p_hwfn, QED_ILT), RESC_END(p_hwfn, QED_ILT) - 1); @@ -1562,34 +1755,12 @@ static int qed_hw_get_resc(struct qed_hwfn *p_hwfn) qed_hw_set_feat(p_hwfn); DP_VERBOSE(p_hwfn, NETIF_MSG_PROBE, - "The numbers for each resource are:\n" - "SB = %d start = %d\n" - "L2_QUEUE = %d start = %d\n" - "VPORT = %d start = %d\n" - "PQ = %d start = %d\n" - "RL = %d start = %d\n" - "MAC = %d start = %d\n" - "VLAN = %d start = %d\n" - "ILT = %d start = %d\n" - "LL2_QUEUE = %d start = %d\n", - p_hwfn->hw_info.resc_num[QED_SB], - p_hwfn->hw_info.resc_start[QED_SB], - p_hwfn->hw_info.resc_num[QED_L2_QUEUE], - p_hwfn->hw_info.resc_start[QED_L2_QUEUE], - p_hwfn->hw_info.resc_num[QED_VPORT], - p_hwfn->hw_info.resc_start[QED_VPORT], - p_hwfn->hw_info.resc_num[QED_PQ], - p_hwfn->hw_info.resc_start[QED_PQ], - p_hwfn->hw_info.resc_num[QED_RL], - p_hwfn->hw_info.resc_start[QED_RL], - p_hwfn->hw_info.resc_num[QED_MAC], - p_hwfn->hw_info.resc_start[QED_MAC], - p_hwfn->hw_info.resc_num[QED_VLAN], - p_hwfn->hw_info.resc_start[QED_VLAN], - p_hwfn->hw_info.resc_num[QED_ILT], - p_hwfn->hw_info.resc_start[QED_ILT], - RESC_NUM(p_hwfn, QED_LL2_QUEUE), - RESC_START(p_hwfn, QED_LL2_QUEUE)); + "The numbers for each resource are:\n"); + for (res_id = 0; res_id < QED_MAX_RESC; res_id++) + DP_VERBOSE(p_hwfn, NETIF_MSG_PROBE, "%s = %d start = %d\n", + qed_hw_get_resc_name(res_id), + RESC_NUM(p_hwfn, res_id), + RESC_START(p_hwfn, res_id)); return 0; } diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h index 1d113ce814e1..048f9a342413 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h +++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h @@ -8529,6 +8529,41 @@ struct mdump_config_stc { u32 valid_logs; }; +enum resource_id_enum { + RESOURCE_NUM_SB_E = 0, + RESOURCE_NUM_L2_QUEUE_E = 1, + 
RESOURCE_NUM_VPORT_E = 2, + RESOURCE_NUM_VMQ_E = 3, + RESOURCE_FACTOR_NUM_RSS_PF_E = 4, + RESOURCE_FACTOR_RSS_PER_VF_E = 5, + RESOURCE_NUM_RL_E = 6, + RESOURCE_NUM_PQ_E = 7, + RESOURCE_NUM_VF_E = 8, + RESOURCE_VFC_FILTER_E = 9, + RESOURCE_ILT_E = 10, + RESOURCE_CQS_E = 11, + RESOURCE_GFT_PROFILES_E = 12, + RESOURCE_NUM_TC_E = 13, + RESOURCE_NUM_RSS_ENGINES_E = 14, + RESOURCE_LL2_QUEUE_E = 15, + RESOURCE_RDMA_STATS_QUEUE_E = 16, + RESOURCE_MAX_NUM, + RESOURCE_NUM_INVALID = 0xFFFFFFFF +}; + +/* Resource ID is to be filled by the driver in the MB request + * Size, offset & flags to be filled by the MFW in the MB response + */ +struct resource_info { + enum resource_id_enum res_id; + u32 size; /* number of allocated resources */ + u32 offset; /* Offset of the 1st resource */ + u32 vf_size; + u32 vf_offset; + u32 flags; +#define RESOURCE_ELEMENT_STRICT (1 << 0) +}; + union drv_union_data { u32 ver_str[MCP_DRV_VER_STR_SIZE_DWORD]; struct mcp_mac wol_mac; @@ -8549,6 +8584,7 @@ union drv_union_data { u64 reserved_stats[11]; struct ocbb_data_stc ocbb_info; struct temperature_status_stc temp_info; + struct resource_info resource; struct bist_nvm_image_att nvm_image_att; struct mdump_config_stc mdump_config; }; @@ -8576,6 +8612,7 @@ struct public_drv_mb { #define DRV_MSG_CODE_BW_UPDATE_ACK 0x32000000 #define DRV_MSG_CODE_NIG_DRAIN 0x30000000 +#define DRV_MSG_GET_RESOURCE_ALLOC_MSG 0x34000000 #define DRV_MSG_CODE_VF_DISABLED_DONE 0xc0000000 #define DRV_MSG_CODE_CFG_VF_MSIX 0xc0010000 #define DRV_MSG_CODE_NVM_GET_FILE_ATT 0x00030000 @@ -8666,6 +8703,12 @@ struct public_drv_mb { #define DRV_MB_PARAM_SET_LED_MODE_ON 0x1 #define DRV_MB_PARAM_SET_LED_MODE_OFF 0x2 + /* Resource Allocation params - Driver version support */ +#define DRV_MB_PARAM_RESOURCE_ALLOC_VERSION_MAJOR_MASK 0xFFFF0000 +#define DRV_MB_PARAM_RESOURCE_ALLOC_VERSION_MAJOR_SHIFT 16 +#define DRV_MB_PARAM_RESOURCE_ALLOC_VERSION_MINOR_MASK 0x0000FFFF +#define DRV_MB_PARAM_RESOURCE_ALLOC_VERSION_MINOR_SHIFT 0 + #define 
DRV_MB_PARAM_BIST_REGISTER_TEST 1 #define DRV_MB_PARAM_BIST_CLOCK_TEST 2 #define DRV_MB_PARAM_BIST_NVM_TEST_NUM_IMAGES 3 @@ -8694,6 +8737,9 @@ struct public_drv_mb { #define FW_MSG_CODE_DRV_UNLOAD_PORT 0x20120000 #define FW_MSG_CODE_DRV_UNLOAD_FUNCTION 0x20130000 #define FW_MSG_CODE_DRV_UNLOAD_DONE 0x21100000 +#define FW_MSG_CODE_RESOURCE_ALLOC_OK 0x34000000 +#define FW_MSG_CODE_RESOURCE_ALLOC_UNKNOWN 0x35000000 +#define FW_MSG_CODE_RESOURCE_ALLOC_DEPRECATED 0x36000000 #define FW_MSG_CODE_DRV_CFG_VF_MSIX_DONE 0xb0010000 #define FW_MSG_CODE_NVM_OK 0x00010000 diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c index 6b0e22d9fe4c..1e3a16edd16d 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c @@ -1691,7 +1691,7 @@ static int qed_fill_eth_dev_info(struct qed_dev *cdev, } qed_vf_get_num_vlan_filters(&cdev->hwfns[0], - &info->num_vlan_filters); + (u8 *)&info->num_vlan_filters); qed_vf_get_port_mac(&cdev->hwfns[0], info->port_mac); info->is_legacy = !!cdev->hwfns[0].vf_iov_info->b_pre_fp_hsi; diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index 092748832caf..d8e499ebb99d 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -1683,3 +1683,45 @@ int qed_mcp_bist_nvm_test_get_image_att(struct qed_hwfn *p_hwfn, return rc; } + +#define QED_RESC_ALLOC_VERSION_MAJOR 1 +#define QED_RESC_ALLOC_VERSION_MINOR 0 +#define QED_RESC_ALLOC_VERSION \ + ((QED_RESC_ALLOC_VERSION_MAJOR << \ + DRV_MB_PARAM_RESOURCE_ALLOC_VERSION_MAJOR_SHIFT) | \ + (QED_RESC_ALLOC_VERSION_MINOR << \ + DRV_MB_PARAM_RESOURCE_ALLOC_VERSION_MINOR_SHIFT)) +int qed_mcp_get_resc_info(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + struct resource_info *p_resc_info, + u32 *p_mcp_resp, u32 *p_mcp_param) +{ + struct qed_mcp_mb_params mb_params; + union drv_union_data *p_union_data; + int rc; + + memset(&mb_params, 0, 
sizeof(mb_params)); + mb_params.cmd = DRV_MSG_GET_RESOURCE_ALLOC_MSG; + mb_params.param = QED_RESC_ALLOC_VERSION; + p_union_data = (union drv_union_data *)p_resc_info; + mb_params.p_data_src = p_union_data; + mb_params.p_data_dst = p_union_data; + rc = qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params); + if (rc) + return rc; + + *p_mcp_resp = mb_params.mcp_resp; + *p_mcp_param = mb_params.mcp_param; + + DP_VERBOSE(p_hwfn, + QED_MSG_SP, + "MFW resource_info: version 0x%x, res_id 0x%x, size 0x%x, offset 0x%x, vf_size 0x%x, vf_offset 0x%x, flags 0x%x\n", + *p_mcp_param, + p_resc_info->res_id, + p_resc_info->size, + p_resc_info->offset, + p_resc_info->vf_size, + p_resc_info->vf_offset, p_resc_info->flags); + + return 0; +} diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.h b/drivers/net/ethernet/qlogic/qed/qed_mcp.h index be8152d49de2..407a2c1830fb 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.h +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.h @@ -689,4 +689,19 @@ int qed_mcp_ov_update_eswitch(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, enum qed_ov_eswitch eswitch); +/** + * @brief - Gets the MFW allocation info for the given resource + * + * @param p_hwfn + * @param p_ptt + * @param p_resc_info - descriptor of requested resource + * @param p_mcp_resp + * @param p_mcp_param + * + * @return int - 0 - operation was successful. 
+ */ +int qed_mcp_get_resc_info(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + struct resource_info *p_resc_info, + u32 *p_mcp_resp, u32 *p_mcp_param); #endif diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h index 15130805d792..9755a3feb52e 100644 --- a/include/linux/qed/qed_eth_if.h +++ b/include/linux/qed/qed_eth_if.h @@ -22,7 +22,7 @@ struct qed_dev_eth_info { u8 num_tc; u8 port_mac[ETH_ALEN]; - u8 num_vlan_filters; + u16 num_vlan_filters; u16 num_mac_filters; /* Legacy VF - this affects the datapath, so qede has to know */ -- cgit v1.2.3 From d7c816733d501b59dbdc2483f2cc8e4431fd9160 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 17 Aug 2016 14:42:08 -0700 Subject: list: Split list_add() debug checking into separate function Right now, __list_add() code is repeated either in list.h or in list_debug.c, but the only differences between the two versions are the debug checks. This commit therefore extracts these debug checks into a separate __list_add_valid() function and consolidates __list_add(). Additionally this new __list_add_valid() function will stop list manipulations if a corruption is detected, instead of allowing for further corruption that may lead to even worse conditions. This is slight refactoring of the same hardening done in PaX and Grsecurity. Signed-off-by: Kees Cook Acked-by: Steven Rostedt Signed-off-by: Paul E. 
McKenney Acked-by: Rik van Riel --- include/linux/list.h | 22 ++++++++++++++++------ lib/list_debug.c | 48 +++++++++++++++++++++++------------------------- 2 files changed, 39 insertions(+), 31 deletions(-) (limited to 'include/linux') diff --git a/include/linux/list.h b/include/linux/list.h index 5809e9a2de5b..b6da9b1dce4d 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -28,27 +28,37 @@ static inline void INIT_LIST_HEAD(struct list_head *list) list->prev = list; } +#ifdef CONFIG_DEBUG_LIST +extern bool __list_add_valid(struct list_head *new, + struct list_head *prev, + struct list_head *next); +#else +static inline bool __list_add_valid(struct list_head *new, + struct list_head *prev, + struct list_head *next) +{ + return true; +} +#endif + /* * Insert a new entry between two known consecutive entries. * * This is only for internal list manipulation where we know * the prev/next entries already! */ -#ifndef CONFIG_DEBUG_LIST static inline void __list_add(struct list_head *new, struct list_head *prev, struct list_head *next) { + if (!__list_add_valid(new, prev, next)) + return; + next->prev = new; new->next = next; new->prev = prev; WRITE_ONCE(prev->next, new); } -#else -extern void __list_add(struct list_head *new, - struct list_head *prev, - struct list_head *next); -#endif /** * list_add - add a new entry diff --git a/lib/list_debug.c b/lib/list_debug.c index 3859bf63561c..149dd57b583b 100644 --- a/lib/list_debug.c +++ b/lib/list_debug.c @@ -2,8 +2,7 @@ * Copyright 2006, Red Hat, Inc., Dave Jones * Released under the General Public License (GPL). * - * This file contains the linked list implementations for - * DEBUG_LIST. + * This file contains the linked list validation for DEBUG_LIST. */ #include @@ -13,33 +12,32 @@ #include /* - * Insert a new entry between two known consecutive entries. - * - * This is only for internal list manipulation where we know - * the prev/next entries already! 
+ * Check that the data structures for the list manipulations are reasonably + * valid. Failures here indicate memory corruption (and possibly an exploit + * attempt). */ -void __list_add(struct list_head *new, - struct list_head *prev, - struct list_head *next) +bool __list_add_valid(struct list_head *new, struct list_head *prev, + struct list_head *next) { - WARN(next->prev != prev, - "list_add corruption. next->prev should be " - "prev (%p), but was %p. (next=%p).\n", - prev, next->prev, next); - WARN(prev->next != next, - "list_add corruption. prev->next should be " - "next (%p), but was %p. (prev=%p).\n", - next, prev->next, prev); - WARN(new == prev || new == next, - "list_add double add: new=%p, prev=%p, next=%p.\n", - new, prev, next); - next->prev = new; - new->next = next; - new->prev = prev; - WRITE_ONCE(prev->next, new); + if (unlikely(next->prev != prev)) { + WARN(1, "list_add corruption. next->prev should be prev (%p), but was %p. (next=%p).\n", + prev, next->prev, next); + return false; + } + if (unlikely(prev->next != next)) { + WARN(1, "list_add corruption. prev->next should be next (%p), but was %p. (prev=%p).\n", + next, prev->next, prev); + return false; + } + if (unlikely(new == prev || new == next)) { + WARN(1, "list_add double add: new=%p, prev=%p, next=%p.\n", + new, prev, next); + return false; + } + return true; } -EXPORT_SYMBOL(__list_add); +EXPORT_SYMBOL(__list_add_valid); void __list_del_entry(struct list_head *entry) { -- cgit v1.2.3 From 54acd4397d7e7a725c94101180cd9f38ef701acc Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 17 Aug 2016 14:42:09 -0700 Subject: rculist: Consolidate DEBUG_LIST for list_add_rcu() This commit consolidates the debug checking for list_add_rcu() into the new single __list_add_valid() debug function. Notably, this commit fixes the sanity check that was added in commit 17a801f4bfeb ("list_debug: WARN for adding something already in the list"), which wasn't checking RCU-protected lists. 
Signed-off-by: Kees Cook Acked-by: Steven Rostedt Signed-off-by: Paul E. McKenney Acked-by: Rik van Riel --- include/linux/rculist.h | 8 +++----- lib/list_debug.c | 19 ------------------- 2 files changed, 3 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rculist.h b/include/linux/rculist.h index 8beb98dcf14f..4f7a9561b8c4 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -45,19 +45,17 @@ static inline void INIT_LIST_HEAD_RCU(struct list_head *list) * This is only for internal list manipulation where we know * the prev/next entries already! */ -#ifndef CONFIG_DEBUG_LIST static inline void __list_add_rcu(struct list_head *new, struct list_head *prev, struct list_head *next) { + if (!__list_add_valid(new, prev, next)) + return; + new->next = next; new->prev = prev; rcu_assign_pointer(list_next_rcu(prev), new); next->prev = new; } -#else -void __list_add_rcu(struct list_head *new, - struct list_head *prev, struct list_head *next); -#endif /** * list_add_rcu - add a new entry to rcu-protected list diff --git a/lib/list_debug.c b/lib/list_debug.c index 149dd57b583b..d0b89b9d0736 100644 --- a/lib/list_debug.c +++ b/lib/list_debug.c @@ -77,22 +77,3 @@ void list_del(struct list_head *entry) entry->prev = LIST_POISON2; } EXPORT_SYMBOL(list_del); - -/* - * RCU variants. - */ -void __list_add_rcu(struct list_head *new, - struct list_head *prev, struct list_head *next) -{ - WARN(next->prev != prev, - "list_add_rcu corruption. next->prev should be prev (%p), but was %p. (next=%p).\n", - prev, next->prev, next); - WARN(prev->next != next, - "list_add_rcu corruption. prev->next should be next (%p), but was %p. 
(prev=%p).\n", - next, prev->next, prev); - new->next = next; - new->prev = prev; - rcu_assign_pointer(list_next_rcu(prev), new); - next->prev = new; -} -EXPORT_SYMBOL(__list_add_rcu); -- cgit v1.2.3 From 0cd340dcb05c4a43742fe156f36737bb2a321bfd Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 17 Aug 2016 14:42:10 -0700 Subject: list: Split list_del() debug checking into separate function Similar to the list_add() debug consolidation, this commit consolidates the debug checking performed during CONFIG_DEBUG_LIST into a new __list_del_entry_valid() function, and stops list updates when corruption is found. Refactored from same hardening in PaX and Grsecurity. Signed-off-by: Kees Cook Acked-by: Steven Rostedt Signed-off-by: Paul E. McKenney Acked-by: Rik van Riel --- include/linux/list.h | 15 +++++++++------ lib/list_debug.c | 53 +++++++++++++++++++++++----------------------------- 2 files changed, 32 insertions(+), 36 deletions(-) (limited to 'include/linux') diff --git a/include/linux/list.h b/include/linux/list.h index b6da9b1dce4d..d1039ecaf94f 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -32,6 +32,7 @@ static inline void INIT_LIST_HEAD(struct list_head *list) extern bool __list_add_valid(struct list_head *new, struct list_head *prev, struct list_head *next); +extern bool __list_del_entry_valid(struct list_head *entry); #else static inline bool __list_add_valid(struct list_head *new, struct list_head *prev, @@ -39,6 +40,10 @@ static inline bool __list_add_valid(struct list_head *new, { return true; } +static inline bool __list_del_entry_valid(struct list_head *entry) +{ + return true; +} #endif /* @@ -106,22 +111,20 @@ static inline void __list_del(struct list_head * prev, struct list_head * next) * Note: list_empty() on entry does not return true after this, the entry is * in an undefined state. 
*/ -#ifndef CONFIG_DEBUG_LIST static inline void __list_del_entry(struct list_head *entry) { + if (!__list_del_entry_valid(entry)) + return; + __list_del(entry->prev, entry->next); } static inline void list_del(struct list_head *entry) { - __list_del(entry->prev, entry->next); + __list_del_entry(entry); entry->next = LIST_POISON1; entry->prev = LIST_POISON2; } -#else -extern void __list_del_entry(struct list_head *entry); -extern void list_del(struct list_head *entry); -#endif /** * list_replace - replace old entry by new one diff --git a/lib/list_debug.c b/lib/list_debug.c index d0b89b9d0736..276565fca2a6 100644 --- a/lib/list_debug.c +++ b/lib/list_debug.c @@ -39,41 +39,34 @@ bool __list_add_valid(struct list_head *new, struct list_head *prev, } EXPORT_SYMBOL(__list_add_valid); -void __list_del_entry(struct list_head *entry) +bool __list_del_entry_valid(struct list_head *entry) { struct list_head *prev, *next; prev = entry->prev; next = entry->next; - if (WARN(next == LIST_POISON1, - "list_del corruption, %p->next is LIST_POISON1 (%p)\n", - entry, LIST_POISON1) || - WARN(prev == LIST_POISON2, - "list_del corruption, %p->prev is LIST_POISON2 (%p)\n", - entry, LIST_POISON2) || - WARN(prev->next != entry, - "list_del corruption. prev->next should be %p, " - "but was %p\n", entry, prev->next) || - WARN(next->prev != entry, - "list_del corruption. next->prev should be %p, " - "but was %p\n", entry, next->prev)) - return; - - __list_del(prev, next); -} -EXPORT_SYMBOL(__list_del_entry); + if (unlikely(next == LIST_POISON1)) { + WARN(1, "list_del corruption, %p->next is LIST_POISON1 (%p)\n", + entry, LIST_POISON1); + return false; + } + if (unlikely(prev == LIST_POISON2)) { + WARN(1, "list_del corruption, %p->prev is LIST_POISON2 (%p)\n", + entry, LIST_POISON2); + return false; + } + if (unlikely(prev->next != entry)) { + WARN(1, "list_del corruption. 
prev->next should be %p, but was %p\n", + entry, prev->next); + return false; + } + if (unlikely(next->prev != entry)) { + WARN(1, "list_del corruption. next->prev should be %p, but was %p\n", + entry, next->prev); + return false; + } + return true; -/** - * list_del - deletes entry from list. - * @entry: the element to delete from the list. - * Note: list_empty on entry does not return true after this, the entry is - * in an undefined state. - */ -void list_del(struct list_head *entry) -{ - __list_del_entry(entry); - entry->next = LIST_POISON1; - entry->prev = LIST_POISON2; } -EXPORT_SYMBOL(list_del); +EXPORT_SYMBOL(__list_del_entry_valid); -- cgit v1.2.3 From de54ebbe26bb371a6f1fbc0593372232f04e3107 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 17 Aug 2016 14:42:11 -0700 Subject: bug: Provide toggle for BUG on data corruption The kernel checks for cases of data structure corruption under some CONFIGs (e.g. CONFIG_DEBUG_LIST). When corruption is detected, some systems may want to BUG() immediately instead of letting the system run with known corruption. Usually these kinds of manipulation primitives can be used by security flaws to gain arbitrary memory write control. This provides a new config CONFIG_BUG_ON_DATA_CORRUPTION and a corresponding macro CHECK_DATA_CORRUPTION for handling these situations. Notably, even if not BUGing, the kernel should not continue processing the corrupted structure. This is inspired by similar hardening by Syed Rameez Mustafa in MSM kernels, and in PaX and Grsecurity, which is likely in response to earlier removal of the BUG calls in commit 924d9addb9b1 ("list debugging: use WARN() instead of BUG()"). Signed-off-by: Kees Cook Acked-by: Steven Rostedt Signed-off-by: Paul E. 
McKenney Acked-by: Rik van Riel --- include/linux/bug.h | 17 ++++++++++++++++ lib/Kconfig.debug | 10 ++++++++++ lib/list_debug.c | 57 +++++++++++++++++++++-------------------------------- 3 files changed, 49 insertions(+), 35 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bug.h b/include/linux/bug.h index 292d6a10b0c2..baff2e8fc8a8 100644 --- a/include/linux/bug.h +++ b/include/linux/bug.h @@ -121,4 +121,21 @@ static inline enum bug_trap_type report_bug(unsigned long bug_addr, } #endif /* CONFIG_GENERIC_BUG */ + +/* + * Since detected data corruption should stop operation on the affected + * structures, this returns false if the corruption condition is found. + */ +#define CHECK_DATA_CORRUPTION(condition, fmt, ...) \ + do { \ + if (unlikely(condition)) { \ + if (IS_ENABLED(CONFIG_BUG_ON_DATA_CORRUPTION)) { \ + pr_err(fmt, ##__VA_ARGS__); \ + BUG(); \ + } else \ + WARN(1, fmt, ##__VA_ARGS__); \ + return false; \ + } \ + } while (0) + #endif /* _LINUX_BUG_H */ diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 33bc56cf60d7..07a6fac930c5 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1960,6 +1960,16 @@ config TEST_STATIC_KEYS If unsure, say N. +config BUG_ON_DATA_CORRUPTION + bool "Trigger a BUG when data corruption is detected" + select CONFIG_DEBUG_LIST + help + Select this option if the kernel should BUG when it encounters + data corruption in kernel memory structures when they get checked + for validity. + + If unsure, say N. + source "samples/Kconfig" source "lib/Kconfig.kgdb" diff --git a/lib/list_debug.c b/lib/list_debug.c index 276565fca2a6..7f7bfa55eb6d 100644 --- a/lib/list_debug.c +++ b/lib/list_debug.c @@ -20,21 +20,16 @@ bool __list_add_valid(struct list_head *new, struct list_head *prev, struct list_head *next) { - if (unlikely(next->prev != prev)) { - WARN(1, "list_add corruption. next->prev should be prev (%p), but was %p. 
(next=%p).\n", - prev, next->prev, next); - return false; - } - if (unlikely(prev->next != next)) { - WARN(1, "list_add corruption. prev->next should be next (%p), but was %p. (prev=%p).\n", - next, prev->next, prev); - return false; - } - if (unlikely(new == prev || new == next)) { - WARN(1, "list_add double add: new=%p, prev=%p, next=%p.\n", - new, prev, next); - return false; - } + CHECK_DATA_CORRUPTION(next->prev != prev, + "list_add corruption. next->prev should be prev (%p), but was %p. (next=%p).\n", + prev, next->prev, next); + CHECK_DATA_CORRUPTION(prev->next != next, + "list_add corruption. prev->next should be next (%p), but was %p. (prev=%p).\n", + next, prev->next, prev); + CHECK_DATA_CORRUPTION(new == prev || new == next, + "list_add double add: new=%p, prev=%p, next=%p.\n", + new, prev, next); + return true; } EXPORT_SYMBOL(__list_add_valid); @@ -46,26 +41,18 @@ bool __list_del_entry_valid(struct list_head *entry) prev = entry->prev; next = entry->next; - if (unlikely(next == LIST_POISON1)) { - WARN(1, "list_del corruption, %p->next is LIST_POISON1 (%p)\n", - entry, LIST_POISON1); - return false; - } - if (unlikely(prev == LIST_POISON2)) { - WARN(1, "list_del corruption, %p->prev is LIST_POISON2 (%p)\n", - entry, LIST_POISON2); - return false; - } - if (unlikely(prev->next != entry)) { - WARN(1, "list_del corruption. prev->next should be %p, but was %p\n", - entry, prev->next); - return false; - } - if (unlikely(next->prev != entry)) { - WARN(1, "list_del corruption. next->prev should be %p, but was %p\n", - entry, next->prev); - return false; - } + CHECK_DATA_CORRUPTION(next == LIST_POISON1, + "list_del corruption, %p->next is LIST_POISON1 (%p)\n", + entry, LIST_POISON1); + CHECK_DATA_CORRUPTION(prev == LIST_POISON2, + "list_del corruption, %p->prev is LIST_POISON2 (%p)\n", + entry, LIST_POISON2); + CHECK_DATA_CORRUPTION(prev->next != entry, + "list_del corruption. 
prev->next should be %p, but was %p\n", + entry, prev->next); + CHECK_DATA_CORRUPTION(next->prev != entry, + "list_del corruption. next->prev should be %p, but was %p\n", + entry, next->prev); return true; } -- cgit v1.2.3 From 556d299fcb4af8f2e8eacf311c4eee352c746788 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Mon, 31 Oct 2016 13:21:02 +0100 Subject: net: pim: add common pimhdr struct and helpers Add the common pimhdr structure and helpers to access it, also cleanup the format of the header file. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/linux/pim.h | 44 ++++++++++++++++++++++++++++++++++++-------- 1 file changed, 36 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pim.h b/include/linux/pim.h index e1d756f81348..354235a2691b 100644 --- a/include/linux/pim.h +++ b/include/linux/pim.h @@ -1,6 +1,7 @@ #ifndef __LINUX_PIM_H #define __LINUX_PIM_H +#include #include /* Message types - V1 */ @@ -13,20 +14,47 @@ #define PIM_NULL_REGISTER cpu_to_be32(0x40000000) -static inline bool ipmr_pimsm_enabled(void) -{ - return IS_BUILTIN(CONFIG_IP_PIMSM_V1) || IS_BUILTIN(CONFIG_IP_PIMSM_V2); -} +/* RFC7761, sec 4.9: + * The PIM header common to all PIM messages is: + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |PIM Ver| Type | Reserved | Checksum | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +struct pimhdr { + __u8 type; + __u8 reserved; + __be16 csum; +}; /* PIMv2 register message header layout (ietf-draft-idmr-pimvsm-v2-00.ps */ -struct pimreghdr -{ +struct pimreghdr { __u8 type; __u8 reserved; __be16 csum; __be32 flags; }; -struct sk_buff; -extern int pim_rcv_v1(struct sk_buff *); +int pim_rcv_v1(struct sk_buff *skb); + +static inline bool ipmr_pimsm_enabled(void) +{ + return IS_BUILTIN(CONFIG_IP_PIMSM_V1) || IS_BUILTIN(CONFIG_IP_PIMSM_V2); +} + +static 
inline struct pimhdr *pim_hdr(const struct sk_buff *skb) +{ + return (struct pimhdr *)skb_transport_header(skb); +} + +static inline u8 pim_hdr_version(const struct pimhdr *pimhdr) +{ + return pimhdr->type >> 4; +} + +static inline u8 pim_hdr_type(const struct pimhdr *pimhdr) +{ + return pimhdr->type & 0xf; +} #endif -- cgit v1.2.3 From 20bb6ce9879e19eee7539329eaa2408d12b00306 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Mon, 31 Oct 2016 13:21:03 +0100 Subject: net: pim: add a helper to check for IPv4 all pim routers address Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/linux/pim.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pim.h b/include/linux/pim.h index 354235a2691b..1b6c0dbba94e 100644 --- a/include/linux/pim.h +++ b/include/linux/pim.h @@ -57,4 +57,10 @@ static inline u8 pim_hdr_type(const struct pimhdr *pimhdr) { return pimhdr->type & 0xf; } + +/* check if the address is 224.0.0.13, RFC7761 sec 4.3.1 */ +static inline bool pim_ipv4_all_pim_routers(__be32 addr) +{ + return addr == htonl(0xE000000D); +} #endif -- cgit v1.2.3 From 56245cae19f5ccb371fa63b09bb6b9ce7c0f1266 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Mon, 31 Oct 2016 13:21:04 +0100 Subject: net: pim: add all RFC7761 message types Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/linux/pim.h | 31 ++++++++++++++++++++++++++++++- net/ipv4/ipmr.c | 2 +- net/ipv6/ip6mr.c | 2 +- 3 files changed, 32 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pim.h b/include/linux/pim.h index 1b6c0dbba94e..0e81b2778ae0 100644 --- a/include/linux/pim.h +++ b/include/linux/pim.h @@ -10,7 +10,36 @@ /* Message types - V2 */ #define PIM_VERSION 2 -#define PIM_REGISTER 1 + +/* RFC7761, sec 4.9: + * Type + * Types for specific PIM messages. 
PIM Types are: + * + * Message Type Destination + * --------------------------------------------------------------------- + * 0 = Hello Multicast to ALL-PIM-ROUTERS + * 1 = Register Unicast to RP + * 2 = Register-Stop Unicast to source of Register + * packet + * 3 = Join/Prune Multicast to ALL-PIM-ROUTERS + * 4 = Bootstrap Multicast to ALL-PIM-ROUTERS + * 5 = Assert Multicast to ALL-PIM-ROUTERS + * 6 = Graft (used in PIM-DM only) Unicast to RPF'(S) + * 7 = Graft-Ack (used in PIM-DM only) Unicast to source of Graft + * packet + * 8 = Candidate-RP-Advertisement Unicast to Domain's BSR + */ +enum { + PIM_TYPE_HELLO, + PIM_TYPE_REGISTER, + PIM_TYPE_REGISTER_STOP, + PIM_TYPE_JOIN_PRUNE, + PIM_TYPE_BOOTSTRAP, + PIM_TYPE_ASSERT, + PIM_TYPE_GRAFT, + PIM_TYPE_GRAFT_ACK, + PIM_TYPE_CANDIDATE_RP_ADV +}; #define PIM_NULL_REGISTER cpu_to_be32(0x40000000) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 5f006e13de56..51d71a70fbbe 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -2053,7 +2053,7 @@ static int pim_rcv(struct sk_buff *skb) goto drop; pim = (struct pimreghdr *)skb_transport_header(skb); - if (pim->type != ((PIM_VERSION << 4) | (PIM_REGISTER)) || + if (pim->type != ((PIM_VERSION << 4) | (PIM_TYPE_REGISTER)) || (pim->flags & PIM_NULL_REGISTER) || (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 && csum_fold(skb_checksum(skb, 0, skb->len, 0)))) diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 7f4265b1649b..52101b37ad6e 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -636,7 +636,7 @@ static int pim6_rcv(struct sk_buff *skb) goto drop; pim = (struct pimreghdr *)skb_transport_header(skb); - if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) || + if (pim->type != ((PIM_VERSION << 4) | PIM_TYPE_REGISTER) || (pim->flags & PIM_NULL_REGISTER) || (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, sizeof(*pim), IPPROTO_PIM, -- cgit v1.2.3 From e950604782440c8635d289552bb5db58658fcbe9 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Fri, 7 
Oct 2016 21:23:12 -0700 Subject: rpmsg: Introduce a driver override mechanism Similar to other subsystems it's useful to provide a mechanism to force a specific driver match on a device, so introduce this. Signed-off-by: Bjorn Andersson --- drivers/rpmsg/rpmsg_core.c | 3 +++ include/linux/rpmsg.h | 2 ++ 2 files changed, 5 insertions(+) (limited to 'include/linux') diff --git a/drivers/rpmsg/rpmsg_core.c b/drivers/rpmsg/rpmsg_core.c index b6ea9ffa7381..087d4db896c8 100644 --- a/drivers/rpmsg/rpmsg_core.c +++ b/drivers/rpmsg/rpmsg_core.c @@ -315,6 +315,9 @@ static int rpmsg_dev_match(struct device *dev, struct device_driver *drv) const struct rpmsg_device_id *ids = rpdrv->id_table; unsigned int i; + if (rpdev->driver_override) + return !strcmp(rpdev->driver_override, drv->name); + if (ids) for (i = 0; ids[i].name[0]; i++) if (rpmsg_id_match(rpdev, &ids[i])) diff --git a/include/linux/rpmsg.h b/include/linux/rpmsg.h index 452d393cc8dd..7ad6c205f110 100644 --- a/include/linux/rpmsg.h +++ b/include/linux/rpmsg.h @@ -64,6 +64,7 @@ struct rpmsg_channel_info { * rpmsg_device - device that belong to the rpmsg bus * @dev: the device struct * @id: device id (used to match between rpmsg drivers and devices) + * @driver_override: driver name to force a match * @src: local address * @dst: destination address * @ept: the rpmsg endpoint of this channel @@ -72,6 +73,7 @@ struct rpmsg_channel_info { struct rpmsg_device { struct device dev; struct rpmsg_device_id id; + char *driver_override; u32 src; u32 dst; struct rpmsg_endpoint *ept; -- cgit v1.2.3 From 2c8a57088045a58958372d405586c16e3e12f4e1 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Fri, 21 Oct 2016 10:25:37 -0700 Subject: rpmsg: Provide function stubs for API Provide function stubs for the rpmsg API to allow clients to be compile tested without having CONFIG_RPMSG enabled. 
Signed-off-by: Bjorn Andersson --- include/linux/rpmsg.h | 123 ++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 113 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rpmsg.h b/include/linux/rpmsg.h index 7ad6c205f110..18f9e1ae4b7e 100644 --- a/include/linux/rpmsg.h +++ b/include/linux/rpmsg.h @@ -37,6 +37,7 @@ #include #include +#include #include #include #include @@ -134,6 +135,8 @@ struct rpmsg_driver { int (*callback)(struct rpmsg_device *, void *, int, void *, u32); }; +#if IS_ENABLED(CONFIG_RPMSG) + int register_rpmsg_device(struct rpmsg_device *dev); void unregister_rpmsg_device(struct rpmsg_device *dev); int __register_rpmsg_driver(struct rpmsg_driver *drv, struct module *owner); @@ -143,6 +146,116 @@ struct rpmsg_endpoint *rpmsg_create_ept(struct rpmsg_device *, rpmsg_rx_cb_t cb, void *priv, struct rpmsg_channel_info chinfo); +int rpmsg_send(struct rpmsg_endpoint *ept, void *data, int len); +int rpmsg_sendto(struct rpmsg_endpoint *ept, void *data, int len, u32 dst); +int rpmsg_send_offchannel(struct rpmsg_endpoint *ept, u32 src, u32 dst, + void *data, int len); + +int rpmsg_trysend(struct rpmsg_endpoint *ept, void *data, int len); +int rpmsg_trysendto(struct rpmsg_endpoint *ept, void *data, int len, u32 dst); +int rpmsg_trysend_offchannel(struct rpmsg_endpoint *ept, u32 src, u32 dst, + void *data, int len); + +#else + +static inline int register_rpmsg_device(struct rpmsg_device *dev) +{ + return -ENXIO; +} + +static inline void unregister_rpmsg_device(struct rpmsg_device *dev) +{ + /* This shouldn't be possible */ + WARN_ON(1); +} + +static inline int __register_rpmsg_driver(struct rpmsg_driver *drv, + struct module *owner) +{ + /* This shouldn't be possible */ + WARN_ON(1); + + return -ENXIO; +} + +static inline void unregister_rpmsg_driver(struct rpmsg_driver *drv) +{ + /* This shouldn't be possible */ + WARN_ON(1); +} + +static inline void rpmsg_destroy_ept(struct rpmsg_endpoint *ept) +{ + /* This 
shouldn't be possible */ + WARN_ON(1); +} + +static inline struct rpmsg_endpoint *rpmsg_create_ept(struct rpmsg_device *rpdev, + rpmsg_rx_cb_t cb, + void *priv, + struct rpmsg_channel_info chinfo) +{ + /* This shouldn't be possible */ + WARN_ON(1); + + return ERR_PTR(-ENXIO); +} + +static inline int rpmsg_send(struct rpmsg_endpoint *ept, void *data, int len) +{ + /* This shouldn't be possible */ + WARN_ON(1); + + return -ENXIO; +} + +static inline int rpmsg_sendto(struct rpmsg_endpoint *ept, void *data, int len, + u32 dst) +{ + /* This shouldn't be possible */ + WARN_ON(1); + + return -ENXIO; + +} + +static inline int rpmsg_send_offchannel(struct rpmsg_endpoint *ept, u32 src, + u32 dst, void *data, int len) +{ + /* This shouldn't be possible */ + WARN_ON(1); + + return -ENXIO; +} + +static inline int rpmsg_trysend(struct rpmsg_endpoint *ept, void *data, int len) +{ + /* This shouldn't be possible */ + WARN_ON(1); + + return -ENXIO; +} + +static inline int rpmsg_trysendto(struct rpmsg_endpoint *ept, void *data, + int len, u32 dst) +{ + /* This shouldn't be possible */ + WARN_ON(1); + + return -ENXIO; +} + +static inline int rpmsg_trysend_offchannel(struct rpmsg_endpoint *ept, u32 src, + u32 dst, void *data, int len) +{ + /* This shouldn't be possible */ + WARN_ON(1); + + return -ENXIO; +} + +#endif /* IS_ENABLED(CONFIG_RPMSG) */ + /* use a macro to avoid include chaining to get THIS_MODULE */ #define register_rpmsg_driver(drv) \ __register_rpmsg_driver(drv, THIS_MODULE) @@ -159,14 +272,4 @@ struct rpmsg_endpoint *rpmsg_create_ept(struct rpmsg_device *, module_driver(__rpmsg_driver, register_rpmsg_driver, \ unregister_rpmsg_driver) -int rpmsg_send(struct rpmsg_endpoint *ept, void *data, int len); -int rpmsg_sendto(struct rpmsg_endpoint *ept, void *data, int len, u32 dst); -int rpmsg_send_offchannel(struct rpmsg_endpoint *ept, u32 src, u32 dst, - void *data, int len); - -int rpmsg_trysend(struct rpmsg_endpoint *ept, void *data, int len); -int rpmsg_trysendto(struct 
rpmsg_endpoint *ept, void *data, int len, u32 dst); -int rpmsg_trysend_offchannel(struct rpmsg_endpoint *ept, u32 src, u32 dst, - void *data, int len); - #endif /* _LINUX_RPMSG_H */ -- cgit v1.2.3 From d71d9ae14a0942fae519d890a743b12679e3d153 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 1 Nov 2016 07:40:03 -0600 Subject: blk-cgroup: use op_is_sync to check for synchronous requests Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blk-cgroup.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index ddaf28d0988f..01b62e7bac74 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -599,7 +599,7 @@ static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat, __percpu_counter_add(cnt, val, BLKG_STAT_CPU_BATCH); - if (op & REQ_SYNC) + if (op_is_sync(op)) cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_SYNC]; else cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_ASYNC]; -- cgit v1.2.3 From 6f6b29171a192e84b666c816e49d2175afbbb09f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 1 Nov 2016 07:40:07 -0600 Subject: block: don't use REQ_SYNC in the READ_SYNC definition Reads are synchronous per definition, don't add another flag for it. 
Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index e3e878f12b25..5e0078fceed7 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -196,7 +196,7 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, #define READ REQ_OP_READ #define WRITE REQ_OP_WRITE -#define READ_SYNC REQ_SYNC +#define READ_SYNC 0 #define WRITE_SYNC (REQ_SYNC | REQ_NOIDLE) #define WRITE_ODIRECT REQ_SYNC #define WRITE_FLUSH (REQ_SYNC | REQ_NOIDLE | REQ_PREFLUSH) -- cgit v1.2.3 From b685d3d65ac791406e0dfd8779cc9b3707fea5a3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 1 Nov 2016 07:40:08 -0600 Subject: block: treat REQ_FUA and REQ_PREFLUSH as synchronous MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of requiring everyone to specify the REQ_SYNC flag as well. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 8 +++++++- include/linux/fs.h | 6 +++--- 2 files changed, 10 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 3fa62cabe8d2..107d23d18096 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -216,9 +216,15 @@ static inline bool op_is_write(unsigned int op) return (op & 1); } +/* + * Reads are always treated as synchronous, as are requests with the FUA or + * PREFLUSH flag. Other operations may be marked as synchronous using the + * REQ_SYNC flag.
+ */ static inline bool op_is_sync(unsigned int op) { - return (op & REQ_OP_MASK) == REQ_OP_READ || (op & REQ_SYNC); + return (op & REQ_OP_MASK) == REQ_OP_READ || + (op & (REQ_SYNC | REQ_FUA | REQ_PREFLUSH)); } typedef unsigned int blk_qc_t; diff --git a/include/linux/fs.h b/include/linux/fs.h index 5e0078fceed7..ccedccb28ec8 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -199,9 +199,9 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, #define READ_SYNC 0 #define WRITE_SYNC (REQ_SYNC | REQ_NOIDLE) #define WRITE_ODIRECT REQ_SYNC -#define WRITE_FLUSH (REQ_SYNC | REQ_NOIDLE | REQ_PREFLUSH) -#define WRITE_FUA (REQ_SYNC | REQ_NOIDLE | REQ_FUA) -#define WRITE_FLUSH_FUA (REQ_SYNC | REQ_NOIDLE | REQ_PREFLUSH | REQ_FUA) +#define WRITE_FLUSH (REQ_NOIDLE | REQ_PREFLUSH) +#define WRITE_FUA (REQ_NOIDLE | REQ_FUA) +#define WRITE_FLUSH_FUA (REQ_NOIDLE | REQ_PREFLUSH | REQ_FUA) /* * Attribute flags. These should be or-ed together to figure out what -- cgit v1.2.3 From a2b809672ee6fcb4d5756ea815725b3dbaea654e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 1 Nov 2016 07:40:09 -0600 Subject: block: replace REQ_NOIDLE with REQ_IDLE Noidle should be the default for writes as seen by all the compound definitions in fs.h using it. In fact only direct I/O really should be using NOIDLE, so turn the whole flag around to get the defaults right, which will make our life much easier especially once the WRITE_* defines go away. This assumes all the existing "raw" users of REQ_SYNC for writes want noidle behavior, which seems to be spot on from a quick audit.
Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- Documentation/block/cfq-iosched.txt | 32 ++++++++++++++++---------------- block/cfq-iosched.c | 11 ++++++++--- drivers/block/drbd/drbd_actlog.c | 2 +- include/linux/blk_types.h | 4 ++-- include/linux/fs.h | 10 +++++----- include/trace/events/f2fs.h | 2 +- 6 files changed, 33 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/Documentation/block/cfq-iosched.txt b/Documentation/block/cfq-iosched.txt index 1e4f835a659d..895bd3813115 100644 --- a/Documentation/block/cfq-iosched.txt +++ b/Documentation/block/cfq-iosched.txt @@ -240,11 +240,11 @@ All cfq queues doing synchronous sequential IO go on to sync-idle tree. On this tree we idle on each queue individually. All synchronous non-sequential queues go on sync-noidle tree. Also any -request which are marked with REQ_NOIDLE go on this service tree. On this -tree we do not idle on individual queues instead idle on the whole group -of queues or the tree. So if there are 4 queues waiting for IO to dispatch -we will idle only once last queue has dispatched the IO and there is -no more IO on this service tree. +synchronous write request which is not marked with REQ_IDLE goes on this +service tree. On this tree we do not idle on individual queues instead idle +on the whole group of queues or the tree. So if there are 4 queues waiting +for IO to dispatch we will idle only once last queue has dispatched the IO +and there is no more IO on this service tree. All async writes go on async service tree. There is no idling on async queues. @@ -257,17 +257,17 @@ tree idling provides isolation with buffered write queues on async tree. FAQ === -Q1. Why to idle at all on queues marked with REQ_NOIDLE. +Q1. Why to idle at all on queues not marked with REQ_IDLE. -A1. We only do tree idle (all queues on sync-noidle tree) on queues marked - with REQ_NOIDLE. This helps in providing isolation with all the sync-idle +A1. 
We only do tree idle (all queues on sync-noidle tree) on queues not marked + with REQ_IDLE. This helps in providing isolation with all the sync-idle queues. Otherwise in presence of many sequential readers, other synchronous IO might not get fair share of disk. For example, if there are 10 sequential readers doing IO and they get - 100ms each. If a REQ_NOIDLE request comes in, it will be scheduled - roughly after 1 second. If after completion of REQ_NOIDLE request we - do not idle, and after a couple of milli seconds a another REQ_NOIDLE + 100ms each. If a !REQ_IDLE request comes in, it will be scheduled + roughly after 1 second. If after completion of !REQ_IDLE request we + do not idle, and after a couple of milli seconds a another !REQ_IDLE request comes in, again it will be scheduled after 1second. Repeat it and notice how a workload can lose its disk share and suffer due to multiple sequential readers. @@ -276,16 +276,16 @@ A1. We only do tree idle (all queues on sync-noidle tree) on queues marked context of fsync, and later some journaling data is written. Journaling data comes in only after fsync has finished its IO (atleast for ext4 that seemed to be the case). Now if one decides not to idle on fsync - thread due to REQ_NOIDLE, then next journaling write will not get + thread due to !REQ_IDLE, then next journaling write will not get scheduled for another second. A process doing small fsync, will suffer badly in presence of multiple sequential readers. - Hence doing tree idling on threads using REQ_NOIDLE flag on requests + Hence doing tree idling on threads using !REQ_IDLE flag on requests provides isolation from multiple sequential readers and at the same time we do not idle on individual threads. -Q2. When to specify REQ_NOIDLE -A2. I would think whenever one is doing synchronous write and not expecting +Q2. When to specify REQ_IDLE +A2. 
I would think whenever one is doing synchronous write and expecting more writes to be dispatched from same context soon, should be able - to specify REQ_NOIDLE on writes and that probably should work well for + to specify REQ_IDLE on writes and that probably should work well for most of the cases. diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index f28db97c3fe0..dcbed8c9c82c 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -3914,6 +3914,12 @@ cfq_update_io_seektime(struct cfq_data *cfqd, struct cfq_queue *cfqq, cfqq->seek_history |= (sdist > CFQQ_SEEK_THR); } +static inline bool req_noidle(struct request *req) +{ + return req_op(req) == REQ_OP_WRITE && + (req->cmd_flags & (REQ_SYNC | REQ_IDLE)) == REQ_SYNC; +} + /* * Disable idle window if the process thinks too long or seeks so much that * it doesn't matter @@ -3935,7 +3941,7 @@ cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq, if (cfqq->queued[0] + cfqq->queued[1] >= 4) cfq_mark_cfqq_deep(cfqq); - if (cfqq->next_rq && (cfqq->next_rq->cmd_flags & REQ_NOIDLE)) + if (cfqq->next_rq && req_noidle(cfqq->next_rq)) enable_idle = 0; else if (!atomic_read(&cic->icq.ioc->active_ref) || !cfqd->cfq_slice_idle || @@ -4220,8 +4226,7 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq) const int sync = rq_is_sync(rq); u64 now = ktime_get_ns(); - cfq_log_cfqq(cfqd, cfqq, "complete rqnoidle %d", - !!(rq->cmd_flags & REQ_NOIDLE)); + cfq_log_cfqq(cfqd, cfqq, "complete rqnoidle %d", req_noidle(rq)); cfq_update_hw_tag(cfqd); diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index 2d3d50ab74bf..8d7bcfa49c12 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -148,7 +148,7 @@ static int _drbd_md_sync_page_io(struct drbd_device *device, if ((op == REQ_OP_WRITE) && !test_bit(MD_NO_FUA, &device->flags)) op_flags |= REQ_FUA | REQ_PREFLUSH; - op_flags |= REQ_SYNC | REQ_NOIDLE; + op_flags |= REQ_SYNC; bio = 
bio_alloc_drbd(GFP_NOIO); bio->bi_bdev = bdev->md_bdev; diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 107d23d18096..63b750a3b165 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -175,7 +175,7 @@ enum req_flag_bits { __REQ_META, /* metadata io request */ __REQ_PRIO, /* boost priority in cfq */ __REQ_NOMERGE, /* don't touch this for merging */ - __REQ_NOIDLE, /* don't anticipate more IO after this one */ + __REQ_IDLE, /* anticipate more IO after this one */ __REQ_INTEGRITY, /* I/O includes block integrity payload */ __REQ_FUA, /* forced unit access */ __REQ_PREFLUSH, /* request for cache flush */ @@ -190,7 +190,7 @@ enum req_flag_bits { #define REQ_META (1ULL << __REQ_META) #define REQ_PRIO (1ULL << __REQ_PRIO) #define REQ_NOMERGE (1ULL << __REQ_NOMERGE) -#define REQ_NOIDLE (1ULL << __REQ_NOIDLE) +#define REQ_IDLE (1ULL << __REQ_IDLE) #define REQ_INTEGRITY (1ULL << __REQ_INTEGRITY) #define REQ_FUA (1ULL << __REQ_FUA) #define REQ_PREFLUSH (1ULL << __REQ_PREFLUSH) diff --git a/include/linux/fs.h b/include/linux/fs.h index ccedccb28ec8..46a74209917f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -197,11 +197,11 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, #define WRITE REQ_OP_WRITE #define READ_SYNC 0 -#define WRITE_SYNC (REQ_SYNC | REQ_NOIDLE) -#define WRITE_ODIRECT REQ_SYNC -#define WRITE_FLUSH (REQ_NOIDLE | REQ_PREFLUSH) -#define WRITE_FUA (REQ_NOIDLE | REQ_FUA) -#define WRITE_FLUSH_FUA (REQ_NOIDLE | REQ_PREFLUSH | REQ_FUA) +#define WRITE_SYNC REQ_SYNC +#define WRITE_ODIRECT (REQ_SYNC | REQ_IDLE) +#define WRITE_FLUSH REQ_PREFLUSH +#define WRITE_FUA REQ_FUA +#define WRITE_FLUSH_FUA (REQ_PREFLUSH | REQ_FUA) /* * Attribute flags. 
These should be or-ed together to figure out what diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 903a09165bb1..a9d34424450d 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -32,7 +32,7 @@ TRACE_DEFINE_ENUM(LFS); TRACE_DEFINE_ENUM(SSR); TRACE_DEFINE_ENUM(__REQ_RAHEAD); TRACE_DEFINE_ENUM(__REQ_SYNC); -TRACE_DEFINE_ENUM(__REQ_NOIDLE); +TRACE_DEFINE_ENUM(__REQ_IDLE); TRACE_DEFINE_ENUM(__REQ_PREFLUSH); TRACE_DEFINE_ENUM(__REQ_FUA); TRACE_DEFINE_ENUM(__REQ_PRIO); -- cgit v1.2.3 From 70fd76140a6cb63262bd47b68d57b42e889c10ee Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 1 Nov 2016 07:40:10 -0600 Subject: block,fs: use REQ_* flags directly Remove the WRITE_* and READ_SYNC wrappers, and just use the flags directly. Where applicable this also drops usage of the bio_set_op_attrs wrapper. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-flush.c | 4 ++-- drivers/block/drbd/drbd_receiver.c | 2 +- drivers/block/xen-blkback/blkback.c | 10 ++++---- drivers/md/bcache/btree.c | 4 ++-- drivers/md/bcache/debug.c | 4 ++-- drivers/md/bcache/request.c | 2 +- drivers/md/bcache/super.c | 4 ++-- drivers/md/dm-bufio.c | 2 +- drivers/md/dm-log.c | 2 +- drivers/md/dm-raid1.c | 4 ++-- drivers/md/dm-snap-persistent.c | 4 ++-- drivers/md/dm.c | 2 +- drivers/md/md.c | 4 ++-- drivers/md/raid5-cache.c | 4 ++-- drivers/md/raid5.c | 2 +- drivers/nvme/target/io-cmd.c | 4 ++-- drivers/target/target_core_iblock.c | 8 +++---- fs/btrfs/disk-io.c | 6 ++--- fs/btrfs/extent_io.c | 16 ++++++------- fs/btrfs/inode.c | 6 ++--- fs/btrfs/scrub.c | 2 +- fs/btrfs/volumes.c | 2 +- fs/btrfs/volumes.h | 2 +- fs/buffer.c | 8 +++---- fs/direct-io.c | 2 +- fs/ext4/mmp.c | 6 ++--- fs/ext4/page-io.c | 2 +- fs/ext4/super.c | 2 +- fs/f2fs/checkpoint.c | 4 ++-- fs/f2fs/data.c | 16 ++++++------- fs/f2fs/gc.c | 6 ++--- fs/f2fs/inline.c | 2 +- fs/f2fs/node.c | 4 ++-- fs/f2fs/segment.c | 8 +++---- fs/f2fs/super.c | 2 +- fs/gfs2/log.c | 4 
++-- fs/gfs2/meta_io.c | 6 ++--- fs/gfs2/ops_fstype.c | 2 +- fs/hfsplus/super.c | 4 ++-- fs/jbd2/checkpoint.c | 2 +- fs/jbd2/commit.c | 9 +++---- fs/jbd2/journal.c | 15 ++++++------ fs/jbd2/revoke.c | 2 +- fs/jfs/jfs_logmgr.c | 4 ++-- fs/mpage.c | 6 ++--- fs/nilfs2/super.c | 2 +- fs/ocfs2/cluster/heartbeat.c | 2 +- fs/reiserfs/journal.c | 6 +++-- fs/xfs/xfs_aops.c | 11 +++++---- fs/xfs/xfs_buf.c | 2 +- include/linux/fs.h | 47 ------------------------------------- include/trace/events/f2fs.h | 10 ++++---- kernel/power/swap.c | 19 +++++++-------- 53 files changed, 133 insertions(+), 182 deletions(-) (limited to 'include/linux') diff --git a/block/blk-flush.c b/block/blk-flush.c index 95f1d4d357df..d35beca18481 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -330,7 +330,7 @@ static bool blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq) } flush_rq->cmd_type = REQ_TYPE_FS; - flush_rq->cmd_flags = REQ_OP_FLUSH | WRITE_FLUSH; + flush_rq->cmd_flags = REQ_OP_FLUSH | REQ_PREFLUSH; flush_rq->rq_flags |= RQF_FLUSH_SEQ; flush_rq->rq_disk = first_rq->rq_disk; flush_rq->end_io = flush_end_io; @@ -486,7 +486,7 @@ int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask, bio = bio_alloc(gfp_mask, 0); bio->bi_bdev = bdev; - bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_FLUSH); + bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; ret = submit_bio_wait(bio); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 942384f34e22..a89538cb3eaa 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1266,7 +1266,7 @@ static void submit_one_flush(struct drbd_device *device, struct issue_flush_cont bio->bi_bdev = device->ldev->backing_bdev; bio->bi_private = octx; bio->bi_end_io = one_flush_endio; - bio_set_op_attrs(bio, REQ_OP_FLUSH, WRITE_FLUSH); + bio->bi_opf = REQ_OP_FLUSH | REQ_PREFLUSH; device->flush_jif = jiffies; set_bit(FLUSH_PENDING, &device->flags); diff --git 
a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 4a80ee752597..726c32e35db9 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -1253,14 +1253,14 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring, case BLKIF_OP_WRITE: ring->st_wr_req++; operation = REQ_OP_WRITE; - operation_flags = WRITE_ODIRECT; + operation_flags = REQ_SYNC | REQ_IDLE; break; case BLKIF_OP_WRITE_BARRIER: drain = true; case BLKIF_OP_FLUSH_DISKCACHE: ring->st_f_req++; operation = REQ_OP_WRITE; - operation_flags = WRITE_FLUSH; + operation_flags = REQ_PREFLUSH; break; default: operation = 0; /* make gcc happy */ @@ -1272,7 +1272,7 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring, nseg = req->operation == BLKIF_OP_INDIRECT ? req->u.indirect.nr_segments : req->u.rw.nr_segments; - if (unlikely(nseg == 0 && operation_flags != WRITE_FLUSH) || + if (unlikely(nseg == 0 && operation_flags != REQ_PREFLUSH) || unlikely((req->operation != BLKIF_OP_INDIRECT) && (nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) || unlikely((req->operation == BLKIF_OP_INDIRECT) && @@ -1334,7 +1334,7 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring, } /* Wait on all outstanding I/O's and once that has been completed - * issue the WRITE_FLUSH. + * issue the flush. */ if (drain) xen_blk_drain_io(pending_req->ring); @@ -1380,7 +1380,7 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring, /* This will be hit if the operation was a flush or discard. 
*/ if (!bio) { - BUG_ON(operation_flags != WRITE_FLUSH); + BUG_ON(operation_flags != REQ_PREFLUSH); bio = bio_alloc(GFP_KERNEL, 0); if (unlikely(bio == NULL)) diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 81d3db40cd7b..6fdd8e252760 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -297,7 +297,7 @@ static void bch_btree_node_read(struct btree *b) bio->bi_iter.bi_size = KEY_SIZE(&b->key) << 9; bio->bi_end_io = btree_node_read_endio; bio->bi_private = &cl; - bio_set_op_attrs(bio, REQ_OP_READ, REQ_META|READ_SYNC); + bio->bi_opf = REQ_OP_READ | REQ_META; bch_bio_map(bio, b->keys.set[0].data); @@ -393,7 +393,7 @@ static void do_btree_node_write(struct btree *b) b->bio->bi_end_io = btree_node_write_endio; b->bio->bi_private = cl; b->bio->bi_iter.bi_size = roundup(set_bytes(i), block_bytes(b->c)); - bio_set_op_attrs(b->bio, REQ_OP_WRITE, REQ_META|WRITE_SYNC|REQ_FUA); + b->bio->bi_opf = REQ_OP_WRITE | REQ_META | REQ_FUA; bch_bio_map(b->bio, i); /* diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c index 333a1e5f6ae6..1c9130ae0073 100644 --- a/drivers/md/bcache/debug.c +++ b/drivers/md/bcache/debug.c @@ -52,7 +52,7 @@ void bch_btree_verify(struct btree *b) bio->bi_bdev = PTR_CACHE(b->c, &b->key, 0)->bdev; bio->bi_iter.bi_sector = PTR_OFFSET(&b->key, 0); bio->bi_iter.bi_size = KEY_SIZE(&v->key) << 9; - bio_set_op_attrs(bio, REQ_OP_READ, REQ_META|READ_SYNC); + bio->bi_opf = REQ_OP_READ | REQ_META; bch_bio_map(bio, sorted); submit_bio_wait(bio); @@ -113,7 +113,7 @@ void bch_data_verify(struct cached_dev *dc, struct bio *bio) check = bio_clone(bio, GFP_NOIO); if (!check) return; - bio_set_op_attrs(check, REQ_OP_READ, READ_SYNC); + check->bi_opf = REQ_OP_READ; if (bio_alloc_pages(check, GFP_NOIO)) goto out_put; diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index e8a2b693c928..0d99b5f4b3e6 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -923,7 +923,7 @@ static 
void cached_dev_write(struct cached_dev *dc, struct search *s) flush->bi_bdev = bio->bi_bdev; flush->bi_end_io = request_endio; flush->bi_private = cl; - bio_set_op_attrs(flush, REQ_OP_WRITE, WRITE_FLUSH); + flush->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; closure_bio_submit(flush, cl); } diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 849ad441cd76..988edf928466 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -381,7 +381,7 @@ static char *uuid_read(struct cache_set *c, struct jset *j, struct closure *cl) return "bad uuid pointer"; bkey_copy(&c->uuid_bucket, k); - uuid_io(c, REQ_OP_READ, READ_SYNC, k, cl); + uuid_io(c, REQ_OP_READ, 0, k, cl); if (j->version < BCACHE_JSET_VERSION_UUIDv1) { struct uuid_entry_v0 *u0 = (void *) c->uuids; @@ -600,7 +600,7 @@ static void prio_read(struct cache *ca, uint64_t bucket) ca->prio_last_buckets[bucket_nr] = bucket; bucket_nr++; - prio_io(ca, bucket, REQ_OP_READ, READ_SYNC); + prio_io(ca, bucket, REQ_OP_READ, 0); if (p->csum != bch_crc64(&p->magic, bucket_bytes(ca) - 8)) pr_warn("bad csum reading priorities"); diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index 125aedc3875f..b3ba142e59a4 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -1316,7 +1316,7 @@ int dm_bufio_issue_flush(struct dm_bufio_client *c) { struct dm_io_request io_req = { .bi_op = REQ_OP_WRITE, - .bi_op_flags = WRITE_FLUSH, + .bi_op_flags = REQ_PREFLUSH, .mem.type = DM_IO_KMEM, .mem.ptr.addr = NULL, .client = c->dm_io, diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c index 07fc1ad42ec5..33e71ea6cc14 100644 --- a/drivers/md/dm-log.c +++ b/drivers/md/dm-log.c @@ -308,7 +308,7 @@ static int flush_header(struct log_c *lc) }; lc->io_req.bi_op = REQ_OP_WRITE; - lc->io_req.bi_op_flags = WRITE_FLUSH; + lc->io_req.bi_op_flags = REQ_PREFLUSH; return dm_io(&lc->io_req, 1, &null_location, NULL); } diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index bdf1606f67bc..1a176d7c8b90 100644 
--- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -261,7 +261,7 @@ static int mirror_flush(struct dm_target *ti) struct mirror *m; struct dm_io_request io_req = { .bi_op = REQ_OP_WRITE, - .bi_op_flags = WRITE_FLUSH, + .bi_op_flags = REQ_PREFLUSH, .mem.type = DM_IO_KMEM, .mem.ptr.addr = NULL, .client = ms->io_client, @@ -657,7 +657,7 @@ static void do_write(struct mirror_set *ms, struct bio *bio) struct mirror *m; struct dm_io_request io_req = { .bi_op = REQ_OP_WRITE, - .bi_op_flags = bio->bi_opf & WRITE_FLUSH_FUA, + .bi_op_flags = bio->bi_opf & (REQ_FUA | REQ_PREFLUSH), .mem.type = DM_IO_BIO, .mem.ptr.bio = bio, .notify.fn = write_callback, diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c index b8cf956b577b..b93476c3ba3f 100644 --- a/drivers/md/dm-snap-persistent.c +++ b/drivers/md/dm-snap-persistent.c @@ -741,7 +741,7 @@ static void persistent_commit_exception(struct dm_exception_store *store, /* * Commit exceptions to disk. */ - if (ps->valid && area_io(ps, REQ_OP_WRITE, WRITE_FLUSH_FUA)) + if (ps->valid && area_io(ps, REQ_OP_WRITE, REQ_PREFLUSH | REQ_FUA)) ps->valid = 0; /* @@ -818,7 +818,7 @@ static int persistent_commit_merge(struct dm_exception_store *store, for (i = 0; i < nr_merged; i++) clear_exception(ps, ps->current_committed - 1 - i); - r = area_io(ps, REQ_OP_WRITE, WRITE_FLUSH_FUA); + r = area_io(ps, REQ_OP_WRITE, REQ_PREFLUSH | REQ_FUA); if (r < 0) return r; diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 147af9536d0c..b2abfa41af3e 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1527,7 +1527,7 @@ static struct mapped_device *alloc_dev(int minor) bio_init(&md->flush_bio); md->flush_bio.bi_bdev = md->bdev; - bio_set_op_attrs(&md->flush_bio, REQ_OP_WRITE, WRITE_FLUSH); + md->flush_bio.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; dm_stats_init(&md->stats); diff --git a/drivers/md/md.c b/drivers/md/md.c index eac84d8ff724..b69ec7da4bae 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -394,7 +394,7 @@ static 
void submit_flushes(struct work_struct *ws) bi->bi_end_io = md_end_flush; bi->bi_private = rdev; bi->bi_bdev = rdev->bdev; - bio_set_op_attrs(bi, REQ_OP_WRITE, WRITE_FLUSH); + bi->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; atomic_inc(&mddev->flush_pending); submit_bio(bi); rcu_read_lock(); @@ -743,7 +743,7 @@ void md_super_write(struct mddev *mddev, struct md_rdev *rdev, bio_add_page(bio, page, size, 0); bio->bi_private = rdev; bio->bi_end_io = super_written; - bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_FLUSH_FUA); + bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_FUA; atomic_inc(&mddev->pending_writes); submit_bio(bio); diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c index 1b1ab4a1d132..28d015c6fffe 100644 --- a/drivers/md/raid5-cache.c +++ b/drivers/md/raid5-cache.c @@ -685,7 +685,7 @@ void r5l_flush_stripe_to_raid(struct r5l_log *log) bio_reset(&log->flush_bio); log->flush_bio.bi_bdev = log->rdev->bdev; log->flush_bio.bi_end_io = r5l_log_flush_endio; - bio_set_op_attrs(&log->flush_bio, REQ_OP_WRITE, WRITE_FLUSH); + log->flush_bio.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; submit_bio(&log->flush_bio); } @@ -1053,7 +1053,7 @@ static int r5l_log_write_empty_meta_block(struct r5l_log *log, sector_t pos, mb->checksum = cpu_to_le32(crc); if (!sync_page_io(log->rdev, pos, PAGE_SIZE, page, REQ_OP_WRITE, - WRITE_FUA, false)) { + REQ_FUA, false)) { __free_page(page); return -EIO; } diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 92ac251e91e6..70acdd379e44 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -913,7 +913,7 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s) if (test_and_clear_bit(R5_Wantwrite, &sh->dev[i].flags)) { op = REQ_OP_WRITE; if (test_and_clear_bit(R5_WantFUA, &sh->dev[i].flags)) - op_flags = WRITE_FUA; + op_flags = REQ_FUA; if (test_bit(R5_Discard, &sh->dev[i].flags)) op = REQ_OP_DISCARD; } else if (test_and_clear_bit(R5_Wantread, &sh->dev[i].flags)) diff --git a/drivers/nvme/target/io-cmd.c 
b/drivers/nvme/target/io-cmd.c index 4a96c2049b7b..c2784cfc5e29 100644 --- a/drivers/nvme/target/io-cmd.c +++ b/drivers/nvme/target/io-cmd.c @@ -58,7 +58,7 @@ static void nvmet_execute_rw(struct nvmet_req *req) if (req->cmd->rw.opcode == nvme_cmd_write) { op = REQ_OP_WRITE; - op_flags = WRITE_ODIRECT; + op_flags = REQ_SYNC | REQ_IDLE; if (req->cmd->rw.control & cpu_to_le16(NVME_RW_FUA)) op_flags |= REQ_FUA; } else { @@ -109,7 +109,7 @@ static void nvmet_execute_flush(struct nvmet_req *req) bio->bi_bdev = req->ns->bdev; bio->bi_private = req; bio->bi_end_io = nvmet_bio_done; - bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_FLUSH); + bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; submit_bio(bio); } diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c index 372d744315f3..d316ed537d59 100644 --- a/drivers/target/target_core_iblock.c +++ b/drivers/target/target_core_iblock.c @@ -388,7 +388,7 @@ iblock_execute_sync_cache(struct se_cmd *cmd) bio = bio_alloc(GFP_KERNEL, 0); bio->bi_end_io = iblock_end_io_flush; bio->bi_bdev = ib_dev->ibd_bd; - bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_FLUSH); + bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; if (!immed) bio->bi_private = cmd; submit_bio(bio); @@ -686,15 +686,15 @@ iblock_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, struct iblock_dev *ib_dev = IBLOCK_DEV(dev); struct request_queue *q = bdev_get_queue(ib_dev->ibd_bd); /* - * Force writethrough using WRITE_FUA if a volatile write cache + * Force writethrough using REQ_FUA if a volatile write cache * is not enabled, or if initiator set the Force Unit Access bit. 
*/ op = REQ_OP_WRITE; if (test_bit(QUEUE_FLAG_FUA, &q->queue_flags)) { if (cmd->se_cmd_flags & SCF_FUA) - op_flags = WRITE_FUA; + op_flags = REQ_FUA; else if (!test_bit(QUEUE_FLAG_WC, &q->queue_flags)) - op_flags = WRITE_FUA; + op_flags = REQ_FUA; } } else { op = REQ_OP_READ; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index c8454a8e35f2..fe10afd51e02 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3485,9 +3485,9 @@ static int write_dev_supers(struct btrfs_device *device, * to go down lazy. */ if (i == 0) - ret = btrfsic_submit_bh(REQ_OP_WRITE, WRITE_FUA, bh); + ret = btrfsic_submit_bh(REQ_OP_WRITE, REQ_FUA, bh); else - ret = btrfsic_submit_bh(REQ_OP_WRITE, WRITE_SYNC, bh); + ret = btrfsic_submit_bh(REQ_OP_WRITE, REQ_SYNC, bh); if (ret) errors++; } @@ -3551,7 +3551,7 @@ static int write_dev_flush(struct btrfs_device *device, int wait) bio->bi_end_io = btrfs_end_empty_barrier; bio->bi_bdev = device->bdev; - bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_FLUSH); + bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; init_completion(&device->flush_wait); bio->bi_private = &device->flush_wait; device->flush_bio = bio; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 66a755150056..ff87bff7bdb6 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -127,7 +127,7 @@ struct extent_page_data { */ unsigned int extent_locked:1; - /* tells the submit_bio code to use a WRITE_SYNC */ + /* tells the submit_bio code to use REQ_SYNC */ unsigned int sync_io:1; }; @@ -2047,7 +2047,7 @@ int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical, return -EIO; } bio->bi_bdev = dev->bdev; - bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_SYNC); + bio->bi_opf = REQ_OP_WRITE | REQ_SYNC; bio_add_page(bio, page, length, pg_offset); if (btrfsic_submit_bio_wait(bio)) { @@ -2388,7 +2388,7 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset, struct inode *inode = page->mapping->host; struct extent_io_tree *tree = 
&BTRFS_I(inode)->io_tree; struct bio *bio; - int read_mode; + int read_mode = 0; int ret; BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE); @@ -2404,9 +2404,7 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset, } if (failed_bio->bi_vcnt > 1) - read_mode = READ_SYNC | REQ_FAILFAST_DEV; - else - read_mode = READ_SYNC; + read_mode |= REQ_FAILFAST_DEV; phy_offset >>= inode->i_sb->s_blocksize_bits; bio = btrfs_create_repair_bio(inode, failed_bio, failrec, page, @@ -3484,7 +3482,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, unsigned long nr_written = 0; if (wbc->sync_mode == WB_SYNC_ALL) - write_flags = WRITE_SYNC; + write_flags = REQ_SYNC; trace___extent_writepage(page, inode, wbc); @@ -3729,7 +3727,7 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb, unsigned long i, num_pages; unsigned long bio_flags = 0; unsigned long start, end; - int write_flags = (epd->sync_io ? WRITE_SYNC : 0) | REQ_META; + int write_flags = (epd->sync_io ? REQ_SYNC : 0) | REQ_META; int ret = 0; clear_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags); @@ -4076,7 +4074,7 @@ static void flush_epd_write_bio(struct extent_page_data *epd) int ret; bio_set_op_attrs(epd->bio, REQ_OP_WRITE, - epd->sync_io ? WRITE_SYNC : 0); + epd->sync_io ? 
REQ_SYNC : 0); ret = submit_one_bio(epd->bio, 0, epd->bio_flags); BUG_ON(ret < 0); /* -ENOMEM */ diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 9a377079af26..c8eb82a416b3 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7917,7 +7917,7 @@ static int dio_read_error(struct inode *inode, struct bio *failed_bio, struct io_failure_record *failrec; struct bio *bio; int isector; - int read_mode; + int read_mode = 0; int ret; BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE); @@ -7936,9 +7936,7 @@ static int dio_read_error(struct inode *inode, struct bio *failed_bio, if ((failed_bio->bi_vcnt > 1) || (failed_bio->bi_io_vec->bv_len > BTRFS_I(inode)->root->sectorsize)) - read_mode = READ_SYNC | REQ_FAILFAST_DEV; - else - read_mode = READ_SYNC; + read_mode |= REQ_FAILFAST_DEV; isector = start - btrfs_io_bio(failed_bio)->logical; isector >>= inode->i_sb->s_blocksize_bits; diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index fffb9ab8526e..ff3078234d94 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -4440,7 +4440,7 @@ static int write_page_nocow(struct scrub_ctx *sctx, bio->bi_iter.bi_size = 0; bio->bi_iter.bi_sector = physical_for_dev_replace >> 9; bio->bi_bdev = dev->bdev; - bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_SYNC); + bio->bi_opf = REQ_OP_WRITE | REQ_SYNC; ret = bio_add_page(bio, page, PAGE_SIZE, 0); if (ret != PAGE_SIZE) { leave_with_eio: diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index deda46cf1292..0d7d635d8bfb 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -6023,7 +6023,7 @@ static void btrfs_end_bio(struct bio *bio) else btrfs_dev_stat_inc(dev, BTRFS_DEV_STAT_READ_ERRS); - if ((bio->bi_opf & WRITE_FLUSH) == WRITE_FLUSH) + if (bio->bi_opf & REQ_PREFLUSH) btrfs_dev_stat_inc(dev, BTRFS_DEV_STAT_FLUSH_ERRS); btrfs_dev_stat_print_on_error(dev); diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 09ed29c67848..f137ffe6654c 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -62,7 +62,7 @@ struct btrfs_device { int 
running_pending; /* regular prio bios */ struct btrfs_pending_bios pending_bios; - /* WRITE_SYNC bios */ + /* sync bios */ struct btrfs_pending_bios pending_sync_bios; struct block_device *bdev; diff --git a/fs/buffer.c b/fs/buffer.c index a29335867e30..bc7c2bb30a9b 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -753,7 +753,7 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list) * still in flight on potentially older * contents. */ - write_dirty_buffer(bh, WRITE_SYNC); + write_dirty_buffer(bh, REQ_SYNC); /* * Kick off IO for the previous mapping. Note @@ -1684,7 +1684,7 @@ static struct buffer_head *create_page_buffers(struct page *page, struct inode * * prevents this contention from occurring. * * If block_write_full_page() is called with wbc->sync_mode == - * WB_SYNC_ALL, the writes are posted using WRITE_SYNC; this + * WB_SYNC_ALL, the writes are posted using REQ_SYNC; this * causes the writes to be flagged as synchronous writes. */ int __block_write_full_page(struct inode *inode, struct page *page, @@ -1697,7 +1697,7 @@ int __block_write_full_page(struct inode *inode, struct page *page, struct buffer_head *bh, *head; unsigned int blocksize, bbits; int nr_underway = 0; - int write_flags = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : 0); + int write_flags = (wbc->sync_mode == WB_SYNC_ALL ? 
REQ_SYNC : 0); head = create_page_buffers(page, inode, (1 << BH_Dirty)|(1 << BH_Uptodate)); @@ -3210,7 +3210,7 @@ EXPORT_SYMBOL(__sync_dirty_buffer); int sync_dirty_buffer(struct buffer_head *bh) { - return __sync_dirty_buffer(bh, WRITE_SYNC); + return __sync_dirty_buffer(bh, REQ_SYNC); } EXPORT_SYMBOL(sync_dirty_buffer); diff --git a/fs/direct-io.c b/fs/direct-io.c index fb9aa16a7727..a5138c564019 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -1209,7 +1209,7 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, dio->inode = inode; if (iov_iter_rw(iter) == WRITE) { dio->op = REQ_OP_WRITE; - dio->op_flags = WRITE_ODIRECT; + dio->op_flags = REQ_SYNC | REQ_IDLE; } else { dio->op = REQ_OP_READ; } diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c index d89754ef1aab..eb9835638680 100644 --- a/fs/ext4/mmp.c +++ b/fs/ext4/mmp.c @@ -35,7 +35,7 @@ static void ext4_mmp_csum_set(struct super_block *sb, struct mmp_struct *mmp) } /* - * Write the MMP block using WRITE_SYNC to try to get the block on-disk + * Write the MMP block using REQ_SYNC to try to get the block on-disk * faster. 
*/ static int write_mmp_block(struct super_block *sb, struct buffer_head *bh) @@ -52,7 +52,7 @@ static int write_mmp_block(struct super_block *sb, struct buffer_head *bh) lock_buffer(bh); bh->b_end_io = end_buffer_write_sync; get_bh(bh); - submit_bh(REQ_OP_WRITE, WRITE_SYNC | REQ_META | REQ_PRIO, bh); + submit_bh(REQ_OP_WRITE, REQ_SYNC | REQ_META | REQ_PRIO, bh); wait_on_buffer(bh); sb_end_write(sb); if (unlikely(!buffer_uptodate(bh))) @@ -88,7 +88,7 @@ static int read_mmp_block(struct super_block *sb, struct buffer_head **bh, get_bh(*bh); lock_buffer(*bh); (*bh)->b_end_io = end_buffer_read_sync; - submit_bh(REQ_OP_READ, READ_SYNC | REQ_META | REQ_PRIO, *bh); + submit_bh(REQ_OP_READ, REQ_META | REQ_PRIO, *bh); wait_on_buffer(*bh); if (!buffer_uptodate(*bh)) { ret = -EIO; diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 0094923e5ebf..e0b3b54cdef3 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -340,7 +340,7 @@ void ext4_io_submit(struct ext4_io_submit *io) if (bio) { int io_op_flags = io->io_wbc->sync_mode == WB_SYNC_ALL ? - WRITE_SYNC : 0; + REQ_SYNC : 0; bio_set_op_attrs(io->io_bio, REQ_OP_WRITE, io_op_flags); submit_bio(io->io_bio); } diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 6db81fbcbaa6..f31eb286af90 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4553,7 +4553,7 @@ static int ext4_commit_super(struct super_block *sb, int sync) unlock_buffer(sbh); if (sync) { error = __sync_dirty_buffer(sbh, - test_opt(sb, BARRIER) ? WRITE_FUA : WRITE_SYNC); + test_opt(sb, BARRIER) ? 
REQ_FUA : REQ_SYNC); if (error) return error; diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 7e9b504bd8b2..d935c06a84f0 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -65,7 +65,7 @@ static struct page *__get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index, .sbi = sbi, .type = META, .op = REQ_OP_READ, - .op_flags = READ_SYNC | REQ_META | REQ_PRIO, + .op_flags = REQ_META | REQ_PRIO, .old_blkaddr = index, .new_blkaddr = index, .encrypted_page = NULL, @@ -160,7 +160,7 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, .sbi = sbi, .type = META, .op = REQ_OP_READ, - .op_flags = sync ? (READ_SYNC | REQ_META | REQ_PRIO) : REQ_RAHEAD, + .op_flags = sync ? (REQ_META | REQ_PRIO) : REQ_RAHEAD, .encrypted_page = NULL, }; struct blk_plug plug; diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 9ae194fd2fdb..b80bf10603d7 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -198,11 +198,9 @@ static void __f2fs_submit_merged_bio(struct f2fs_sb_info *sbi, if (type >= META_FLUSH) { io->fio.type = META_FLUSH; io->fio.op = REQ_OP_WRITE; - if (test_opt(sbi, NOBARRIER)) - io->fio.op_flags = WRITE_FLUSH | REQ_META | REQ_PRIO; - else - io->fio.op_flags = WRITE_FLUSH_FUA | REQ_META | - REQ_PRIO; + io->fio.op_flags = REQ_PREFLUSH | REQ_META | REQ_PRIO; + if (!test_opt(sbi, NOBARRIER)) + io->fio.op_flags |= REQ_FUA; } __submit_merged_bio(io); out: @@ -483,7 +481,7 @@ struct page *find_data_page(struct inode *inode, pgoff_t index) return page; f2fs_put_page(page, 0); - page = get_read_data_page(inode, index, READ_SYNC, false); + page = get_read_data_page(inode, index, 0, false); if (IS_ERR(page)) return page; @@ -509,7 +507,7 @@ struct page *get_lock_data_page(struct inode *inode, pgoff_t index, struct address_space *mapping = inode->i_mapping; struct page *page; repeat: - page = get_read_data_page(inode, index, READ_SYNC, for_write); + page = get_read_data_page(inode, index, 0, for_write); if (IS_ERR(page)) return page; @@ -1251,7 
+1249,7 @@ static int f2fs_write_data_page(struct page *page, .sbi = sbi, .type = DATA, .op = REQ_OP_WRITE, - .op_flags = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : 0, + .op_flags = (wbc->sync_mode == WB_SYNC_ALL) ? REQ_SYNC : 0, .page = page, .encrypted_page = NULL, }; @@ -1663,7 +1661,7 @@ repeat: err = PTR_ERR(bio); goto fail; } - bio_set_op_attrs(bio, REQ_OP_READ, READ_SYNC); + bio->bi_opf = REQ_OP_READ; if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { bio_put(bio); err = -EFAULT; diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 93985c64d8a8..9eb11b2244ea 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -550,7 +550,7 @@ static void move_encrypted_block(struct inode *inode, block_t bidx) .sbi = F2FS_I_SB(inode), .type = DATA, .op = REQ_OP_READ, - .op_flags = READ_SYNC, + .op_flags = 0, .encrypted_page = NULL, }; struct dnode_of_data dn; @@ -625,7 +625,7 @@ static void move_encrypted_block(struct inode *inode, block_t bidx) f2fs_wait_on_page_writeback(dn.node_page, NODE, true); fio.op = REQ_OP_WRITE; - fio.op_flags = WRITE_SYNC; + fio.op_flags = REQ_SYNC; fio.new_blkaddr = newaddr; f2fs_submit_page_mbio(&fio); @@ -663,7 +663,7 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type) .sbi = F2FS_I_SB(inode), .type = DATA, .op = REQ_OP_WRITE, - .op_flags = WRITE_SYNC, + .op_flags = REQ_SYNC, .page = page, .encrypted_page = NULL, }; diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 5f1a67f756af..2e7f54c191b4 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -111,7 +111,7 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) .sbi = F2FS_I_SB(dn->inode), .type = DATA, .op = REQ_OP_WRITE, - .op_flags = WRITE_SYNC | REQ_PRIO, + .op_flags = REQ_SYNC | REQ_PRIO, .page = page, .encrypted_page = NULL, }; diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 01177ecdeab8..932f3f8bb57b 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1134,7 +1134,7 @@ repeat: if (!page) return ERR_PTR(-ENOMEM); - err = 
read_node_page(page, READ_SYNC); + err = read_node_page(page, 0); if (err < 0) { f2fs_put_page(page, 1); return ERR_PTR(err); @@ -1570,7 +1570,7 @@ static int f2fs_write_node_page(struct page *page, .sbi = sbi, .type = NODE, .op = REQ_OP_WRITE, - .op_flags = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : 0, + .op_flags = (wbc->sync_mode == WB_SYNC_ALL) ? REQ_SYNC : 0, .page = page, .encrypted_page = NULL, }; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index fc886f008449..f1b4a1775ebe 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -259,7 +259,7 @@ static int __commit_inmem_pages(struct inode *inode, .sbi = sbi, .type = DATA, .op = REQ_OP_WRITE, - .op_flags = WRITE_SYNC | REQ_PRIO, + .op_flags = REQ_SYNC | REQ_PRIO, .encrypted_page = NULL, }; bool submit_bio = false; @@ -420,7 +420,7 @@ repeat: fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list); bio->bi_bdev = sbi->sb->s_bdev; - bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_FLUSH); + bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; ret = submit_bio_wait(bio); llist_for_each_entry_safe(cmd, next, @@ -454,7 +454,7 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi) atomic_inc(&fcc->submit_flush); bio->bi_bdev = sbi->sb->s_bdev; - bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_FLUSH); + bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; ret = submit_bio_wait(bio); atomic_dec(&fcc->submit_flush); bio_put(bio); @@ -1515,7 +1515,7 @@ void write_meta_page(struct f2fs_sb_info *sbi, struct page *page) .sbi = sbi, .type = META, .op = REQ_OP_WRITE, - .op_flags = WRITE_SYNC | REQ_META | REQ_PRIO, + .op_flags = REQ_SYNC | REQ_META | REQ_PRIO, .old_blkaddr = page->index, .new_blkaddr = page->index, .page = page, diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 6132b4ce4e4c..2cac6bb86080 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1238,7 +1238,7 @@ static int __f2fs_commit_super(struct buffer_head *bh, unlock_buffer(bh); /* it's rare case, we can do fua all the time */ - return __sync_dirty_buffer(bh, 
WRITE_FLUSH_FUA); + return __sync_dirty_buffer(bh, REQ_PREFLUSH | REQ_FUA); } static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi, diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index e58ccef09c91..27c00a16def0 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -657,7 +657,7 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags) struct gfs2_log_header *lh; unsigned int tail; u32 hash; - int op_flags = WRITE_FLUSH_FUA | REQ_META; + int op_flags = REQ_PREFLUSH | REQ_FUA | REQ_META; struct page *page = mempool_alloc(gfs2_page_pool, GFP_NOIO); enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state); lh = page_address(page); @@ -682,7 +682,7 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags) if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags)) { gfs2_ordered_wait(sdp); log_flush_wait(sdp); - op_flags = WRITE_SYNC | REQ_META | REQ_PRIO; + op_flags = REQ_SYNC | REQ_META | REQ_PRIO; } sdp->sd_log_idle = (tail == sdp->sd_log_flush_head); diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 373639a59782..e562b1191c9c 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -38,7 +38,7 @@ static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wb struct buffer_head *bh, *head; int nr_underway = 0; int write_flags = REQ_META | REQ_PRIO | - (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : 0); + (wbc->sync_mode == WB_SYNC_ALL ? 
REQ_SYNC : 0); BUG_ON(!PageLocked(page)); BUG_ON(!page_has_buffers(page)); @@ -285,7 +285,7 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags, } } - gfs2_submit_bhs(REQ_OP_READ, READ_SYNC | REQ_META | REQ_PRIO, bhs, num); + gfs2_submit_bhs(REQ_OP_READ, REQ_META | REQ_PRIO, bhs, num); if (!(flags & DIO_WAIT)) return 0; @@ -453,7 +453,7 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen) if (buffer_uptodate(first_bh)) goto out; if (!buffer_locked(first_bh)) - ll_rw_block(REQ_OP_READ, READ_SYNC | REQ_META, 1, &first_bh); + ll_rw_block(REQ_OP_READ, REQ_META, 1, &first_bh); dblock++; extlen--; diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index ff72ac6439c8..a34308df927f 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -246,7 +246,7 @@ static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector, int silent) bio->bi_end_io = end_bio_io_page; bio->bi_private = page; - bio_set_op_attrs(bio, REQ_OP_READ, READ_SYNC | REQ_META); + bio_set_op_attrs(bio, REQ_OP_READ, REQ_META); submit_bio(bio); wait_on_page_locked(page); bio_put(bio); diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 11854dd84572..67aedf4c2e7c 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -221,7 +221,7 @@ static int hfsplus_sync_fs(struct super_block *sb, int wait) error2 = hfsplus_submit_bio(sb, sbi->part_start + HFSPLUS_VOLHEAD_SECTOR, sbi->s_vhdr_buf, NULL, REQ_OP_WRITE, - WRITE_SYNC); + REQ_SYNC); if (!error) error = error2; if (!write_backup) @@ -230,7 +230,7 @@ static int hfsplus_sync_fs(struct super_block *sb, int wait) error2 = hfsplus_submit_bio(sb, sbi->part_start + sbi->sect_count - 2, sbi->s_backup_vhdr_buf, NULL, REQ_OP_WRITE, - WRITE_SYNC); + REQ_SYNC); if (!error) error2 = error; out: diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 684996c8a3a4..4055f51617ef 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c @@ -186,7 +186,7 @@ __flush_batch(journal_t *journal, int 
*batch_count) blk_start_plug(&plug); for (i = 0; i < *batch_count; i++) - write_dirty_buffer(journal->j_chkpt_bhs[i], WRITE_SYNC); + write_dirty_buffer(journal->j_chkpt_bhs[i], REQ_SYNC); blk_finish_plug(&plug); for (i = 0; i < *batch_count; i++) { diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 31f8ca046639..8c514367ba5a 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -155,9 +155,10 @@ static int journal_submit_commit_record(journal_t *journal, if (journal->j_flags & JBD2_BARRIER && !jbd2_has_feature_async_commit(journal)) - ret = submit_bh(REQ_OP_WRITE, WRITE_SYNC | WRITE_FLUSH_FUA, bh); + ret = submit_bh(REQ_OP_WRITE, + REQ_SYNC | REQ_PREFLUSH | REQ_FUA, bh); else - ret = submit_bh(REQ_OP_WRITE, WRITE_SYNC, bh); + ret = submit_bh(REQ_OP_WRITE, REQ_SYNC, bh); *cbh = bh; return ret; @@ -402,7 +403,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) jbd2_journal_update_sb_log_tail(journal, journal->j_tail_sequence, journal->j_tail, - WRITE_SYNC); + REQ_SYNC); mutex_unlock(&journal->j_checkpoint_mutex); } else { jbd_debug(3, "superblock not updated\n"); @@ -717,7 +718,7 @@ start_journal_io: clear_buffer_dirty(bh); set_buffer_uptodate(bh); bh->b_end_io = journal_end_buffer_io_sync; - submit_bh(REQ_OP_WRITE, WRITE_SYNC, bh); + submit_bh(REQ_OP_WRITE, REQ_SYNC, bh); } cond_resched(); stats.run.rs_blocks_logged += bufs; diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 927da4956a89..8ed971eeab44 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -913,7 +913,7 @@ int __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block) * space and if we lose sb update during power failure we'd replay * old transaction with possibly newly overwritten data. */ - ret = jbd2_journal_update_sb_log_tail(journal, tid, block, WRITE_FUA); + ret = jbd2_journal_update_sb_log_tail(journal, tid, block, REQ_FUA); if (ret) goto out; @@ -1306,7 +1306,7 @@ static int journal_reset(journal_t *journal) /* Lock here to make assertions happy... 
*/ mutex_lock(&journal->j_checkpoint_mutex); /* - * Update log tail information. We use WRITE_FUA since new + * Update log tail information. We use REQ_FUA since new * transaction will start reusing journal space and so we * must make sure information about current log tail is on * disk before that. @@ -1314,7 +1314,7 @@ static int journal_reset(journal_t *journal) jbd2_journal_update_sb_log_tail(journal, journal->j_tail_sequence, journal->j_tail, - WRITE_FUA); + REQ_FUA); mutex_unlock(&journal->j_checkpoint_mutex); } return jbd2_journal_start_thread(journal); @@ -1454,7 +1454,7 @@ void jbd2_journal_update_sb_errno(journal_t *journal) sb->s_errno = cpu_to_be32(journal->j_errno); read_unlock(&journal->j_state_lock); - jbd2_write_superblock(journal, WRITE_FUA); + jbd2_write_superblock(journal, REQ_FUA); } EXPORT_SYMBOL(jbd2_journal_update_sb_errno); @@ -1720,7 +1720,8 @@ int jbd2_journal_destroy(journal_t *journal) ++journal->j_transaction_sequence; write_unlock(&journal->j_state_lock); - jbd2_mark_journal_empty(journal, WRITE_FLUSH_FUA); + jbd2_mark_journal_empty(journal, + REQ_PREFLUSH | REQ_FUA); mutex_unlock(&journal->j_checkpoint_mutex); } else err = -EIO; @@ -1979,7 +1980,7 @@ int jbd2_journal_flush(journal_t *journal) * the magic code for a fully-recovered superblock. Any future * commits of data to the journal will restore the current * s_start value. */ - jbd2_mark_journal_empty(journal, WRITE_FUA); + jbd2_mark_journal_empty(journal, REQ_FUA); mutex_unlock(&journal->j_checkpoint_mutex); write_lock(&journal->j_state_lock); J_ASSERT(!journal->j_running_transaction); @@ -2025,7 +2026,7 @@ int jbd2_journal_wipe(journal_t *journal, int write) if (write) { /* Lock to make assertions happy... 
*/ mutex_lock(&journal->j_checkpoint_mutex); - jbd2_mark_journal_empty(journal, WRITE_FUA); + jbd2_mark_journal_empty(journal, REQ_FUA); mutex_unlock(&journal->j_checkpoint_mutex); } diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c index 91171dc352cb..cfc38b552118 100644 --- a/fs/jbd2/revoke.c +++ b/fs/jbd2/revoke.c @@ -648,7 +648,7 @@ static void flush_descriptor(journal_t *journal, set_buffer_jwrite(descriptor); BUFFER_TRACE(descriptor, "write"); set_buffer_dirty(descriptor); - write_dirty_buffer(descriptor, WRITE_SYNC); + write_dirty_buffer(descriptor, REQ_SYNC); } #endif diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c index a21ea8b3e5fa..bb1da1feafeb 100644 --- a/fs/jfs/jfs_logmgr.c +++ b/fs/jfs/jfs_logmgr.c @@ -2002,7 +2002,7 @@ static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp) bio->bi_end_io = lbmIODone; bio->bi_private = bp; - bio_set_op_attrs(bio, REQ_OP_READ, READ_SYNC); + bio->bi_opf = REQ_OP_READ; /*check if journaling to disk has been disabled*/ if (log->no_integrity) { bio->bi_iter.bi_size = 0; @@ -2146,7 +2146,7 @@ static void lbmStartIO(struct lbuf * bp) bio->bi_end_io = lbmIODone; bio->bi_private = bp; - bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_SYNC); + bio->bi_opf = REQ_OP_WRITE | REQ_SYNC; /* check if journaling to disk has been disabled */ if (log->no_integrity) { diff --git a/fs/mpage.c b/fs/mpage.c index d2413af0823a..f35e2819d0c6 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -489,7 +489,7 @@ static int __mpage_writepage(struct page *page, struct writeback_control *wbc, struct buffer_head map_bh; loff_t i_size = i_size_read(inode); int ret = 0; - int op_flags = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : 0); + int op_flags = (wbc->sync_mode == WB_SYNC_ALL ? 
REQ_SYNC : 0); if (page_has_buffers(page)) { struct buffer_head *head = page_buffers(page); @@ -705,7 +705,7 @@ mpage_writepages(struct address_space *mapping, ret = write_cache_pages(mapping, wbc, __mpage_writepage, &mpd); if (mpd.bio) { int op_flags = (wbc->sync_mode == WB_SYNC_ALL ? - WRITE_SYNC : 0); + REQ_SYNC : 0); mpage_bio_submit(REQ_OP_WRITE, op_flags, mpd.bio); } } @@ -726,7 +726,7 @@ int mpage_writepage(struct page *page, get_block_t get_block, int ret = __mpage_writepage(page, wbc, &mpd); if (mpd.bio) { int op_flags = (wbc->sync_mode == WB_SYNC_ALL ? - WRITE_SYNC : 0); + REQ_SYNC : 0); mpage_bio_submit(REQ_OP_WRITE, op_flags, mpd.bio); } return ret; diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index c95d369e90aa..12eeae62a2b1 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -189,7 +189,7 @@ static int nilfs_sync_super(struct super_block *sb, int flag) set_buffer_dirty(nilfs->ns_sbh[0]); if (nilfs_test_opt(nilfs, BARRIER)) { err = __sync_dirty_buffer(nilfs->ns_sbh[0], - WRITE_SYNC | WRITE_FLUSH_FUA); + REQ_SYNC | REQ_PREFLUSH | REQ_FUA); } else { err = sync_dirty_buffer(nilfs->ns_sbh[0]); } diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 636abcbd4650..52eef16edb01 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -627,7 +627,7 @@ static int o2hb_issue_node_write(struct o2hb_region *reg, slot = o2nm_this_node(); bio = o2hb_setup_one_bio(reg, write_wc, &slot, slot+1, REQ_OP_WRITE, - WRITE_SYNC); + REQ_SYNC); if (IS_ERR(bio)) { status = PTR_ERR(bio); mlog_errno(status); diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index bc2dde2423c2..aa40c242f1db 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -1111,7 +1111,8 @@ static int flush_commit_list(struct super_block *s, mark_buffer_dirty(jl->j_commit_bh) ; depth = reiserfs_write_unlock_nested(s); if (reiserfs_barrier_flush(s)) - __sync_dirty_buffer(jl->j_commit_bh, WRITE_FLUSH_FUA); + 
__sync_dirty_buffer(jl->j_commit_bh, + REQ_PREFLUSH | REQ_FUA); else sync_dirty_buffer(jl->j_commit_bh); reiserfs_write_lock_nested(s, depth); @@ -1269,7 +1270,8 @@ static int _update_journal_header_block(struct super_block *sb, depth = reiserfs_write_unlock_nested(sb); if (reiserfs_barrier_flush(sb)) - __sync_dirty_buffer(journal->j_header_bh, WRITE_FLUSH_FUA); + __sync_dirty_buffer(journal->j_header_bh, + REQ_PREFLUSH | REQ_FUA); else sync_dirty_buffer(journal->j_header_bh); diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 3e57a56cf829..594e02c485b2 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -495,8 +495,10 @@ xfs_submit_ioend( ioend->io_bio->bi_private = ioend; ioend->io_bio->bi_end_io = xfs_end_bio; - bio_set_op_attrs(ioend->io_bio, REQ_OP_WRITE, - (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : 0); + ioend->io_bio->bi_opf = REQ_OP_WRITE; + if (wbc->sync_mode == WB_SYNC_ALL) + ioend->io_bio->bi_opf |= REQ_SYNC; + /* * If we are failing the IO now, just mark the ioend with an * error and finish it. This will run IO completion immediately @@ -567,8 +569,9 @@ xfs_chain_bio( bio_chain(ioend->io_bio, new); bio_get(ioend->io_bio); /* for xfs_destroy_ioend */ - bio_set_op_attrs(ioend->io_bio, REQ_OP_WRITE, - (wbc->sync_mode == WB_SYNC_ALL) ? 
WRITE_SYNC : 0); + ioend->io_bio->bi_opf = REQ_OP_WRITE; + if (wbc->sync_mode == WB_SYNC_ALL) + ioend->io_bio->bi_opf |= REQ_SYNC; submit_bio(ioend->io_bio); ioend->io_bio = new; } diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index b5b9bffe3520..33c435f3316c 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1304,7 +1304,7 @@ _xfs_buf_ioapply( if (bp->b_flags & XBF_WRITE) { op = REQ_OP_WRITE; if (bp->b_flags & XBF_SYNCIO) - op_flags = WRITE_SYNC; + op_flags = REQ_SYNC; if (bp->b_flags & XBF_FUA) op_flags |= REQ_FUA; if (bp->b_flags & XBF_FLUSH) diff --git a/include/linux/fs.h b/include/linux/fs.h index 46a74209917f..7a1b78ab7c15 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -151,58 +151,11 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, */ #define CHECK_IOVEC_ONLY -1 -/* - * The below are the various read and write flags that we support. Some of - * them include behavioral modifiers that send information down to the - * block layer and IO scheduler. They should be used along with a req_op. - * Terminology: - * - * The block layer uses device plugging to defer IO a little bit, in - * the hope that we will see more IO very shortly. This increases - * coalescing of adjacent IO and thus reduces the number of IOs we - * have to send to the device. It also allows for better queuing, - * if the IO isn't mergeable. If the caller is going to be waiting - * for the IO, then he must ensure that the device is unplugged so - * that the IO is dispatched to the driver. - * - * All IO is handled async in Linux. This is fine for background - * writes, but for reads or writes that someone waits for completion - * on, we want to notify the block layer and IO scheduler so that they - * know about it. That allows them to make better scheduling - * decisions. So when the below references 'sync' and 'async', it - * is referencing this priority hint. - * - * With that in mind, the available types are: - * - * READ A normal read operation. 
Device will be plugged. - * READ_SYNC A synchronous read. Device is not plugged, caller can - * immediately wait on this read without caring about - * unplugging. - * WRITE A normal async write. Device will be plugged. - * WRITE_SYNC Synchronous write. Identical to WRITE, but passes down - * the hint that someone will be waiting on this IO - * shortly. The write equivalent of READ_SYNC. - * WRITE_ODIRECT Special case write for O_DIRECT only. - * WRITE_FLUSH Like WRITE_SYNC but with preceding cache flush. - * WRITE_FUA Like WRITE_SYNC but data is guaranteed to be on - * non-volatile media on completion. - * WRITE_FLUSH_FUA Combination of WRITE_FLUSH and FUA. The IO is preceded - * by a cache flush and data is guaranteed to be on - * non-volatile media on completion. - * - */ #define RW_MASK REQ_OP_WRITE #define READ REQ_OP_READ #define WRITE REQ_OP_WRITE -#define READ_SYNC 0 -#define WRITE_SYNC REQ_SYNC -#define WRITE_ODIRECT (REQ_SYNC | REQ_IDLE) -#define WRITE_FLUSH REQ_PREFLUSH -#define WRITE_FUA REQ_FUA -#define WRITE_FLUSH_FUA (REQ_PREFLUSH | REQ_FUA) - /* * Attribute flags. These should be or-ed together to figure out what * has been changed! 
diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index a9d34424450d..5da2c829a718 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -55,7 +55,7 @@ TRACE_DEFINE_ENUM(CP_DISCARD); { IPU, "IN-PLACE" }, \ { OPU, "OUT-OF-PLACE" }) -#define F2FS_BIO_FLAG_MASK(t) (t & (REQ_RAHEAD | WRITE_FLUSH_FUA)) +#define F2FS_BIO_FLAG_MASK(t) (t & (REQ_RAHEAD | REQ_PREFLUSH | REQ_FUA)) #define F2FS_BIO_EXTRA_MASK(t) (t & (REQ_META | REQ_PRIO)) #define show_bio_type(op_flags) show_bio_op_flags(op_flags), \ @@ -65,11 +65,9 @@ TRACE_DEFINE_ENUM(CP_DISCARD); __print_symbolic(F2FS_BIO_FLAG_MASK(flags), \ { 0, "WRITE" }, \ { REQ_RAHEAD, "READAHEAD" }, \ - { READ_SYNC, "READ_SYNC" }, \ - { WRITE_SYNC, "WRITE_SYNC" }, \ - { WRITE_FLUSH, "WRITE_FLUSH" }, \ - { WRITE_FUA, "WRITE_FUA" }, \ - { WRITE_FLUSH_FUA, "WRITE_FLUSH_FUA" }) + { REQ_SYNC, "REQ_SYNC" }, \ + { REQ_PREFLUSH, "REQ_PREFLUSH" }, \ + { REQ_FUA, "REQ_FUA" }) #define show_bio_extra(type) \ __print_symbolic(F2FS_BIO_EXTRA_MASK(type), \ diff --git a/kernel/power/swap.c b/kernel/power/swap.c index a3b1e617bcdc..32e0c232efba 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -307,7 +307,7 @@ static int mark_swapfiles(struct swap_map_handle *handle, unsigned int flags) { int error; - hib_submit_io(REQ_OP_READ, READ_SYNC, swsusp_resume_block, + hib_submit_io(REQ_OP_READ, 0, swsusp_resume_block, swsusp_header, NULL); if (!memcmp("SWAP-SPACE",swsusp_header->sig, 10) || !memcmp("SWAPSPACE2",swsusp_header->sig, 10)) { @@ -317,7 +317,7 @@ static int mark_swapfiles(struct swap_map_handle *handle, unsigned int flags) swsusp_header->flags = flags; if (flags & SF_CRC32_MODE) swsusp_header->crc32 = handle->crc32; - error = hib_submit_io(REQ_OP_WRITE, WRITE_SYNC, + error = hib_submit_io(REQ_OP_WRITE, REQ_SYNC, swsusp_resume_block, swsusp_header, NULL); } else { printk(KERN_ERR "PM: Swap header not found!\n"); @@ -397,7 +397,7 @@ static int write_page(void *buf, sector_t offset, struct 
hib_bio_batch *hb) } else { src = buf; } - return hib_submit_io(REQ_OP_WRITE, WRITE_SYNC, offset, src, hb); + return hib_submit_io(REQ_OP_WRITE, REQ_SYNC, offset, src, hb); } static void release_swap_writer(struct swap_map_handle *handle) @@ -1000,8 +1000,7 @@ static int get_swap_reader(struct swap_map_handle *handle, return -ENOMEM; } - error = hib_submit_io(REQ_OP_READ, READ_SYNC, offset, - tmp->map, NULL); + error = hib_submit_io(REQ_OP_READ, 0, offset, tmp->map, NULL); if (error) { release_swap_reader(handle); return error; @@ -1025,7 +1024,7 @@ static int swap_read_page(struct swap_map_handle *handle, void *buf, offset = handle->cur->entries[handle->k]; if (!offset) return -EFAULT; - error = hib_submit_io(REQ_OP_READ, READ_SYNC, offset, buf, hb); + error = hib_submit_io(REQ_OP_READ, 0, offset, buf, hb); if (error) return error; if (++handle->k >= MAP_PAGE_ENTRIES) { @@ -1534,7 +1533,7 @@ int swsusp_check(void) if (!IS_ERR(hib_resume_bdev)) { set_blocksize(hib_resume_bdev, PAGE_SIZE); clear_page(swsusp_header); - error = hib_submit_io(REQ_OP_READ, READ_SYNC, + error = hib_submit_io(REQ_OP_READ, 0, swsusp_resume_block, swsusp_header, NULL); if (error) @@ -1543,7 +1542,7 @@ int swsusp_check(void) if (!memcmp(HIBERNATE_SIG, swsusp_header->sig, 10)) { memcpy(swsusp_header->sig, swsusp_header->orig_sig, 10); /* Reset swap signature now */ - error = hib_submit_io(REQ_OP_WRITE, WRITE_SYNC, + error = hib_submit_io(REQ_OP_WRITE, REQ_SYNC, swsusp_resume_block, swsusp_header, NULL); } else { @@ -1588,11 +1587,11 @@ int swsusp_unmark(void) { int error; - hib_submit_io(REQ_OP_READ, READ_SYNC, swsusp_resume_block, + hib_submit_io(REQ_OP_READ, 0, swsusp_resume_block, swsusp_header, NULL); if (!memcmp(HIBERNATE_SIG,swsusp_header->sig, 10)) { memcpy(swsusp_header->sig,swsusp_header->orig_sig, 10); - error = hib_submit_io(REQ_OP_WRITE, WRITE_SYNC, + error = hib_submit_io(REQ_OP_WRITE, REQ_SYNC, swsusp_resume_block, swsusp_header, NULL); } else { -- cgit v1.2.3 From 
d38499530e5f170d30f32d3841fade204e63081d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 1 Nov 2016 07:40:11 -0600 Subject: fs: decouple READ and WRITE from the block layer ops Move READ and WRITE to kernel.h and don't define them in terms of block layer ops; they are our generic data direction indicators these days and have no more resemblance with the block layer ops. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/bio.h | 6 ++++++ include/linux/fs.h | 13 ------------- include/linux/kernel.h | 4 ++++ include/linux/uio.h | 2 +- 4 files changed, 11 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 87ce64dafb93..fe9a17017608 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -62,6 +62,12 @@ #define bio_sectors(bio) ((bio)->bi_iter.bi_size >> 9) #define bio_end_sector(bio) ((bio)->bi_iter.bi_sector + bio_sectors((bio))) +/* + * Return the data direction, READ or WRITE. + */ +#define bio_data_dir(bio) \ + (op_is_write(bio_op(bio)) ? WRITE : READ) + /* * Check whether this bio carries any data or not. A NULL bio is allowed. */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 7a1b78ab7c15..0ad36e0c7fa7 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -151,11 +151,6 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, */ #define CHECK_IOVEC_ONLY -1 -#define RW_MASK REQ_OP_WRITE - -#define READ REQ_OP_READ -#define WRITE REQ_OP_WRITE - /* * Attribute flags. These should be or-ed together to figure out what * has been changed! @@ -2452,14 +2447,6 @@ extern void make_bad_inode(struct inode *); extern bool is_bad_inode(struct inode *); #ifdef CONFIG_BLOCK -/* - * return data direction, READ or WRITE - */ -static inline int bio_data_dir(struct bio *bio) -{ - return op_is_write(bio_op(bio)) ? 
WRITE : READ; -} - extern void check_disk_size_change(struct gendisk *disk, struct block_device *bdev); extern int revalidate_disk(struct gendisk *); diff --git a/include/linux/kernel.h b/include/linux/kernel.h index bc6ed52a39b9..01b6b460c34d 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -50,6 +50,10 @@ #define PTR_ALIGN(p, a) ((typeof(p))ALIGN((unsigned long)(p), (a))) #define IS_ALIGNED(x, a) (((x) & ((typeof(x))(a) - 1)) == 0) +/* generic data direction definitions */ +#define READ 0 +#define WRITE 1 + #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr)) #define u64_to_user_ptr(x) ( \ diff --git a/include/linux/uio.h b/include/linux/uio.h index 6e22b544d039..d5aba1512b8b 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -125,7 +125,7 @@ static inline bool iter_is_iovec(const struct iov_iter *i) * * The ?: is just for type safety. */ -#define iov_iter_rw(i) ((0 ? (struct iov_iter *)0 : (i))->type & RW_MASK) +#define iov_iter_rw(i) ((0 ? (struct iov_iter *)0 : (i))->type & (READ | WRITE)) /* * Cap the iov_iter by given limit; note that the second argument is -- cgit v1.2.3 From 1e3914d4cf4e14653b7917b0e965217465cb7a9c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 1 Nov 2016 07:40:12 -0600 Subject: block, fs: move submit_bio to bio.h This is where all the other bio operations live, so users must include bio.h anyway. 
Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/bio.h | 2 ++ include/linux/fs.h | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index fe9a17017608..5c604b4914bf 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -404,6 +404,8 @@ static inline struct bio *bio_clone_kmalloc(struct bio *bio, gfp_t gfp_mask) } +extern blk_qc_t submit_bio(struct bio *); + extern void bio_endio(struct bio *); static inline void bio_io_error(struct bio *bio) diff --git a/include/linux/fs.h b/include/linux/fs.h index 0ad36e0c7fa7..5b0a9b77534d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2717,7 +2717,6 @@ static inline void remove_inode_hash(struct inode *inode) extern void inode_sb_list_add(struct inode *inode); #ifdef CONFIG_BLOCK -extern blk_qc_t submit_bio(struct bio *); extern int bdev_read_only(struct block_device *); #endif extern int set_blocksize(struct block_device *, int); -- cgit v1.2.3 From 2f8b544477e627a42e66902e948d87f86554aeca Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 1 Nov 2016 07:40:13 -0600 Subject: block,fs: untangle fs.h and blk_types.h Nothing in fs.h should require blk_types.h to be included. 
Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- fs/9p/vfs_addr.c | 1 + fs/cifs/connect.c | 1 + fs/cifs/transport.c | 1 + fs/gfs2/dir.c | 1 + fs/isofs/compress.c | 1 + fs/ntfs/logfile.c | 1 + fs/ocfs2/buffer_head_io.c | 1 + fs/orangefs/inode.c | 1 + fs/reiserfs/stree.c | 1 + fs/squashfs/block.c | 1 + fs/udf/dir.c | 1 + fs/udf/directory.c | 1 + fs/udf/inode.c | 1 + fs/ufs/balloc.c | 1 + include/linux/fs.h | 2 +- include/linux/swap.h | 1 + include/linux/writeback.h | 2 ++ lib/iov_iter.c | 1 + 18 files changed, 19 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c index 6181ad79e1a5..5ca1fb0043f6 100644 --- a/fs/9p/vfs_addr.c +++ b/fs/9p/vfs_addr.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index aab5227979e2..db726e8311ca 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -41,6 +41,7 @@ #include #include #include +#include #include "cifspdu.h" #include "cifsglob.h" diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 206a597b2293..5f02edc819af 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index 3cdde5f5d399..79113219be5f 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c @@ -62,6 +62,7 @@ #include #include #include +#include #include "gfs2.h" #include "incore.h" diff --git a/fs/isofs/compress.c b/fs/isofs/compress.c index 44af14b2e916..9bb2fe35799d 100644 --- a/fs/isofs/compress.c +++ b/fs/isofs/compress.c @@ -18,6 +18,7 @@ #include #include +#include #include #include diff --git a/fs/ntfs/logfile.c b/fs/ntfs/logfile.c index 761f12f7f3ef..353379ff6057 100644 --- a/fs/ntfs/logfile.c +++ b/fs/ntfs/logfile.c @@ -27,6 +27,7 @@ #include #include #include +#include #include "attrib.h" #include "aops.h" diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c index 
8f040f88ade4..d9ebe11c8990 100644 --- a/fs/ocfs2/buffer_head_io.c +++ b/fs/ocfs2/buffer_head_io.c @@ -26,6 +26,7 @@ #include #include #include +#include #include diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c index ef3b4eb54cf2..551bc74ed2b8 100644 --- a/fs/orangefs/inode.c +++ b/fs/orangefs/inode.c @@ -8,6 +8,7 @@ * Linux VFS inode operations. */ +#include #include "protocol.h" #include "orangefs-kernel.h" #include "orangefs-bufmap.h" diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c index a97e352d05d3..0037aea97d39 100644 --- a/fs/reiserfs/stree.c +++ b/fs/reiserfs/stree.c @@ -11,6 +11,7 @@ #include #include #include +#include #include "reiserfs.h" #include #include diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c index ce62a380314f..2751476e6b6e 100644 --- a/fs/squashfs/block.c +++ b/fs/squashfs/block.c @@ -31,6 +31,7 @@ #include #include #include +#include #include "squashfs_fs.h" #include "squashfs_fs_sb.h" diff --git a/fs/udf/dir.c b/fs/udf/dir.c index aaec13c95253..2d0e028067eb 100644 --- a/fs/udf/dir.c +++ b/fs/udf/dir.c @@ -30,6 +30,7 @@ #include #include #include +#include #include "udf_i.h" #include "udf_sb.h" diff --git a/fs/udf/directory.c b/fs/udf/directory.c index 988d5352bdb8..7aa48bd7cbaf 100644 --- a/fs/udf/directory.c +++ b/fs/udf/directory.c @@ -16,6 +16,7 @@ #include #include +#include struct fileIdentDesc *udf_fileident_read(struct inode *dir, loff_t *nf_pos, struct udf_fileident_bh *fibh, diff --git a/fs/udf/inode.c b/fs/udf/inode.c index aad46401ede5..0f3db71753aa 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -38,6 +38,7 @@ #include #include #include +#include #include "udf_i.h" #include "udf_sb.h" diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c index 67e085d591d8..b035af54f538 100644 --- a/fs/ufs/balloc.c +++ b/fs/ufs/balloc.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include "ufs_fs.h" diff --git a/include/linux/fs.h b/include/linux/fs.h index 5b0a9b77534d..8533e9d59c29 100644 --- 
a/include/linux/fs.h +++ b/include/linux/fs.h @@ -28,7 +28,6 @@ #include #include #include -#include #include #include #include @@ -38,6 +37,7 @@ struct backing_dev_info; struct bdi_writeback; +struct bio; struct export_operations; struct hd_geometry; struct iovec; diff --git a/include/linux/swap.h b/include/linux/swap.h index a56523cefb9b..3a6aebc23001 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -11,6 +11,7 @@ #include #include #include +#include #include struct notifier_block; diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 797100e10010..e4c38703bf4e 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -10,6 +10,8 @@ #include #include +struct bio; + DECLARE_PER_CPU(int, dirty_throttle_leaks); /* diff --git a/lib/iov_iter.c b/lib/iov_iter.c index f0c7f1481bae..efc953c47572 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1,4 +1,5 @@ #include +#include #include #include #include -- cgit v1.2.3 From 9f08217120568afdfb59973a89a675e649c0096d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 1 Nov 2016 07:40:15 -0600 Subject: ceph: don't include blk_types.h in messenger.h The file only needs the struct bvec_iter declaration, which is available from bvec.h. 
Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/ceph/messenger.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 8dbd7879fdc6..67bcef2ecddb 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -1,7 +1,7 @@ #ifndef __FS_CEPH_MESSENGER_H #define __FS_CEPH_MESSENGER_H -#include +#include #include #include #include -- cgit v1.2.3 From be297968da22cf40c9c419df51e71ba8856a2ec2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 1 Nov 2016 07:40:16 -0600 Subject: mm: only include blk_types in swap.h if CONFIG_SWAP is enabled It's only needed for the CONFIG_SWAP-only use of bio_end_io_t. Because CONFIG_SWAP implies CONFIG_BLOCK this will allow us to drop some ifdefs in blk_types.h. Instead we'll need to add a few explicit includes that were implicit before, though. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/staging/lustre/include/linux/lnet/types.h | 1 + drivers/staging/lustre/lustre/llite/rw.c | 1 + fs/ntfs/aops.c | 1 + fs/ntfs/mft.c | 1 + fs/reiserfs/inode.c | 1 + fs/splice.c | 1 + include/linux/swap.h | 4 +++- 7 files changed, 9 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/staging/lustre/include/linux/lnet/types.h b/drivers/staging/lustre/include/linux/lnet/types.h index f8be0e2f7bf7..8ca1e9d0cfe2 100644 --- a/drivers/staging/lustre/include/linux/lnet/types.h +++ b/drivers/staging/lustre/include/linux/lnet/types.h @@ -34,6 +34,7 @@ #define __LNET_TYPES_H__ #include +#include /** \addtogroup lnet * @{ diff --git a/drivers/staging/lustre/lustre/llite/rw.c b/drivers/staging/lustre/lustre/llite/rw.c index 50c0152ba022..76a6836cdf70 100644 --- a/drivers/staging/lustre/lustre/llite/rw.c +++ b/drivers/staging/lustre/lustre/llite/rw.c @@ -47,6 +47,7 @@ #include /* current_is_kswapd() */ #include +#include #define DEBUG_SUBSYSTEM S_LLITE 
diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c index fe251f187ff8..d0cf6fee5c77 100644 --- a/fs/ntfs/aops.c +++ b/fs/ntfs/aops.c @@ -29,6 +29,7 @@ #include #include #include +#include #include "aops.h" #include "attrib.h" diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c index d3c009626032..b6f402194f02 100644 --- a/fs/ntfs/mft.c +++ b/fs/ntfs/mft.c @@ -23,6 +23,7 @@ #include #include #include +#include #include "attrib.h" #include "aops.h" diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 58b2dedb2a3a..cfeae9b0a2b7 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -19,6 +19,7 @@ #include #include #include +#include int reiserfs_commit_write(struct file *f, struct page *page, unsigned from, unsigned to); diff --git a/fs/splice.c b/fs/splice.c index 153d4f3bd441..51492f26915a 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -17,6 +17,7 @@ * Copyright (C) 2006 Ingo Molnar * */ +#include #include #include #include diff --git a/include/linux/swap.h b/include/linux/swap.h index 3a6aebc23001..bfee1af1f54f 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -11,7 +11,6 @@ #include #include #include -#include #include struct notifier_block; @@ -352,6 +351,9 @@ extern int kswapd_run(int nid); extern void kswapd_stop(int nid); #ifdef CONFIG_SWAP + +#include /* for bio_end_io_t */ + /* linux/mm/page_io.c */ extern int swap_readpage(struct page *); extern int swap_writepage(struct page *page, struct writeback_control *wbc); -- cgit v1.2.3 From 7281b4526cefc898d180850b54d1369f38c6b202 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 1 Nov 2016 07:40:17 -0600 Subject: block: remove the CONFIG_BLOCK ifdef in blk_types.h Now that we have a separate header for struct bio_vec there is absolutely no excuse for including this header from non-block I/O code. 
Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 63b750a3b165..bb921028e7c5 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -17,7 +17,6 @@ struct io_context; struct cgroup_subsys_state; typedef void (bio_end_io_t) (struct bio *); -#ifdef CONFIG_BLOCK /* * main unit of I/O for the block layer and lower layers (ie drivers and * stacking drivers) @@ -126,8 +125,6 @@ struct bio { #define BVEC_POOL_OFFSET (16 - BVEC_POOL_BITS) #define BVEC_POOL_IDX(bio) ((bio)->bi_flags >> BVEC_POOL_OFFSET) -#endif /* CONFIG_BLOCK */ - /* * Operations and flags common to the bio and request structures. * We use 8 bits for encoding the operation, and the remaining 24 for flags. -- cgit v1.2.3 From 1d796d6a9641fbfcd90fcfaf6fb4894a13d0304f Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 1 Nov 2016 09:52:57 -0600 Subject: block: add REQ_BACKGROUND This adds a new request flag, REQ_BACKGROUND, that callers can use to tell the block layer that this is background (non-urgent) IO. 
Signed-off-by: Jens Axboe Reviewed-by: Christoph Hellwig --- include/linux/blk_types.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index bb921028e7c5..562ac46cb790 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -177,6 +177,7 @@ enum req_flag_bits { __REQ_FUA, /* forced unit access */ __REQ_PREFLUSH, /* request for cache flush */ __REQ_RAHEAD, /* read ahead, can fail anytime */ + __REQ_BACKGROUND, /* background IO */ __REQ_NR_BITS, /* stops here */ }; @@ -192,6 +193,7 @@ enum req_flag_bits { #define REQ_FUA (1ULL << __REQ_FUA) #define REQ_PREFLUSH (1ULL << __REQ_PREFLUSH) #define REQ_RAHEAD (1ULL << __REQ_RAHEAD) +#define REQ_BACKGROUND (1ULL << __REQ_BACKGROUND) #define REQ_FAILFAST_MASK \ (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER) -- cgit v1.2.3 From 7637241e651ec36e409412869f986dd5f097735f Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 1 Nov 2016 10:00:38 -0600 Subject: writeback: add wbc_to_write_flags() Add wbc_to_write_flags(), which returns the write modifier flags to use, based on a struct writeback_control. No functional changes in this patch, but it prepares us for factoring other wbc fields for write type. Signed-off-by: Jens Axboe Reviewed-by: Jan Kara Reviewed-by: Christoph Hellwig --- fs/buffer.c | 2 +- fs/f2fs/data.c | 2 +- fs/f2fs/node.c | 2 +- fs/gfs2/meta_io.c | 3 +-- fs/mpage.c | 2 +- fs/xfs/xfs_aops.c | 8 ++------ include/linux/writeback.h | 9 +++++++++ mm/page_io.c | 5 +---- 8 files changed, 17 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/fs/buffer.c b/fs/buffer.c index bc7c2bb30a9b..af5776da814a 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1697,7 +1697,7 @@ int __block_write_full_page(struct inode *inode, struct page *page, struct buffer_head *bh, *head; unsigned int blocksize, bbits; int nr_underway = 0; - int write_flags = (wbc->sync_mode == WB_SYNC_ALL ? 
REQ_SYNC : 0); + int write_flags = wbc_to_write_flags(wbc); head = create_page_buffers(page, inode, (1 << BH_Dirty)|(1 << BH_Uptodate)); diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index b80bf10603d7..9e5561fa4cb6 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1249,7 +1249,7 @@ static int f2fs_write_data_page(struct page *page, .sbi = sbi, .type = DATA, .op = REQ_OP_WRITE, - .op_flags = (wbc->sync_mode == WB_SYNC_ALL) ? REQ_SYNC : 0, + .op_flags = wbc_to_write_flags(wbc), .page = page, .encrypted_page = NULL, }; diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 932f3f8bb57b..d1e29deb4598 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1570,7 +1570,7 @@ static int f2fs_write_node_page(struct page *page, .sbi = sbi, .type = NODE, .op = REQ_OP_WRITE, - .op_flags = (wbc->sync_mode == WB_SYNC_ALL) ? REQ_SYNC : 0, + .op_flags = wbc_to_write_flags(wbc), .page = page, .encrypted_page = NULL, }; diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index e562b1191c9c..49db8ef13fdf 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -37,8 +37,7 @@ static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wb { struct buffer_head *bh, *head; int nr_underway = 0; - int write_flags = REQ_META | REQ_PRIO | - (wbc->sync_mode == WB_SYNC_ALL ? REQ_SYNC : 0); + int write_flags = REQ_META | REQ_PRIO | wbc_to_write_flags(wbc); BUG_ON(!PageLocked(page)); BUG_ON(!page_has_buffers(page)); diff --git a/fs/mpage.c b/fs/mpage.c index f35e2819d0c6..98fc11aa7e0b 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -489,7 +489,7 @@ static int __mpage_writepage(struct page *page, struct writeback_control *wbc, struct buffer_head map_bh; loff_t i_size = i_size_read(inode); int ret = 0; - int op_flags = (wbc->sync_mode == WB_SYNC_ALL ? 
REQ_SYNC : 0); + int op_flags = wbc_to_write_flags(wbc); if (page_has_buffers(page)) { struct buffer_head *head = page_buffers(page); diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 594e02c485b2..6be5204a06d3 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -495,9 +495,7 @@ xfs_submit_ioend( ioend->io_bio->bi_private = ioend; ioend->io_bio->bi_end_io = xfs_end_bio; - ioend->io_bio->bi_opf = REQ_OP_WRITE; - if (wbc->sync_mode == WB_SYNC_ALL) - ioend->io_bio->bi_opf |= REQ_SYNC; + ioend->io_bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc); /* * If we are failing the IO now, just mark the ioend with an @@ -569,9 +567,7 @@ xfs_chain_bio( bio_chain(ioend->io_bio, new); bio_get(ioend->io_bio); /* for xfs_destroy_ioend */ - ioend->io_bio->bi_opf = REQ_OP_WRITE; - if (wbc->sync_mode == WB_SYNC_ALL) - ioend->io_bio->bi_opf |= REQ_SYNC; + ioend->io_bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc); submit_bio(ioend->io_bio); ioend->io_bio = new; } diff --git a/include/linux/writeback.h b/include/linux/writeback.h index e4c38703bf4e..50c96ee8108f 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -9,6 +9,7 @@ #include #include #include +#include struct bio; @@ -102,6 +103,14 @@ struct writeback_control { #endif }; +static inline int wbc_to_write_flags(struct writeback_control *wbc) +{ + if (wbc->sync_mode == WB_SYNC_ALL) + return REQ_SYNC; + + return 0; +} + /* * A wb_domain represents a domain that wb's (bdi_writeback's) belong to * and are measured against each other in. 
There always is one global diff --git a/mm/page_io.c b/mm/page_io.c index a2651f58c86a..23f6d0d3470f 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -320,10 +320,7 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc, ret = -ENOMEM; goto out; } - if (wbc->sync_mode == WB_SYNC_ALL) - bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_SYNC); - else - bio_set_op_attrs(bio, REQ_OP_WRITE, 0); + bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc); count_vm_event(PSWPOUT); set_page_writeback(page); unlock_page(page); -- cgit v1.2.3 From 13edd5e7315a26b448c5f7f33fc7721b1e0c17ef Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 1 Nov 2016 10:01:35 -0600 Subject: writeback: mark background writeback as such If we're doing background type writes, then use the appropriate background write flags for that. Signed-off-by: Jens Axboe Reviewed-by: Christoph Hellwig --- include/linux/writeback.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 50c96ee8108f..c78f9f0920b5 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -107,6 +107,8 @@ static inline int wbc_to_write_flags(struct writeback_control *wbc) { if (wbc->sync_mode == WB_SYNC_ALL) return REQ_SYNC; + else if (wbc->for_kupdate || wbc->for_background) + return REQ_BACKGROUND; return 0; } -- cgit v1.2.3 From 2cefe4dbaadf83b236caab46705b4b5a4958e3b6 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 31 Oct 2016 11:59:24 -0600 Subject: block: add bio_iov_iter_get_pages() This is a helper that pins down a range from an iov_iter and adds it to a bio without requiring a separate memory allocation for the page array. It will be used for upcoming direct I/O implementations for block devices and iomap based file systems. Signed-off-by: Kent Overstreet [hch: ported to the iov_iter interface, renamed and added comments. All blame should be directed to me and all fame should go to Kent after this!] 
Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/bio.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/bio.h | 1 + 2 files changed, 50 insertions(+) (limited to 'include/linux') diff --git a/block/bio.c b/block/bio.c index db85c5753a76..2cf6ebabc68c 100644 --- a/block/bio.c +++ b/block/bio.c @@ -847,6 +847,55 @@ done: } EXPORT_SYMBOL(bio_add_page); +/** + * bio_iov_iter_get_pages - pin user or kernel pages and add them to a bio + * @bio: bio to add pages to + * @iter: iov iterator describing the region to be mapped + * + * Pins as many pages from *iter and appends them to @bio's bvec array. The + * pages will have to be released using put_page() when done. + */ +int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter) +{ + unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt; + struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt; + struct page **pages = (struct page **)bv; + size_t offset, diff; + ssize_t size; + + size = iov_iter_get_pages(iter, pages, LONG_MAX, nr_pages, &offset); + if (unlikely(size <= 0)) + return size ? size : -EFAULT; + nr_pages = (size + offset + PAGE_SIZE - 1) / PAGE_SIZE; + + /* + * Deep magic below: We need to walk the pinned pages backwards + * because we are abusing the space allocated for the bio_vecs + * for the page array. Because the bio_vecs are larger than the + * page pointers by definition this will always work. But it also + * means we can't use bio_add_page, so any changes to it's semantics + * need to be reflected here as well. 
+ */ + bio->bi_iter.bi_size += size; + bio->bi_vcnt += nr_pages; + + diff = (nr_pages * PAGE_SIZE - offset) - size; + while (nr_pages--) { + bv[nr_pages].bv_page = pages[nr_pages]; + bv[nr_pages].bv_len = PAGE_SIZE; + bv[nr_pages].bv_offset = 0; + } + + bv[0].bv_offset += offset; + bv[0].bv_len -= offset; + if (diff) + bv[bio->bi_vcnt - 1].bv_len -= diff; + + iov_iter_advance(iter, size); + return 0; +} +EXPORT_SYMBOL_GPL(bio_iov_iter_get_pages); + struct submit_bio_ret { struct completion event; int error; diff --git a/include/linux/bio.h b/include/linux/bio.h index 5c604b4914bf..d367cd37a7f7 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -427,6 +427,7 @@ void bio_chain(struct bio *, struct bio *); extern int bio_add_page(struct bio *, struct page *, unsigned int,unsigned int); extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *, unsigned int, unsigned int); +int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter); struct rq_map_data; extern struct bio *bio_map_user_iov(struct request_queue *, const struct iov_iter *, gfp_t); -- cgit v1.2.3 From fd00144301d64f1742541a3c5e64cd1c51f39c55 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 28 Oct 2016 17:19:37 -0700 Subject: blk-mq: Introduce blk_mq_queue_stopped() The function blk_queue_stopped() allows to test whether or not a traditional request queue has been stopped. Introduce a helper function that allows block drivers to query easily whether or not one or more hardware contexts of a blk-mq queue have been stopped. 
Signed-off-by: Bart Van Assche Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Reviewed-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-mq.c | 20 ++++++++++++++++++++ include/linux/blk-mq.h | 1 + 2 files changed, 21 insertions(+) (limited to 'include/linux') diff --git a/block/blk-mq.c b/block/blk-mq.c index 2864e191cc86..28bf667bfe09 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -944,6 +944,26 @@ void blk_mq_run_hw_queues(struct request_queue *q, bool async) } EXPORT_SYMBOL(blk_mq_run_hw_queues); +/** + * blk_mq_queue_stopped() - check whether one or more hctxs have been stopped + * @q: request queue. + * + * The caller is responsible for serializing this function against + * blk_mq_{start,stop}_hw_queue(). + */ +bool blk_mq_queue_stopped(struct request_queue *q) +{ + struct blk_mq_hw_ctx *hctx; + int i; + + queue_for_each_hw_ctx(q, hctx, i) + if (blk_mq_hctx_stopped(hctx)) + return true; + + return false; +} +EXPORT_SYMBOL(blk_mq_queue_stopped); + void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx) { cancel_work(&hctx->run_work); diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 535ab2e13d2e..aa930009fcd3 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -223,6 +223,7 @@ void blk_mq_delay_kick_requeue_list(struct request_queue *q, unsigned long msecs void blk_mq_abort_requeue_list(struct request_queue *q); void blk_mq_complete_request(struct request *rq, int error); +bool blk_mq_queue_stopped(struct request_queue *q); void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx); void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx); void blk_mq_stop_hw_queues(struct request_queue *q); -- cgit v1.2.3 From 9b7dd572cc439fa92e120290eb74d0295567c5a0 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 28 Oct 2016 17:20:49 -0700 Subject: blk-mq: Remove blk_mq_cancel_requeue_work() Since blk_mq_requeue_work() no longer restarts stopped queues canceling requeue work is no 
longer needed to prevent that a stopped queue would be restarted. Hence remove this function. Signed-off-by: Bart Van Assche Cc: Mike Snitzer Cc: Keith Busch Cc: Hannes Reinecke Cc: Johannes Thumshirn Reviewed-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-mq.c | 6 ------ drivers/md/dm-rq.c | 2 -- drivers/nvme/host/core.c | 1 - include/linux/blk-mq.h | 1 - 4 files changed, 10 deletions(-) (limited to 'include/linux') diff --git a/block/blk-mq.c b/block/blk-mq.c index d95034ae64f6..a461823644fb 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -526,12 +526,6 @@ void blk_mq_add_to_requeue_list(struct request *rq, bool at_head) } EXPORT_SYMBOL(blk_mq_add_to_requeue_list); -void blk_mq_cancel_requeue_work(struct request_queue *q) -{ - cancel_delayed_work_sync(&q->requeue_work); -} -EXPORT_SYMBOL_GPL(blk_mq_cancel_requeue_work); - void blk_mq_kick_requeue_list(struct request_queue *q) { kblockd_schedule_delayed_work(&q->requeue_work, 0); diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c index a9e9e781bb77..060ccc5a4b1c 100644 --- a/drivers/md/dm-rq.c +++ b/drivers/md/dm-rq.c @@ -116,8 +116,6 @@ static void dm_mq_stop_queue(struct request_queue *q) queue_flag_set(QUEUE_FLAG_STOPPED, q); spin_unlock_irqrestore(q->queue_lock, flags); - /* Avoid that requeuing could restart the queue. 
*/ - blk_mq_cancel_requeue_work(q); blk_mq_stop_hw_queues(q); } diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 329381a28edf..a764c2aa00a1 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2081,7 +2081,6 @@ void nvme_stop_queues(struct nvme_ctrl *ctrl) queue_flag_set(QUEUE_FLAG_STOPPED, ns->queue); spin_unlock_irq(ns->queue->queue_lock); - blk_mq_cancel_requeue_work(ns->queue); blk_mq_stop_hw_queues(ns->queue); } mutex_unlock(&ctrl->namespaces_mutex); diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index aa930009fcd3..a85a20f80aaa 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -217,7 +217,6 @@ void __blk_mq_end_request(struct request *rq, int error); void blk_mq_requeue_request(struct request *rq); void blk_mq_add_to_requeue_list(struct request *rq, bool at_head); -void blk_mq_cancel_requeue_work(struct request_queue *q); void blk_mq_kick_requeue_list(struct request_queue *q); void blk_mq_delay_kick_requeue_list(struct request_queue *q, unsigned long msecs); void blk_mq_abort_requeue_list(struct request_queue *q); -- cgit v1.2.3 From 6a83e74d214a47a1371cd2e6a783264fcba7d428 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 2 Nov 2016 10:09:51 -0600 Subject: blk-mq: Introduce blk_mq_quiesce_queue() blk_mq_quiesce_queue() waits until ongoing .queue_rq() invocations have finished. This function does *not* wait until all outstanding requests have finished (this means invocation of request.end_io()). The algorithm used by blk_mq_quiesce_queue() is as follows: * Hold either an RCU read lock or an SRCU read lock around .queue_rq() calls. The former is used if .queue_rq() does not block and the latter if .queue_rq() may block. * blk_mq_quiesce_queue() first calls blk_mq_stop_hw_queues() followed by synchronize_srcu() or synchronize_rcu(). The latter call waits for .queue_rq() invocations that started before blk_mq_quiesce_queue() was called. 
* The blk_mq_hctx_stopped() calls that control whether or not .queue_rq() will be called are called with the (S)RCU read lock held. This is necessary to avoid race conditions against blk_mq_quiesce_queue(). Signed-off-by: Bart Van Assche Cc: Hannes Reinecke Cc: Johannes Thumshirn Reviewed-by: Sagi Grimberg Reviewed-by: Ming Lei Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/Kconfig | 1 + block/blk-mq.c | 71 +++++++++++++++++++++++++++++++++++++++++++++----- include/linux/blk-mq.h | 3 +++ include/linux/blkdev.h | 1 + 4 files changed, 69 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/block/Kconfig b/block/Kconfig index 6b0ad08f0677..3a024440a669 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -5,6 +5,7 @@ menuconfig BLOCK bool "Enable the block layer" if EXPERT default y select SBITMAP + select SRCU help Provide block layer support for the kernel. diff --git a/block/blk-mq.c b/block/blk-mq.c index a461823644fb..3dc323543293 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -115,6 +115,33 @@ void blk_mq_unfreeze_queue(struct request_queue *q) } EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue); +/** + * blk_mq_quiesce_queue() - wait until all ongoing queue_rq calls have finished + * @q: request queue. + * + * Note: this function does not prevent that the struct request end_io() + * callback function is invoked. Additionally, it is not prevented that + * new queue_rq() calls occur unless the queue has been stopped first. 
+ */ +void blk_mq_quiesce_queue(struct request_queue *q) +{ + struct blk_mq_hw_ctx *hctx; + unsigned int i; + bool rcu = false; + + blk_mq_stop_hw_queues(q); + + queue_for_each_hw_ctx(q, hctx, i) { + if (hctx->flags & BLK_MQ_F_BLOCKING) + synchronize_srcu(&hctx->queue_rq_srcu); + else + rcu = true; + } + if (rcu) + synchronize_rcu(); +} +EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue); + void blk_mq_wake_waiters(struct request_queue *q) { struct blk_mq_hw_ctx *hctx; @@ -766,7 +793,7 @@ static inline unsigned int queued_to_index(unsigned int queued) * of IO. In particular, we'd like FIFO behaviour on handling existing * items on the hctx->dispatch list. Ignore that for now. */ -static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx) +static void blk_mq_process_rq_list(struct blk_mq_hw_ctx *hctx) { struct request_queue *q = hctx->queue; struct request *rq; @@ -778,9 +805,6 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx) if (unlikely(blk_mq_hctx_stopped(hctx))) return; - WARN_ON(!cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask) && - cpu_online(hctx->next_cpu)); - hctx->run++; /* @@ -871,6 +895,24 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx) } } +static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx) +{ + int srcu_idx; + + WARN_ON(!cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask) && + cpu_online(hctx->next_cpu)); + + if (!(hctx->flags & BLK_MQ_F_BLOCKING)) { + rcu_read_lock(); + blk_mq_process_rq_list(hctx); + rcu_read_unlock(); + } else { + srcu_idx = srcu_read_lock(&hctx->queue_rq_srcu); + blk_mq_process_rq_list(hctx); + srcu_read_unlock(&hctx->queue_rq_srcu, srcu_idx); + } +} + /* * It'd be great if the workqueue API had a way to pass * in a mask and had some smarts for more clever placement. 
@@ -1268,7 +1310,7 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) const int is_flush_fua = bio->bi_opf & (REQ_PREFLUSH | REQ_FUA); struct blk_mq_alloc_data data; struct request *rq; - unsigned int request_count = 0; + unsigned int request_count = 0, srcu_idx; struct blk_plug *plug; struct request *same_queue_rq = NULL; blk_qc_t cookie; @@ -1311,7 +1353,7 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) blk_mq_bio_to_request(rq, bio); /* - * We do limited pluging. If the bio can be merged, do that. + * We do limited plugging. If the bio can be merged, do that. * Otherwise the existing request in the plug list will be * issued. So the plug list will have one request at most */ @@ -1331,7 +1373,16 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) blk_mq_put_ctx(data.ctx); if (!old_rq) goto done; - blk_mq_try_issue_directly(data.hctx, old_rq, &cookie); + + if (!(data.hctx->flags & BLK_MQ_F_BLOCKING)) { + rcu_read_lock(); + blk_mq_try_issue_directly(data.hctx, old_rq, &cookie); + rcu_read_unlock(); + } else { + srcu_idx = srcu_read_lock(&data.hctx->queue_rq_srcu); + blk_mq_try_issue_directly(data.hctx, old_rq, &cookie); + srcu_read_unlock(&data.hctx->queue_rq_srcu, srcu_idx); + } goto done; } @@ -1610,6 +1661,9 @@ static void blk_mq_exit_hctx(struct request_queue *q, if (set->ops->exit_hctx) set->ops->exit_hctx(hctx, hctx_idx); + if (hctx->flags & BLK_MQ_F_BLOCKING) + cleanup_srcu_struct(&hctx->queue_rq_srcu); + blk_mq_remove_cpuhp(hctx); blk_free_flush_queue(hctx->fq); sbitmap_free(&hctx->ctx_map); @@ -1690,6 +1744,9 @@ static int blk_mq_init_hctx(struct request_queue *q, flush_start_tag + hctx_idx, node)) goto free_fq; + if (hctx->flags & BLK_MQ_F_BLOCKING) + init_srcu_struct(&hctx->queue_rq_srcu); + return 0; free_fq: diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index a85a20f80aaa..ed20ac74c62a 100644 --- a/include/linux/blk-mq.h +++ 
b/include/linux/blk-mq.h @@ -3,6 +3,7 @@ #include #include +#include struct blk_mq_tags; struct blk_flush_queue; @@ -35,6 +36,8 @@ struct blk_mq_hw_ctx { struct blk_mq_tags *tags; + struct srcu_struct queue_rq_srcu; + unsigned long queued; unsigned long run; #define BLK_MQ_MAX_DISPATCH_ORDER 7 diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 8396da2bb698..13d893a69b46 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -918,6 +918,7 @@ extern void __blk_run_queue(struct request_queue *q); extern void __blk_run_queue_uncond(struct request_queue *q); extern void blk_run_queue(struct request_queue *); extern void blk_run_queue_async(struct request_queue *q); +extern void blk_mq_quiesce_queue(struct request_queue *q); extern int blk_rq_map_user(struct request_queue *, struct request *, struct rq_map_data *, void __user *, unsigned long, gfp_t); -- cgit v1.2.3 From 2b053aca76b48e681be57b34ca3a8c2c10b275c5 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 28 Oct 2016 17:21:41 -0700 Subject: blk-mq: Add a kick_requeue_list argument to blk_mq_requeue_request() Most blk_mq_requeue_request() and blk_mq_add_to_requeue_list() calls are followed by kicking the requeue list. Hence add an argument to these two functions that allows to kick the requeue list. This was proposed by Christoph Hellwig. 
Signed-off-by: Bart Van Assche Reviewed-by: Johannes Thumshirn Reviewed-by: Christoph Hellwig Cc: Hannes Reinecke Reviewed-by: Sagi Grimberg Signed-off-by: Jens Axboe --- block/blk-flush.c | 5 +---- block/blk-mq.c | 10 +++++++--- drivers/block/xen-blkfront.c | 2 +- drivers/md/dm-rq.c | 2 +- drivers/nvme/host/core.c | 2 +- drivers/scsi/scsi_lib.c | 4 +--- include/linux/blk-mq.h | 5 +++-- 7 files changed, 15 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/block/blk-flush.c b/block/blk-flush.c index d35beca18481..c486b7aa62ee 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -134,10 +134,7 @@ static void blk_flush_restore_request(struct request *rq) static bool blk_flush_queue_rq(struct request *rq, bool add_front) { if (rq->q->mq_ops) { - struct request_queue *q = rq->q; - - blk_mq_add_to_requeue_list(rq, add_front); - blk_mq_kick_requeue_list(q); + blk_mq_add_to_requeue_list(rq, add_front, true); return false; } else { if (add_front) diff --git a/block/blk-mq.c b/block/blk-mq.c index 3dc323543293..8d3de5bd4d6f 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -492,12 +492,12 @@ static void __blk_mq_requeue_request(struct request *rq) } } -void blk_mq_requeue_request(struct request *rq) +void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list) { __blk_mq_requeue_request(rq); BUG_ON(blk_queued_rq(rq)); - blk_mq_add_to_requeue_list(rq, true); + blk_mq_add_to_requeue_list(rq, true, kick_requeue_list); } EXPORT_SYMBOL(blk_mq_requeue_request); @@ -531,7 +531,8 @@ static void blk_mq_requeue_work(struct work_struct *work) blk_mq_run_hw_queues(q, false); } -void blk_mq_add_to_requeue_list(struct request *rq, bool at_head) +void blk_mq_add_to_requeue_list(struct request *rq, bool at_head, + bool kick_requeue_list) { struct request_queue *q = rq->q; unsigned long flags; @@ -550,6 +551,9 @@ void blk_mq_add_to_requeue_list(struct request *rq, bool at_head) list_add_tail(&rq->queuelist, &q->requeue_list); } 
spin_unlock_irqrestore(&q->requeue_lock, flags); + + if (kick_requeue_list) + blk_mq_kick_requeue_list(q); } EXPORT_SYMBOL(blk_mq_add_to_requeue_list); diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 71ca36eab558..c000fdf048b2 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -2043,7 +2043,7 @@ static int blkif_recover(struct blkfront_info *info) /* Requeue pending requests (flush or discard) */ list_del_init(&req->queuelist); BUG_ON(req->nr_phys_segments > segs); - blk_mq_requeue_request(req); + blk_mq_requeue_request(req, false); } blk_mq_start_stopped_hw_queues(info->rq, true); blk_mq_kick_requeue_list(info->rq); diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c index 060ccc5a4b1c..315257959fc0 100644 --- a/drivers/md/dm-rq.c +++ b/drivers/md/dm-rq.c @@ -347,7 +347,7 @@ EXPORT_SYMBOL(dm_mq_kick_requeue_list); static void dm_mq_delay_requeue_request(struct request *rq, unsigned long msecs) { - blk_mq_requeue_request(rq); + blk_mq_requeue_request(rq, false); __dm_mq_kick_requeue_list(rq->q, msecs); } diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index a764c2aa00a1..e8070e3cc7c7 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -203,7 +203,7 @@ void nvme_requeue_req(struct request *req) { unsigned long flags; - blk_mq_requeue_request(req); + blk_mq_requeue_request(req, false); spin_lock_irqsave(req->q->queue_lock, flags); if (!blk_queue_stopped(req->q)) blk_mq_kick_requeue_list(req->q); diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 2b78ff12bf3c..2e35132f8be1 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -86,10 +86,8 @@ scsi_set_blocked(struct scsi_cmnd *cmd, int reason) static void scsi_mq_requeue_cmd(struct scsi_cmnd *cmd) { struct scsi_device *sdev = cmd->device; - struct request_queue *q = cmd->request->q; - blk_mq_requeue_request(cmd->request); - blk_mq_kick_requeue_list(q); + 
blk_mq_requeue_request(cmd->request, true); put_device(&sdev->sdev_gendev); } diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index ed20ac74c62a..35a0af5ede6d 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -218,8 +218,9 @@ void blk_mq_start_request(struct request *rq); void blk_mq_end_request(struct request *rq, int error); void __blk_mq_end_request(struct request *rq, int error); -void blk_mq_requeue_request(struct request *rq); -void blk_mq_add_to_requeue_list(struct request *rq, bool at_head); +void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list); +void blk_mq_add_to_requeue_list(struct request *rq, bool at_head, + bool kick_requeue_list); void blk_mq_kick_requeue_list(struct request_queue *q); void blk_mq_delay_kick_requeue_list(struct request_queue *q, unsigned long msecs); void blk_mq_abort_requeue_list(struct request_queue *q); -- cgit v1.2.3 From bc8ee596afe8f35b379f87575c46d800dd8e7e68 Mon Sep 17 00:00:00 2001 From: Philippe Reynes Date: Tue, 1 Nov 2016 16:32:25 +0100 Subject: net: mii: add generic function to support ksetting support The old ethtool api (get_setting and set_setting) has generic mii functions mii_ethtool_sset and mii_ethtool_gset. To support the new ethtool api ({get|set}_link_ksettings), we add two generic mii functions, mii_ethtool_{get|set}_link_ksettings. Signed-off-by: Philippe Reynes Signed-off-by: David S. 
Miller --- drivers/net/mii.c | 195 ++++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/mii.h | 4 ++ 2 files changed, 199 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/mii.c b/drivers/net/mii.c index 993570b1e2ae..0443546fc427 100644 --- a/drivers/net/mii.c +++ b/drivers/net/mii.c @@ -134,6 +134,101 @@ int mii_ethtool_gset(struct mii_if_info *mii, struct ethtool_cmd *ecmd) return 0; } +/** + * mii_ethtool_get_link_ksettings - get settings that are specified in @cmd + * @mii: MII interface + * @cmd: requested ethtool_link_ksettings + * + * The @cmd parameter is expected to have been cleared before calling + * mii_ethtool_get_link_ksettings(). + * + * Returns 0 for success, negative on error. + */ +int mii_ethtool_get_link_ksettings(struct mii_if_info *mii, + struct ethtool_link_ksettings *cmd) +{ + struct net_device *dev = mii->dev; + u16 bmcr, bmsr, ctrl1000 = 0, stat1000 = 0; + u32 nego, supported, advertising, lp_advertising; + + supported = (SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full | + SUPPORTED_100baseT_Half | SUPPORTED_100baseT_Full | + SUPPORTED_Autoneg | SUPPORTED_TP | SUPPORTED_MII); + if (mii->supports_gmii) + supported |= SUPPORTED_1000baseT_Half | + SUPPORTED_1000baseT_Full; + + /* only supports twisted-pair */ + cmd->base.port = PORT_MII; + + /* this isn't fully supported at higher layers */ + cmd->base.phy_address = mii->phy_id; + cmd->base.mdio_support = ETH_MDIO_SUPPORTS_C22; + + advertising = ADVERTISED_TP | ADVERTISED_MII; + + bmcr = mii->mdio_read(dev, mii->phy_id, MII_BMCR); + bmsr = mii->mdio_read(dev, mii->phy_id, MII_BMSR); + if (mii->supports_gmii) { + ctrl1000 = mii->mdio_read(dev, mii->phy_id, MII_CTRL1000); + stat1000 = mii->mdio_read(dev, mii->phy_id, MII_STAT1000); + } + if (bmcr & BMCR_ANENABLE) { + advertising |= ADVERTISED_Autoneg; + cmd->base.autoneg = AUTONEG_ENABLE; + + advertising |= mii_get_an(mii, MII_ADVERTISE); + if (mii->supports_gmii) + advertising |= 
mii_ctrl1000_to_ethtool_adv_t(ctrl1000); + + if (bmsr & BMSR_ANEGCOMPLETE) { + lp_advertising = mii_get_an(mii, MII_LPA); + lp_advertising |= + mii_stat1000_to_ethtool_lpa_t(stat1000); + } else { + lp_advertising = 0; + } + + nego = advertising & lp_advertising; + + if (nego & (ADVERTISED_1000baseT_Full | + ADVERTISED_1000baseT_Half)) { + cmd->base.speed = SPEED_1000; + cmd->base.duplex = !!(nego & ADVERTISED_1000baseT_Full); + } else if (nego & (ADVERTISED_100baseT_Full | + ADVERTISED_100baseT_Half)) { + cmd->base.speed = SPEED_100; + cmd->base.duplex = !!(nego & ADVERTISED_100baseT_Full); + } else { + cmd->base.speed = SPEED_10; + cmd->base.duplex = !!(nego & ADVERTISED_10baseT_Full); + } + } else { + cmd->base.autoneg = AUTONEG_DISABLE; + + cmd->base.speed = ((bmcr & BMCR_SPEED1000 && + (bmcr & BMCR_SPEED100) == 0) ? + SPEED_1000 : + ((bmcr & BMCR_SPEED100) ? + SPEED_100 : SPEED_10)); + cmd->base.duplex = (bmcr & BMCR_FULLDPLX) ? + DUPLEX_FULL : DUPLEX_HALF; + } + + mii->full_duplex = cmd->base.duplex; + + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported, + supported); + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising, + advertising); + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.lp_advertising, + lp_advertising); + + /* ignore maxtxpkt, maxrxpkt for now */ + + return 0; +} + /** * mii_ethtool_sset - set settings that are specified in @ecmd * @mii: MII interface @@ -226,6 +321,104 @@ int mii_ethtool_sset(struct mii_if_info *mii, struct ethtool_cmd *ecmd) return 0; } +/** + * mii_ethtool_set_link_ksettings - set settings that are specified in @cmd + * @mii: MII interfaces + * @cmd: requested ethtool_link_ksettings + * + * Returns 0 for success, negative on error. 
+ */ +int mii_ethtool_set_link_ksettings(struct mii_if_info *mii, + const struct ethtool_link_ksettings *cmd) +{ + struct net_device *dev = mii->dev; + u32 speed = cmd->base.speed; + + if (speed != SPEED_10 && + speed != SPEED_100 && + speed != SPEED_1000) + return -EINVAL; + if (cmd->base.duplex != DUPLEX_HALF && cmd->base.duplex != DUPLEX_FULL) + return -EINVAL; + if (cmd->base.port != PORT_MII) + return -EINVAL; + if (cmd->base.phy_address != mii->phy_id) + return -EINVAL; + if (cmd->base.autoneg != AUTONEG_DISABLE && + cmd->base.autoneg != AUTONEG_ENABLE) + return -EINVAL; + if ((speed == SPEED_1000) && (!mii->supports_gmii)) + return -EINVAL; + + /* ignore supported, maxtxpkt, maxrxpkt */ + + if (cmd->base.autoneg == AUTONEG_ENABLE) { + u32 bmcr, advert, tmp; + u32 advert2 = 0, tmp2 = 0; + u32 advertising; + + ethtool_convert_link_mode_to_legacy_u32( + &advertising, cmd->link_modes.advertising); + + if ((advertising & (ADVERTISED_10baseT_Half | + ADVERTISED_10baseT_Full | + ADVERTISED_100baseT_Half | + ADVERTISED_100baseT_Full | + ADVERTISED_1000baseT_Half | + ADVERTISED_1000baseT_Full)) == 0) + return -EINVAL; + + /* advertise only what has been requested */ + advert = mii->mdio_read(dev, mii->phy_id, MII_ADVERTISE); + tmp = advert & ~(ADVERTISE_ALL | ADVERTISE_100BASE4); + if (mii->supports_gmii) { + advert2 = mii->mdio_read(dev, mii->phy_id, + MII_CTRL1000); + tmp2 = advert2 & + ~(ADVERTISE_1000HALF | ADVERTISE_1000FULL); + } + tmp |= ethtool_adv_to_mii_adv_t(advertising); + + if (mii->supports_gmii) + tmp2 |= ethtool_adv_to_mii_ctrl1000_t(advertising); + if (advert != tmp) { + mii->mdio_write(dev, mii->phy_id, MII_ADVERTISE, tmp); + mii->advertising = tmp; + } + if ((mii->supports_gmii) && (advert2 != tmp2)) + mii->mdio_write(dev, mii->phy_id, MII_CTRL1000, tmp2); + + /* turn on autonegotiation, and force a renegotiate */ + bmcr = mii->mdio_read(dev, mii->phy_id, MII_BMCR); + bmcr |= (BMCR_ANENABLE | BMCR_ANRESTART); + mii->mdio_write(dev, mii->phy_id, 
MII_BMCR, bmcr); + + mii->force_media = 0; + } else { + u32 bmcr, tmp; + + /* turn off auto negotiation, set speed and duplexity */ + bmcr = mii->mdio_read(dev, mii->phy_id, MII_BMCR); + tmp = bmcr & ~(BMCR_ANENABLE | BMCR_SPEED100 | + BMCR_SPEED1000 | BMCR_FULLDPLX); + if (speed == SPEED_1000) + tmp |= BMCR_SPEED1000; + else if (speed == SPEED_100) + tmp |= BMCR_SPEED100; + if (cmd->base.duplex == DUPLEX_FULL) { + tmp |= BMCR_FULLDPLX; + mii->full_duplex = 1; + } else { + mii->full_duplex = 0; + } + if (bmcr != tmp) + mii->mdio_write(dev, mii->phy_id, MII_BMCR, tmp); + + mii->force_media = 1; + } + return 0; +} + /** * mii_check_gmii_support - check if the MII supports Gb interfaces * @mii: the MII interface @@ -466,7 +659,9 @@ MODULE_LICENSE("GPL"); EXPORT_SYMBOL(mii_link_ok); EXPORT_SYMBOL(mii_nway_restart); EXPORT_SYMBOL(mii_ethtool_gset); +EXPORT_SYMBOL(mii_ethtool_get_link_ksettings); EXPORT_SYMBOL(mii_ethtool_sset); +EXPORT_SYMBOL(mii_ethtool_set_link_ksettings); EXPORT_SYMBOL(mii_check_link); EXPORT_SYMBOL(mii_check_media); EXPORT_SYMBOL(mii_check_gmii_support); diff --git a/include/linux/mii.h b/include/linux/mii.h index 47492c9631b3..1629a0c32679 100644 --- a/include/linux/mii.h +++ b/include/linux/mii.h @@ -31,7 +31,11 @@ struct mii_if_info { extern int mii_link_ok (struct mii_if_info *mii); extern int mii_nway_restart (struct mii_if_info *mii); extern int mii_ethtool_gset(struct mii_if_info *mii, struct ethtool_cmd *ecmd); +extern int mii_ethtool_get_link_ksettings( + struct mii_if_info *mii, struct ethtool_link_ksettings *cmd); extern int mii_ethtool_sset(struct mii_if_info *mii, struct ethtool_cmd *ecmd); +extern int mii_ethtool_set_link_ksettings( + struct mii_if_info *mii, const struct ethtool_link_ksettings *cmd); extern int mii_check_gmii_support(struct mii_if_info *mii); extern void mii_check_link (struct mii_if_info *mii); extern unsigned int mii_check_media (struct mii_if_info *mii, -- cgit v1.2.3 From 527c02f66d263d2eeff237a2326c3278cfc03d3b 
Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 27 May 2016 11:56:53 +0200 Subject: soc: renesas: Add R-Car RST driver Add a driver for the Renesas R-Car Gen1 RESET/WDT and R-Car Gen2/Gen3 and RZ/G RST module. For now this driver just provides an API to obtain the state of the mode pins, as latched at reset time. As this is typically called from the probe function of a clock driver, which can run much earlier than any initcall, calling rcar_rst_read_mode_pins() just forces an early initialization of the driver. Despite the current simple and almost identical handling for all supported SoCs, the driver matches against SoC-specific compatible values, as the features provided by the hardware module differ a lot across the various SoC families and members. Signed-off-by: Geert Uytterhoeven Acked-by: Dirk Behme --- drivers/soc/renesas/Makefile | 5 ++ drivers/soc/renesas/rcar-rst.c | 92 ++++++++++++++++++++++++++++++++++++ include/linux/soc/renesas/rcar-rst.h | 6 +++ 3 files changed, 103 insertions(+) create mode 100644 drivers/soc/renesas/rcar-rst.c create mode 100644 include/linux/soc/renesas/rcar-rst.h (limited to 'include/linux') diff --git a/drivers/soc/renesas/Makefile b/drivers/soc/renesas/Makefile index 623039c3514c..86cc78cd1962 100644 --- a/drivers/soc/renesas/Makefile +++ b/drivers/soc/renesas/Makefile @@ -1,3 +1,8 @@ +obj-$(CONFIG_ARCH_RCAR_GEN1) += rcar-rst.o +obj-$(CONFIG_ARCH_RCAR_GEN2) += rcar-rst.o +obj-$(CONFIG_ARCH_R8A7795) += rcar-rst.o +obj-$(CONFIG_ARCH_R8A7796) += rcar-rst.o + obj-$(CONFIG_ARCH_R8A7779) += rcar-sysc.o r8a7779-sysc.o obj-$(CONFIG_ARCH_R8A7790) += rcar-sysc.o r8a7790-sysc.o obj-$(CONFIG_ARCH_R8A7791) += rcar-sysc.o r8a7791-sysc.o diff --git a/drivers/soc/renesas/rcar-rst.c b/drivers/soc/renesas/rcar-rst.c new file mode 100644 index 000000000000..a6d1c26d3167 --- /dev/null +++ b/drivers/soc/renesas/rcar-rst.c @@ -0,0 +1,92 @@ +/* + * R-Car Gen1 RESET/WDT, R-Car Gen2, Gen3, and RZ/G RST Driver + * + * Copyright (C) 2016 
Glider bvba + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ + +#include +#include +#include +#include + +struct rst_config { + unsigned int modemr; /* Mode Monitoring Register Offset */ +}; + +static const struct rst_config rcar_rst_gen1 __initconst = { + .modemr = 0x20, +}; + +static const struct rst_config rcar_rst_gen2 __initconst = { + .modemr = 0x60, +}; + +static const struct of_device_id rcar_rst_matches[] __initconst = { + /* RZ/G is handled like R-Car Gen2 */ + { .compatible = "renesas,r8a7743-rst", .data = &rcar_rst_gen2 }, + { .compatible = "renesas,r8a7745-rst", .data = &rcar_rst_gen2 }, + /* R-Car Gen1 */ + { .compatible = "renesas,r8a7778-reset-wdt", .data = &rcar_rst_gen1 }, + { .compatible = "renesas,r8a7779-reset-wdt", .data = &rcar_rst_gen1 }, + /* R-Car Gen2 */ + { .compatible = "renesas,r8a7790-rst", .data = &rcar_rst_gen2 }, + { .compatible = "renesas,r8a7791-rst", .data = &rcar_rst_gen2 }, + { .compatible = "renesas,r8a7792-rst", .data = &rcar_rst_gen2 }, + { .compatible = "renesas,r8a7793-rst", .data = &rcar_rst_gen2 }, + { .compatible = "renesas,r8a7794-rst", .data = &rcar_rst_gen2 }, + /* R-Car Gen3 is handled like R-Car Gen2 */ + { .compatible = "renesas,r8a7795-rst", .data = &rcar_rst_gen2 }, + { .compatible = "renesas,r8a7796-rst", .data = &rcar_rst_gen2 }, + { /* sentinel */ } +}; + +static void __iomem *rcar_rst_base __initdata; +static u32 saved_mode __initdata; + +static int __init rcar_rst_init(void) +{ + const struct of_device_id *match; + const struct rst_config *cfg; + struct device_node *np; + void __iomem *base; + int error = 0; + + np = of_find_matching_node_and_match(NULL, rcar_rst_matches, &match); + if (!np) + return -ENODEV; + + base = of_iomap(np, 0); + if (!base) { + pr_warn("%s: Cannot map regs\n", np->full_name); + error = -ENOMEM; + goto out_put; + } + + rcar_rst_base = base; + cfg = 
match->data; + saved_mode = ioread32(base + cfg->modemr); + + pr_debug("%s: MODE = 0x%08x\n", np->full_name, saved_mode); + +out_put: + of_node_put(np); + return error; +} + +int __init rcar_rst_read_mode_pins(u32 *mode) +{ + int error; + + if (!rcar_rst_base) { + error = rcar_rst_init(); + if (error) + return error; + } + + *mode = saved_mode; + return 0; +} diff --git a/include/linux/soc/renesas/rcar-rst.h b/include/linux/soc/renesas/rcar-rst.h new file mode 100644 index 000000000000..a18e0783946b --- /dev/null +++ b/include/linux/soc/renesas/rcar-rst.h @@ -0,0 +1,6 @@ +#ifndef __LINUX_SOC_RENESAS_RCAR_RST_H__ +#define __LINUX_SOC_RENESAS_RCAR_RST_H__ + +int rcar_rst_read_mode_pins(u32 *mode); + +#endif /* __LINUX_SOC_RENESAS_RCAR_RST_H__ */ -- cgit v1.2.3 From 7978a78c828ac8d5351b85480e60ada865b9bea9 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 1 Jun 2016 15:23:22 +0200 Subject: clk: renesas: r8a7778: Remove obsolete r8a7778_clocks_init() The R-Car M1A board code no longer calls r8a7778_clocks_init(). 
Signed-off-by: Geert Uytterhoeven Acked-by: Dirk Behme --- drivers/clk/renesas/clk-r8a7778.c | 13 ------------- include/linux/clk/renesas.h | 1 - 2 files changed, 14 deletions(-) (limited to 'include/linux') diff --git a/drivers/clk/renesas/clk-r8a7778.c b/drivers/clk/renesas/clk-r8a7778.c index 07ea411098a7..886a8380e912 100644 --- a/drivers/clk/renesas/clk-r8a7778.c +++ b/drivers/clk/renesas/clk-r8a7778.c @@ -143,16 +143,3 @@ static void __init r8a7778_cpg_clocks_init(struct device_node *np) CLK_OF_DECLARE(r8a7778_cpg_clks, "renesas,r8a7778-cpg-clocks", r8a7778_cpg_clocks_init); - -void __init r8a7778_clocks_init(u32 mode) -{ - BUG_ON(!(mode & BIT(19))); - - cpg_mode_rates = (!!(mode & BIT(18)) << 2) | - (!!(mode & BIT(12)) << 1) | - (!!(mode & BIT(11))); - cpg_mode_divs = (!!(mode & BIT(2)) << 1) | - (!!(mode & BIT(1))); - - of_clk_init(NULL); -} diff --git a/include/linux/clk/renesas.h b/include/linux/clk/renesas.h index ba6fa4148515..2b663bba1adc 100644 --- a/include/linux/clk/renesas.h +++ b/include/linux/clk/renesas.h @@ -20,7 +20,6 @@ struct device; struct device_node; struct generic_pm_domain; -void r8a7778_clocks_init(u32 mode); void r8a7779_clocks_init(u32 mode); void rcar_gen2_clocks_init(u32 mode); -- cgit v1.2.3 From b9fe9421d06653d735df07954730795d907e618d Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 1 Jun 2016 15:24:58 +0200 Subject: clk: renesas: r8a7779: Remove obsolete r8a7779_clocks_init() The R-Car H1 board code no longer calls r8a7779_clocks_init(). 
Signed-off-by: Geert Uytterhoeven Acked-by: Dirk Behme --- drivers/clk/renesas/clk-r8a7779.c | 9 --------- include/linux/clk/renesas.h | 1 - 2 files changed, 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/clk/renesas/clk-r8a7779.c b/drivers/clk/renesas/clk-r8a7779.c index ca7551bcb115..27fbfafaf2cd 100644 --- a/drivers/clk/renesas/clk-r8a7779.c +++ b/drivers/clk/renesas/clk-r8a7779.c @@ -89,8 +89,6 @@ static const unsigned int cpg_plla_mult[4] __initconst = { 42, 48, 56, 64 }; * Initialization */ -static u32 cpg_mode __initdata; - static struct clk * __init r8a7779_cpg_register_clock(struct device_node *np, struct r8a7779_cpg *cpg, const struct cpg_clk_config *config, @@ -178,10 +176,3 @@ static void __init r8a7779_cpg_clocks_init(struct device_node *np) } CLK_OF_DECLARE(r8a7779_cpg_clks, "renesas,r8a7779-cpg-clocks", r8a7779_cpg_clocks_init); - -void __init r8a7779_clocks_init(u32 mode) -{ - cpg_mode = mode; - - of_clk_init(NULL); -} diff --git a/include/linux/clk/renesas.h b/include/linux/clk/renesas.h index 2b663bba1adc..9e969941f3f6 100644 --- a/include/linux/clk/renesas.h +++ b/include/linux/clk/renesas.h @@ -20,7 +20,6 @@ struct device; struct device_node; struct generic_pm_domain; -void r8a7779_clocks_init(u32 mode); void rcar_gen2_clocks_init(u32 mode); void cpg_mstp_add_clk_domain(struct device_node *np); -- cgit v1.2.3 From 3e91d07bb592895982400002020d510fd7b6e85f Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 1 Jun 2016 15:26:53 +0200 Subject: clk: renesas: rcar-gen2: Remove obsolete rcar_gen2_clocks_init() The R-Car Gen2 board code no longer calls rcar_gen2_clocks_init(). 
Signed-off-by: Geert Uytterhoeven Acked-by: Dirk Behme --- drivers/clk/renesas/clk-rcar-gen2.c | 7 ------- include/linux/clk/renesas.h | 2 -- 2 files changed, 9 deletions(-) (limited to 'include/linux') diff --git a/drivers/clk/renesas/clk-rcar-gen2.c b/drivers/clk/renesas/clk-rcar-gen2.c index 3291fd430ad4..f39519edc645 100644 --- a/drivers/clk/renesas/clk-rcar-gen2.c +++ b/drivers/clk/renesas/clk-rcar-gen2.c @@ -445,10 +445,3 @@ static void __init rcar_gen2_cpg_clocks_init(struct device_node *np) } CLK_OF_DECLARE(rcar_gen2_cpg_clks, "renesas,rcar-gen2-cpg-clocks", rcar_gen2_cpg_clocks_init); - -void __init rcar_gen2_clocks_init(u32 mode) -{ - cpg_mode = mode; - - of_clk_init(NULL); -} diff --git a/include/linux/clk/renesas.h b/include/linux/clk/renesas.h index 9e969941f3f6..9ebf1f8243bb 100644 --- a/include/linux/clk/renesas.h +++ b/include/linux/clk/renesas.h @@ -20,8 +20,6 @@ struct device; struct device_node; struct generic_pm_domain; -void rcar_gen2_clocks_init(u32 mode); - void cpg_mstp_add_clk_domain(struct device_node *np); #ifdef CONFIG_CLK_RENESAS_CPG_MSTP int cpg_mstp_attach_dev(struct generic_pm_domain *unused, struct device *dev); -- cgit v1.2.3 From 1610a73c4175e7d63985316b52ac932b65a4dc90 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 3 Nov 2016 10:56:12 +0100 Subject: netfilter: kill NF_HOOK_THRESH() and state->thresh Patch c5136b15ea36 ("netfilter: bridge: add and use br_nf_hook_thresh") introduced br_nf_hook_thresh(). Replace NF_HOOK_THRESH() with br_nf_hook_thresh() in br_nf_forward_finish(), so we have no more callers for this macro. As a result, state->thresh and the explicit thresh parameter in the hook state structure are not required anymore. And we can get rid of the skip-hook-under-thresh loop in nf_iterate() in the core path that is only used by br_netfilter to search for the filter hook.
Suggested-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter.h | 50 +++++++++-------------------------- include/linux/netfilter_ingress.h | 2 +- net/bridge/br_netfilter_hooks.c | 8 +++--- net/bridge/netfilter/ebtable_broute.c | 2 +- net/netfilter/core.c | 4 --- net/netfilter/nf_queue.c | 2 -- 6 files changed, 19 insertions(+), 49 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index abc7fdcb9eb1..e0d000f6c9bf 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -49,7 +49,6 @@ struct sock; struct nf_hook_state { unsigned int hook; - int thresh; u_int8_t pf; struct net_device *in; struct net_device *out; @@ -84,7 +83,7 @@ struct nf_hook_entry { static inline void nf_hook_state_init(struct nf_hook_state *p, struct nf_hook_entry *hook_entry, unsigned int hook, - int thresh, u_int8_t pf, + u_int8_t pf, struct net_device *indev, struct net_device *outdev, struct sock *sk, @@ -92,7 +91,6 @@ static inline void nf_hook_state_init(struct nf_hook_state *p, int (*okfn)(struct net *, struct sock *, struct sk_buff *)) { p->hook = hook; - p->thresh = thresh; p->pf = pf; p->in = indev; p->out = outdev; @@ -155,20 +153,16 @@ extern struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state); /** - * nf_hook_thresh - call a netfilter hook + * nf_hook - call a netfilter hook * * Returns 1 if the hook has allowed the packet to pass. The function * okfn must be invoked by the caller in this case. Any other return * value indicates the packet has been consumed by the hook. 
*/ -static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook, - struct net *net, - struct sock *sk, - struct sk_buff *skb, - struct net_device *indev, - struct net_device *outdev, - int (*okfn)(struct net *, struct sock *, struct sk_buff *), - int thresh) +static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net, + struct sock *sk, struct sk_buff *skb, + struct net_device *indev, struct net_device *outdev, + int (*okfn)(struct net *, struct sock *, struct sk_buff *)) { struct nf_hook_entry *hook_head; int ret = 1; @@ -185,8 +179,8 @@ static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook, if (hook_head) { struct nf_hook_state state; - nf_hook_state_init(&state, hook_head, hook, thresh, - pf, indev, outdev, sk, net, okfn); + nf_hook_state_init(&state, hook_head, hook, pf, indev, outdev, + sk, net, okfn); ret = nf_hook_slow(skb, &state); } @@ -195,14 +189,6 @@ static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook, return ret; } -static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net, - struct sock *sk, struct sk_buff *skb, - struct net_device *indev, struct net_device *outdev, - int (*okfn)(struct net *, struct sock *, struct sk_buff *)) -{ - return nf_hook_thresh(pf, hook, net, sk, skb, indev, outdev, okfn, INT_MIN); -} - /* Activate hook; either okfn or kfree_skb called, unless a hook returns NF_STOLEN (in which case, it's up to the hook to deal with the consequences). 
@@ -220,19 +206,6 @@ static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net, coders :) */ -static inline int -NF_HOOK_THRESH(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk, - struct sk_buff *skb, struct net_device *in, - struct net_device *out, - int (*okfn)(struct net *, struct sock *, struct sk_buff *), - int thresh) -{ - int ret = nf_hook_thresh(pf, hook, net, sk, skb, in, out, okfn, thresh); - if (ret == 1) - ret = okfn(net, sk, skb); - return ret; -} - static inline int NF_HOOK_COND(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk, struct sk_buff *skb, struct net_device *in, struct net_device *out, @@ -242,7 +215,7 @@ NF_HOOK_COND(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk, int ret; if (!cond || - ((ret = nf_hook_thresh(pf, hook, net, sk, skb, in, out, okfn, INT_MIN)) == 1)) + ((ret = nf_hook(pf, hook, net, sk, skb, in, out, okfn)) == 1)) ret = okfn(net, sk, skb); return ret; } @@ -252,7 +225,10 @@ NF_HOOK(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk, struct struct net_device *in, struct net_device *out, int (*okfn)(struct net *, struct sock *, struct sk_buff *)) { - return NF_HOOK_THRESH(pf, hook, net, sk, skb, in, out, okfn, INT_MIN); + int ret = nf_hook(pf, hook, net, sk, skb, in, out, okfn); + if (ret == 1) + ret = okfn(net, sk, skb); + return ret; } /* Call setsockopt() */ diff --git a/include/linux/netfilter_ingress.h b/include/linux/netfilter_ingress.h index 33e37fb41d5d..fd44e4131710 100644 --- a/include/linux/netfilter_ingress.h +++ b/include/linux/netfilter_ingress.h @@ -26,7 +26,7 @@ static inline int nf_hook_ingress(struct sk_buff *skb) if (unlikely(!e)) return 0; - nf_hook_state_init(&state, e, NF_NETDEV_INGRESS, INT_MIN, + nf_hook_state_init(&state, e, NF_NETDEV_INGRESS, NFPROTO_NETDEV, skb->dev, NULL, NULL, dev_net(skb->dev), NULL); return nf_hook_slow(skb, &state); diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 
2fe9345c1407..d0d66faebe90 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -561,8 +561,8 @@ static int br_nf_forward_finish(struct net *net, struct sock *sk, struct sk_buff } nf_bridge_push_encap_header(skb); - NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_FORWARD, net, sk, skb, - in, skb->dev, br_forward_finish, 1); + br_nf_hook_thresh(NF_BR_FORWARD, net, sk, skb, in, skb->dev, + br_forward_finish); return 0; } @@ -1016,8 +1016,8 @@ int br_nf_hook_thresh(unsigned int hook, struct net *net, /* We may already have this, but read-locks nest anyway */ rcu_read_lock(); - nf_hook_state_init(&state, elem, hook, NF_BR_PRI_BRNF + 1, - NFPROTO_BRIDGE, indev, outdev, sk, net, okfn); + nf_hook_state_init(&state, elem, hook, NFPROTO_BRIDGE, indev, outdev, + sk, net, okfn); ret = nf_hook_slow(skb, &state); rcu_read_unlock(); diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c index ec94c6f1ae88..599679e3498d 100644 --- a/net/bridge/netfilter/ebtable_broute.c +++ b/net/bridge/netfilter/ebtable_broute.c @@ -53,7 +53,7 @@ static int ebt_broute(struct sk_buff *skb) struct nf_hook_state state; int ret; - nf_hook_state_init(&state, NULL, NF_BR_BROUTING, INT_MIN, + nf_hook_state_init(&state, NULL, NF_BR_BROUTING, NFPROTO_BRIDGE, skb->dev, NULL, NULL, dev_net(skb->dev), NULL); diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 76014ad72ec5..cb0232c11bc8 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -309,10 +309,6 @@ unsigned int nf_iterate(struct sk_buff *skb, unsigned int verdict; while (*entryp) { - if (state->thresh > (*entryp)->ops.priority) { - *entryp = rcu_dereference((*entryp)->next); - continue; - } repeat: verdict = (*entryp)->ops.hook((*entryp)->ops.priv, skb, state); if (verdict != NF_ACCEPT) { diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 8f08d759844a..0fb38966e5bf 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -200,8 +200,6 @@ 
void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) verdict = NF_DROP; } - entry->state.thresh = INT_MIN; - if (verdict == NF_ACCEPT) { hook_entry = rcu_dereference(hook_entry->next); if (hook_entry) -- cgit v1.2.3 From 613dbd95723aee7abd16860745691b6c7bda20dc Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 3 Nov 2016 10:56:21 +0100 Subject: netfilter: x_tables: move hook state into xt_action_param structure Place pointer to hook state in xt_action_param structure instead of copying the fields that we need. After this change xt_action_param fits into one cacheline. This patch also adds a set of new wrapper functions to fetch relevant hook state structure fields. Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/x_tables.h | 48 +++++++++++++++++++++++------- include/net/netfilter/nf_tables.h | 11 +++---- net/bridge/netfilter/ebt_arpreply.c | 3 +- net/bridge/netfilter/ebt_log.c | 11 +++---- net/bridge/netfilter/ebt_nflog.c | 6 ++-- net/bridge/netfilter/ebt_redirect.c | 6 ++-- net/bridge/netfilter/ebtables.c | 6 +--- net/ipv4/netfilter/arp_tables.c | 6 +--- net/ipv4/netfilter/ip_tables.c | 6 +--- net/ipv4/netfilter/ipt_MASQUERADE.c | 3 +- net/ipv4/netfilter/ipt_REJECT.c | 4 +-- net/ipv4/netfilter/ipt_SYNPROXY.c | 4 +-- net/ipv4/netfilter/ipt_rpfilter.c | 2 +- net/ipv6/netfilter/ip6_tables.c | 6 +--- net/ipv6/netfilter/ip6t_MASQUERADE.c | 2 +- net/ipv6/netfilter/ip6t_REJECT.c | 23 ++++++++------ net/ipv6/netfilter/ip6t_SYNPROXY.c | 4 +-- net/ipv6/netfilter/ip6t_rpfilter.c | 3 +- net/netfilter/ipset/ip_set_core.c | 6 ++-- net/netfilter/ipset/ip_set_hash_netiface.c | 2 +- net/netfilter/xt_AUDIT.c | 10 +++---- net/netfilter/xt_LOG.c | 6 ++-- net/netfilter/xt_NETMAP.c | 20 ++++++------- net/netfilter/xt_NFLOG.c | 6 ++-- net/netfilter/xt_NFQUEUE.c | 4 +-- net/netfilter/xt_REDIRECT.c | 4 +-- net/netfilter/xt_TCPMSS.c | 4 +-- net/netfilter/xt_TEE.c | 4 +-- net/netfilter/xt_TPROXY.c | 16 +++++----- net/netfilter/xt_addrtype.c | 10 
+++---- net/netfilter/xt_cluster.c | 2 +- net/netfilter/xt_connlimit.c | 8 ++--- net/netfilter/xt_conntrack.c | 8 ++--- net/netfilter/xt_devgroup.c | 4 +-- net/netfilter/xt_dscp.c | 2 +- net/netfilter/xt_ipvs.c | 4 +-- net/netfilter/xt_nfacct.c | 2 +- net/netfilter/xt_osf.c | 10 +++---- net/netfilter/xt_owner.c | 2 +- net/netfilter/xt_pkttype.c | 4 +-- net/netfilter/xt_policy.c | 4 +-- net/netfilter/xt_recent.c | 10 +++---- net/netfilter/xt_set.c | 26 ++++++++-------- net/netfilter/xt_socket.c | 4 +-- net/sched/act_ipt.c | 12 ++++---- net/sched/em_ipset.c | 17 ++++++----- 46 files changed, 196 insertions(+), 169 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 2ad1a2b289b5..cd4eaf8df445 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -4,6 +4,7 @@ #include #include +#include #include /* Test a struct->invflags and a boolean for inequality */ @@ -17,14 +18,9 @@ * @target: the target extension * @matchinfo: per-match data * @targetinfo: per-target data - * @net network namespace through which the action was invoked - * @in: input netdevice - * @out: output netdevice + * @state: pointer to hook state this packet came from * @fragoff: packet is a fragment, this is the data offset * @thoff: position of transport header relative to skb->data - * @hook: hook number given packet came from - * @family: Actual NFPROTO_* through which the function is invoked - * (helpful when match->family == NFPROTO_UNSPEC) * * Fields written to by extensions: * @@ -38,15 +34,47 @@ struct xt_action_param { union { const void *matchinfo, *targinfo; }; - struct net *net; - const struct net_device *in, *out; + const struct nf_hook_state *state; int fragoff; unsigned int thoff; - unsigned int hooknum; - u_int8_t family; bool hotdrop; }; +static inline struct net *xt_net(const struct xt_action_param *par) +{ + return par->state->net; +} + +static inline struct net_device 
*xt_in(const struct xt_action_param *par) +{ + return par->state->in; +} + +static inline const char *xt_inname(const struct xt_action_param *par) +{ + return par->state->in->name; +} + +static inline struct net_device *xt_out(const struct xt_action_param *par) +{ + return par->state->out; +} + +static inline const char *xt_outname(const struct xt_action_param *par) +{ + return par->state->out->name; +} + +static inline unsigned int xt_hooknum(const struct xt_action_param *par) +{ + return par->state->hook; +} + +static inline u_int8_t xt_family(const struct xt_action_param *par) +{ + return par->state->pf; +} + /** * struct xt_mtchk_param - parameters for match extensions' * checkentry functions diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 5031e072567b..44060344f958 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -30,11 +30,12 @@ static inline void nft_set_pktinfo(struct nft_pktinfo *pkt, const struct nf_hook_state *state) { pkt->skb = skb; - pkt->net = pkt->xt.net = state->net; - pkt->in = pkt->xt.in = state->in; - pkt->out = pkt->xt.out = state->out; - pkt->hook = pkt->xt.hooknum = state->hook; - pkt->pf = pkt->xt.family = state->pf; + pkt->net = state->net; + pkt->in = state->in; + pkt->out = state->out; + pkt->hook = state->hook; + pkt->pf = state->pf; + pkt->xt.state = state; } static inline void nft_set_pktinfo_proto_unspec(struct nft_pktinfo *pkt, diff --git a/net/bridge/netfilter/ebt_arpreply.c b/net/bridge/netfilter/ebt_arpreply.c index 070cf134a22f..5929309beaa1 100644 --- a/net/bridge/netfilter/ebt_arpreply.c +++ b/net/bridge/netfilter/ebt_arpreply.c @@ -51,7 +51,8 @@ ebt_arpreply_tg(struct sk_buff *skb, const struct xt_action_param *par) if (diptr == NULL) return EBT_DROP; - arp_send(ARPOP_REPLY, ETH_P_ARP, *siptr, (struct net_device *)par->in, + arp_send(ARPOP_REPLY, ETH_P_ARP, *siptr, + (struct net_device *)xt_in(par), *diptr, shp, info->mac, shp); return 
info->target; diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c index 9a11086ba6ff..e88bd4827ac1 100644 --- a/net/bridge/netfilter/ebt_log.c +++ b/net/bridge/netfilter/ebt_log.c @@ -179,7 +179,7 @@ ebt_log_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct ebt_log_info *info = par->targinfo; struct nf_loginfo li; - struct net *net = par->net; + struct net *net = xt_net(par); li.type = NF_LOG_TYPE_LOG; li.u.log.level = info->loglevel; @@ -190,11 +190,12 @@ ebt_log_tg(struct sk_buff *skb, const struct xt_action_param *par) * nf_log_packet() with NFT_LOG_TYPE_LOG here. --Pablo */ if (info->bitmask & EBT_LOG_NFLOG) - nf_log_packet(net, NFPROTO_BRIDGE, par->hooknum, skb, - par->in, par->out, &li, "%s", info->prefix); + nf_log_packet(net, NFPROTO_BRIDGE, xt_hooknum(par), skb, + xt_in(par), xt_out(par), &li, "%s", + info->prefix); else - ebt_log_packet(net, NFPROTO_BRIDGE, par->hooknum, skb, par->in, - par->out, &li, info->prefix); + ebt_log_packet(net, NFPROTO_BRIDGE, xt_hooknum(par), skb, + xt_in(par), xt_out(par), &li, info->prefix); return EBT_CONTINUE; } diff --git a/net/bridge/netfilter/ebt_nflog.c b/net/bridge/netfilter/ebt_nflog.c index 54816150608e..c1dc48686200 100644 --- a/net/bridge/netfilter/ebt_nflog.c +++ b/net/bridge/netfilter/ebt_nflog.c @@ -23,16 +23,16 @@ static unsigned int ebt_nflog_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct ebt_nflog_info *info = par->targinfo; + struct net *net = xt_net(par); struct nf_loginfo li; - struct net *net = par->net; li.type = NF_LOG_TYPE_ULOG; li.u.ulog.copy_len = info->len; li.u.ulog.group = info->group; li.u.ulog.qthreshold = info->threshold; - nf_log_packet(net, PF_BRIDGE, par->hooknum, skb, par->in, - par->out, &li, "%s", info->prefix); + nf_log_packet(net, PF_BRIDGE, xt_hooknum(par), skb, xt_in(par), + xt_out(par), &li, "%s", info->prefix); return EBT_CONTINUE; } diff --git a/net/bridge/netfilter/ebt_redirect.c 
b/net/bridge/netfilter/ebt_redirect.c index 2e7c4f974340..8d2a85e0594e 100644 --- a/net/bridge/netfilter/ebt_redirect.c +++ b/net/bridge/netfilter/ebt_redirect.c @@ -23,12 +23,12 @@ ebt_redirect_tg(struct sk_buff *skb, const struct xt_action_param *par) if (!skb_make_writable(skb, 0)) return EBT_DROP; - if (par->hooknum != NF_BR_BROUTING) + if (xt_hooknum(par) != NF_BR_BROUTING) /* rcu_read_lock()ed by nf_hook_thresh */ ether_addr_copy(eth_hdr(skb)->h_dest, - br_port_get_rcu(par->in)->br->dev->dev_addr); + br_port_get_rcu(xt_in(par))->br->dev->dev_addr); else - ether_addr_copy(eth_hdr(skb)->h_dest, par->in->dev_addr); + ether_addr_copy(eth_hdr(skb)->h_dest, xt_in(par)->dev_addr); skb->pkt_type = PACKET_HOST; return info->target; } diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index f5c11bbe27db..1ab6014cf0f8 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -194,12 +194,8 @@ unsigned int ebt_do_table(struct sk_buff *skb, const struct ebt_table_info *private; struct xt_action_param acpar; - acpar.family = NFPROTO_BRIDGE; - acpar.net = state->net; - acpar.in = state->in; - acpar.out = state->out; + acpar.state = state; acpar.hotdrop = false; - acpar.hooknum = hook; read_lock_bh(&table->lock); private = table->private; diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index b31df597fd37..e76ab23a2deb 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -217,11 +217,7 @@ unsigned int arpt_do_table(struct sk_buff *skb, */ e = get_entry(table_base, private->hook_entry[hook]); - acpar.net = state->net; - acpar.in = state->in; - acpar.out = state->out; - acpar.hooknum = hook; - acpar.family = NFPROTO_ARP; + acpar.state = state; acpar.hotdrop = false; arp = arp_hdr(skb); diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 7c00ce90adb8..de4fa03f46f3 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ 
b/net/ipv4/netfilter/ip_tables.c @@ -261,11 +261,7 @@ ipt_do_table(struct sk_buff *skb, acpar.fragoff = ntohs(ip->frag_off) & IP_OFFSET; acpar.thoff = ip_hdrlen(skb); acpar.hotdrop = false; - acpar.net = state->net; - acpar.in = state->in; - acpar.out = state->out; - acpar.family = NFPROTO_IPV4; - acpar.hooknum = hook; + acpar.state = state; IP_NF_ASSERT(table->valid_hooks & (1 << hook)); local_bh_disable(); diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index da7f02a0b868..34cfb9b0bc0a 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -55,7 +55,8 @@ masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par) range.min_proto = mr->range[0].min; range.max_proto = mr->range[0].max; - return nf_nat_masquerade_ipv4(skb, par->hooknum, &range, par->out); + return nf_nat_masquerade_ipv4(skb, xt_hooknum(par), &range, + xt_out(par)); } static struct xt_target masquerade_tg_reg __read_mostly = { diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index 1d16c0f28df0..8bd0d7b26632 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -34,7 +34,7 @@ static unsigned int reject_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct ipt_reject_info *reject = par->targinfo; - int hook = par->hooknum; + int hook = xt_hooknum(par); switch (reject->with) { case IPT_ICMP_NET_UNREACHABLE: @@ -59,7 +59,7 @@ reject_tg(struct sk_buff *skb, const struct xt_action_param *par) nf_send_unreach(skb, ICMP_PKT_FILTERED, hook); break; case IPT_TCP_RESET: - nf_send_reset(par->net, skb, hook); + nf_send_reset(xt_net(par), skb, hook); case IPT_ICMP_ECHOREPLY: /* Doesn't happen. 
*/ break; diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c index db5b87509446..361411688221 100644 --- a/net/ipv4/netfilter/ipt_SYNPROXY.c +++ b/net/ipv4/netfilter/ipt_SYNPROXY.c @@ -263,12 +263,12 @@ static unsigned int synproxy_tg4(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_synproxy_info *info = par->targinfo; - struct net *net = par->net; + struct net *net = xt_net(par); struct synproxy_net *snet = synproxy_pernet(net); struct synproxy_options opts = {}; struct tcphdr *th, _th; - if (nf_ip_checksum(skb, par->hooknum, par->thoff, IPPROTO_TCP)) + if (nf_ip_checksum(skb, xt_hooknum(par), par->thoff, IPPROTO_TCP)) return NF_DROP; th = skb_header_pointer(skb, par->thoff, sizeof(_th), &_th); diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c index 78cc64eddfc1..59b49945b481 100644 --- a/net/ipv4/netfilter/ipt_rpfilter.c +++ b/net/ipv4/netfilter/ipt_rpfilter.c @@ -95,7 +95,7 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par) flow.flowi4_tos = RT_TOS(iph->tos); flow.flowi4_scope = RT_SCOPE_UNIVERSE; - return rpfilter_lookup_reverse(par->net, &flow, par->in, info->flags) ^ invert; + return rpfilter_lookup_reverse(xt_net(par), &flow, xt_in(par), info->flags) ^ invert; } static int rpfilter_check(const struct xt_mtchk_param *par) diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 55aacea24396..7eac01d5d621 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -291,11 +291,7 @@ ip6t_do_table(struct sk_buff *skb, * rule is also a fragment-specific rule, non-fragments won't * match it. 
*/ acpar.hotdrop = false; - acpar.net = state->net; - acpar.in = state->in; - acpar.out = state->out; - acpar.family = NFPROTO_IPV6; - acpar.hooknum = hook; + acpar.state = state; IP_NF_ASSERT(table->valid_hooks & (1 << hook)); diff --git a/net/ipv6/netfilter/ip6t_MASQUERADE.c b/net/ipv6/netfilter/ip6t_MASQUERADE.c index 7f9f45d829d2..2b1a15846f9a 100644 --- a/net/ipv6/netfilter/ip6t_MASQUERADE.c +++ b/net/ipv6/netfilter/ip6t_MASQUERADE.c @@ -24,7 +24,7 @@ static unsigned int masquerade_tg6(struct sk_buff *skb, const struct xt_action_param *par) { - return nf_nat_masquerade_ipv6(skb, par->targinfo, par->out); + return nf_nat_masquerade_ipv6(skb, par->targinfo, xt_out(par)); } static int masquerade_tg6_checkentry(const struct xt_tgchk_param *par) diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index db29bbf41b59..fa51a205918d 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -39,35 +39,40 @@ static unsigned int reject_tg6(struct sk_buff *skb, const struct xt_action_param *par) { const struct ip6t_reject_info *reject = par->targinfo; - struct net *net = par->net; + struct net *net = xt_net(par); switch (reject->with) { case IP6T_ICMP6_NO_ROUTE: - nf_send_unreach6(net, skb, ICMPV6_NOROUTE, par->hooknum); + nf_send_unreach6(net, skb, ICMPV6_NOROUTE, xt_hooknum(par)); break; case IP6T_ICMP6_ADM_PROHIBITED: - nf_send_unreach6(net, skb, ICMPV6_ADM_PROHIBITED, par->hooknum); + nf_send_unreach6(net, skb, ICMPV6_ADM_PROHIBITED, + xt_hooknum(par)); break; case IP6T_ICMP6_NOT_NEIGHBOUR: - nf_send_unreach6(net, skb, ICMPV6_NOT_NEIGHBOUR, par->hooknum); + nf_send_unreach6(net, skb, ICMPV6_NOT_NEIGHBOUR, + xt_hooknum(par)); break; case IP6T_ICMP6_ADDR_UNREACH: - nf_send_unreach6(net, skb, ICMPV6_ADDR_UNREACH, par->hooknum); + nf_send_unreach6(net, skb, ICMPV6_ADDR_UNREACH, + xt_hooknum(par)); break; case IP6T_ICMP6_PORT_UNREACH: - nf_send_unreach6(net, skb, ICMPV6_PORT_UNREACH, par->hooknum); + 
nf_send_unreach6(net, skb, ICMPV6_PORT_UNREACH, + xt_hooknum(par)); break; case IP6T_ICMP6_ECHOREPLY: /* Do nothing */ break; case IP6T_TCP_RESET: - nf_send_reset6(net, skb, par->hooknum); + nf_send_reset6(net, skb, xt_hooknum(par)); break; case IP6T_ICMP6_POLICY_FAIL: - nf_send_unreach6(net, skb, ICMPV6_POLICY_FAIL, par->hooknum); + nf_send_unreach6(net, skb, ICMPV6_POLICY_FAIL, xt_hooknum(par)); break; case IP6T_ICMP6_REJECT_ROUTE: - nf_send_unreach6(net, skb, ICMPV6_REJECT_ROUTE, par->hooknum); + nf_send_unreach6(net, skb, ICMPV6_REJECT_ROUTE, + xt_hooknum(par)); break; } diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c index 06bed74cf5ee..99a1216287c8 100644 --- a/net/ipv6/netfilter/ip6t_SYNPROXY.c +++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c @@ -277,12 +277,12 @@ static unsigned int synproxy_tg6(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_synproxy_info *info = par->targinfo; - struct net *net = par->net; + struct net *net = xt_net(par); struct synproxy_net *snet = synproxy_pernet(net); struct synproxy_options opts = {}; struct tcphdr *th, _th; - if (nf_ip6_checksum(skb, par->hooknum, par->thoff, IPPROTO_TCP)) + if (nf_ip6_checksum(skb, xt_hooknum(par), par->thoff, IPPROTO_TCP)) return NF_DROP; th = skb_header_pointer(skb, par->thoff, sizeof(_th), &_th); diff --git a/net/ipv6/netfilter/ip6t_rpfilter.c b/net/ipv6/netfilter/ip6t_rpfilter.c index 1ee1b25df096..d5263dc364a9 100644 --- a/net/ipv6/netfilter/ip6t_rpfilter.c +++ b/net/ipv6/netfilter/ip6t_rpfilter.c @@ -93,7 +93,8 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par) if (unlikely(saddrtype == IPV6_ADDR_ANY)) return true ^ invert; /* not routable: forward path will drop it */ - return rpfilter_lookup_reverse6(par->net, skb, par->in, info->flags) ^ invert; + return rpfilter_lookup_reverse6(xt_net(par), skb, xt_in(par), + info->flags) ^ invert; } static int rpfilter_check(const struct xt_mtchk_param *par) diff --git 
a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index a748b0c2c981..3f1b945a24d5 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -541,7 +541,7 @@ int ip_set_test(ip_set_id_t index, const struct sk_buff *skb, const struct xt_action_param *par, struct ip_set_adt_opt *opt) { - struct ip_set *set = ip_set_rcu_get(par->net, index); + struct ip_set *set = ip_set_rcu_get(xt_net(par), index); int ret = 0; BUG_ON(!set); @@ -579,7 +579,7 @@ int ip_set_add(ip_set_id_t index, const struct sk_buff *skb, const struct xt_action_param *par, struct ip_set_adt_opt *opt) { - struct ip_set *set = ip_set_rcu_get(par->net, index); + struct ip_set *set = ip_set_rcu_get(xt_net(par), index); int ret; BUG_ON(!set); @@ -601,7 +601,7 @@ int ip_set_del(ip_set_id_t index, const struct sk_buff *skb, const struct xt_action_param *par, struct ip_set_adt_opt *opt) { - struct ip_set *set = ip_set_rcu_get(par->net, index); + struct ip_set *set = ip_set_rcu_get(xt_net(par), index); int ret = 0; BUG_ON(!set); diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c index f0f688db6213..aa1a776613b9 100644 --- a/net/netfilter/ipset/ip_set_hash_netiface.c +++ b/net/netfilter/ipset/ip_set_hash_netiface.c @@ -170,7 +170,7 @@ hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb, ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip); e.ip &= ip_set_netmask(e.cidr); -#define IFACE(dir) (par->dir ? par->dir->name : "") +#define IFACE(dir) (par->state->dir ? 
par->state->dir->name : "") #define SRCDIR (opt->flags & IPSET_DIM_TWO_SRC) if (opt->cmdflags & IPSET_FLAG_PHYSDEV) { diff --git a/net/netfilter/xt_AUDIT.c b/net/netfilter/xt_AUDIT.c index 4973cbddc446..19247a17e511 100644 --- a/net/netfilter/xt_AUDIT.c +++ b/net/netfilter/xt_AUDIT.c @@ -132,9 +132,9 @@ audit_tg(struct sk_buff *skb, const struct xt_action_param *par) goto errout; audit_log_format(ab, "action=%hhu hook=%u len=%u inif=%s outif=%s", - info->type, par->hooknum, skb->len, - par->in ? par->in->name : "?", - par->out ? par->out->name : "?"); + info->type, xt_hooknum(par), skb->len, + xt_in(par) ? xt_inname(par) : "?", + xt_out(par) ? xt_outname(par) : "?"); if (skb->mark) audit_log_format(ab, " mark=%#x", skb->mark); @@ -144,7 +144,7 @@ audit_tg(struct sk_buff *skb, const struct xt_action_param *par) eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest, ntohs(eth_hdr(skb)->h_proto)); - if (par->family == NFPROTO_BRIDGE) { + if (xt_family(par) == NFPROTO_BRIDGE) { switch (eth_hdr(skb)->h_proto) { case htons(ETH_P_IP): audit_ip4(ab, skb); @@ -157,7 +157,7 @@ audit_tg(struct sk_buff *skb, const struct xt_action_param *par) } } - switch (par->family) { + switch (xt_family(par)) { case NFPROTO_IPV4: audit_ip4(ab, skb); break; diff --git a/net/netfilter/xt_LOG.c b/net/netfilter/xt_LOG.c index 1763ab82bcd7..c3b2017ebe41 100644 --- a/net/netfilter/xt_LOG.c +++ b/net/netfilter/xt_LOG.c @@ -32,15 +32,15 @@ static unsigned int log_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_log_info *loginfo = par->targinfo; + struct net *net = xt_net(par); struct nf_loginfo li; - struct net *net = par->net; li.type = NF_LOG_TYPE_LOG; li.u.log.level = loginfo->level; li.u.log.logflags = loginfo->logflags; - nf_log_packet(net, par->family, par->hooknum, skb, par->in, par->out, - &li, "%s", loginfo->prefix); + nf_log_packet(net, xt_family(par), xt_hooknum(par), skb, xt_in(par), + xt_out(par), &li, "%s", loginfo->prefix); return XT_CONTINUE; } diff --git 
a/net/netfilter/xt_NETMAP.c b/net/netfilter/xt_NETMAP.c index b253e07cb1c5..94d0b5411192 100644 --- a/net/netfilter/xt_NETMAP.c +++ b/net/netfilter/xt_NETMAP.c @@ -33,8 +33,8 @@ netmap_tg6(struct sk_buff *skb, const struct xt_action_param *par) netmask.ip6[i] = ~(range->min_addr.ip6[i] ^ range->max_addr.ip6[i]); - if (par->hooknum == NF_INET_PRE_ROUTING || - par->hooknum == NF_INET_LOCAL_OUT) + if (xt_hooknum(par) == NF_INET_PRE_ROUTING || + xt_hooknum(par) == NF_INET_LOCAL_OUT) new_addr.in6 = ipv6_hdr(skb)->daddr; else new_addr.in6 = ipv6_hdr(skb)->saddr; @@ -51,7 +51,7 @@ netmap_tg6(struct sk_buff *skb, const struct xt_action_param *par) newrange.min_proto = range->min_proto; newrange.max_proto = range->max_proto; - return nf_nat_setup_info(ct, &newrange, HOOK2MANIP(par->hooknum)); + return nf_nat_setup_info(ct, &newrange, HOOK2MANIP(xt_hooknum(par))); } static int netmap_tg6_checkentry(const struct xt_tgchk_param *par) @@ -72,16 +72,16 @@ netmap_tg4(struct sk_buff *skb, const struct xt_action_param *par) const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; struct nf_nat_range newrange; - NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING || - par->hooknum == NF_INET_POST_ROUTING || - par->hooknum == NF_INET_LOCAL_OUT || - par->hooknum == NF_INET_LOCAL_IN); + NF_CT_ASSERT(xt_hooknum(par) == NF_INET_PRE_ROUTING || + xt_hooknum(par) == NF_INET_POST_ROUTING || + xt_hooknum(par) == NF_INET_LOCAL_OUT || + xt_hooknum(par) == NF_INET_LOCAL_IN); ct = nf_ct_get(skb, &ctinfo); netmask = ~(mr->range[0].min_ip ^ mr->range[0].max_ip); - if (par->hooknum == NF_INET_PRE_ROUTING || - par->hooknum == NF_INET_LOCAL_OUT) + if (xt_hooknum(par) == NF_INET_PRE_ROUTING || + xt_hooknum(par) == NF_INET_LOCAL_OUT) new_ip = ip_hdr(skb)->daddr & ~netmask; else new_ip = ip_hdr(skb)->saddr & ~netmask; @@ -96,7 +96,7 @@ netmap_tg4(struct sk_buff *skb, const struct xt_action_param *par) newrange.max_proto = mr->range[0].max; /* Hand modified range to generic setup. 
*/ - return nf_nat_setup_info(ct, &newrange, HOOK2MANIP(par->hooknum)); + return nf_nat_setup_info(ct, &newrange, HOOK2MANIP(xt_hooknum(par))); } static int netmap_tg4_check(const struct xt_tgchk_param *par) diff --git a/net/netfilter/xt_NFLOG.c b/net/netfilter/xt_NFLOG.c index 8668a5c18dc3..c7f8958cea4a 100644 --- a/net/netfilter/xt_NFLOG.c +++ b/net/netfilter/xt_NFLOG.c @@ -25,8 +25,8 @@ static unsigned int nflog_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_nflog_info *info = par->targinfo; + struct net *net = xt_net(par); struct nf_loginfo li; - struct net *net = par->net; li.type = NF_LOG_TYPE_ULOG; li.u.ulog.copy_len = info->len; @@ -37,8 +37,8 @@ nflog_tg(struct sk_buff *skb, const struct xt_action_param *par) if (info->flags & XT_NFLOG_F_COPY_LEN) li.u.ulog.flags |= NF_LOG_F_COPY_LEN; - nfulnl_log_packet(net, par->family, par->hooknum, skb, par->in, - par->out, &li, info->prefix); + nfulnl_log_packet(net, xt_family(par), xt_hooknum(par), skb, + xt_in(par), xt_out(par), &li, info->prefix); return XT_CONTINUE; } diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c index 8f1779ff7e30..a360b99a958a 100644 --- a/net/netfilter/xt_NFQUEUE.c +++ b/net/netfilter/xt_NFQUEUE.c @@ -43,7 +43,7 @@ nfqueue_tg_v1(struct sk_buff *skb, const struct xt_action_param *par) if (info->queues_total > 1) { queue = nfqueue_hash(skb, queue, info->queues_total, - par->family, jhash_initval); + xt_family(par), jhash_initval); } return NF_QUEUE_NR(queue); } @@ -98,7 +98,7 @@ nfqueue_tg_v3(struct sk_buff *skb, const struct xt_action_param *par) queue = info->queuenum + cpu % info->queues_total; } else { queue = nfqueue_hash(skb, queue, info->queues_total, - par->family, jhash_initval); + xt_family(par), jhash_initval); } } diff --git a/net/netfilter/xt_REDIRECT.c b/net/netfilter/xt_REDIRECT.c index 03f0b370e178..651dce65a30b 100644 --- a/net/netfilter/xt_REDIRECT.c +++ b/net/netfilter/xt_REDIRECT.c @@ -31,7 +31,7 @@ static unsigned int 
redirect_tg6(struct sk_buff *skb, const struct xt_action_param *par) { - return nf_nat_redirect_ipv6(skb, par->targinfo, par->hooknum); + return nf_nat_redirect_ipv6(skb, par->targinfo, xt_hooknum(par)); } static int redirect_tg6_checkentry(const struct xt_tgchk_param *par) @@ -62,7 +62,7 @@ static int redirect_tg4_check(const struct xt_tgchk_param *par) static unsigned int redirect_tg4(struct sk_buff *skb, const struct xt_action_param *par) { - return nf_nat_redirect_ipv4(skb, par->targinfo, par->hooknum); + return nf_nat_redirect_ipv4(skb, par->targinfo, xt_hooknum(par)); } static struct xt_target redirect_tg_reg[] __read_mostly = { diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c index 872db2d0e2a9..27241a767f17 100644 --- a/net/netfilter/xt_TCPMSS.c +++ b/net/netfilter/xt_TCPMSS.c @@ -108,7 +108,7 @@ tcpmss_mangle_packet(struct sk_buff *skb, return -1; if (info->mss == XT_TCPMSS_CLAMP_PMTU) { - struct net *net = par->net; + struct net *net = xt_net(par); unsigned int in_mtu = tcpmss_reverse_mtu(net, skb, family); unsigned int min_mtu = min(dst_mtu(skb_dst(skb)), in_mtu); @@ -172,7 +172,7 @@ tcpmss_mangle_packet(struct sk_buff *skb, * length IPv6 header of 60, ergo the default MSS value is 1220 * Since no MSS was provided, we must use the default values */ - if (par->family == NFPROTO_IPV4) + if (xt_family(par) == NFPROTO_IPV4) newmss = min(newmss, (u16)536); else newmss = min(newmss, (u16)1220); diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c index 0471db4032c5..1c57ace75ae6 100644 --- a/net/netfilter/xt_TEE.c +++ b/net/netfilter/xt_TEE.c @@ -33,7 +33,7 @@ tee_tg4(struct sk_buff *skb, const struct xt_action_param *par) const struct xt_tee_tginfo *info = par->targinfo; int oif = info->priv ? 
info->priv->oif : 0; - nf_dup_ipv4(par->net, skb, par->hooknum, &info->gw.in, oif); + nf_dup_ipv4(xt_net(par), skb, xt_hooknum(par), &info->gw.in, oif); return XT_CONTINUE; } @@ -45,7 +45,7 @@ tee_tg6(struct sk_buff *skb, const struct xt_action_param *par) const struct xt_tee_tginfo *info = par->targinfo; int oif = info->priv ? info->priv->oif : 0; - nf_dup_ipv6(par->net, skb, par->hooknum, &info->gw.in6, oif); + nf_dup_ipv6(xt_net(par), skb, xt_hooknum(par), &info->gw.in6, oif); return XT_CONTINUE; } diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c index 663c4c3c9072..dbd72cc40e42 100644 --- a/net/netfilter/xt_TPROXY.c +++ b/net/netfilter/xt_TPROXY.c @@ -364,7 +364,8 @@ tproxy_tg4_v0(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_tproxy_target_info *tgi = par->targinfo; - return tproxy_tg4(par->net, skb, tgi->laddr, tgi->lport, tgi->mark_mask, tgi->mark_value); + return tproxy_tg4(xt_net(par), skb, tgi->laddr, tgi->lport, + tgi->mark_mask, tgi->mark_value); } static unsigned int @@ -372,7 +373,8 @@ tproxy_tg4_v1(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_tproxy_target_info_v1 *tgi = par->targinfo; - return tproxy_tg4(par->net, skb, tgi->laddr.ip, tgi->lport, tgi->mark_mask, tgi->mark_value); + return tproxy_tg4(xt_net(par), skb, tgi->laddr.ip, tgi->lport, + tgi->mark_mask, tgi->mark_value); } #ifdef XT_TPROXY_HAVE_IPV6 @@ -442,7 +444,7 @@ tproxy_handle_time_wait6(struct sk_buff *skb, int tproto, int thoff, * to a listener socket if there's one */ struct sock *sk2; - sk2 = nf_tproxy_get_sock_v6(par->net, skb, thoff, hp, tproto, + sk2 = nf_tproxy_get_sock_v6(xt_net(par), skb, thoff, hp, tproto, &iph->saddr, tproxy_laddr6(skb, &tgi->laddr.in6, &iph->daddr), hp->source, @@ -485,10 +487,10 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par) * addresses, this happens if the redirect already happened * and the current packet belongs to an already established * connection */ - 
sk = nf_tproxy_get_sock_v6(par->net, skb, thoff, hp, tproto, + sk = nf_tproxy_get_sock_v6(xt_net(par), skb, thoff, hp, tproto, &iph->saddr, &iph->daddr, hp->source, hp->dest, - par->in, NFT_LOOKUP_ESTABLISHED); + xt_in(par), NFT_LOOKUP_ESTABLISHED); laddr = tproxy_laddr6(skb, &tgi->laddr.in6, &iph->daddr); lport = tgi->lport ? tgi->lport : hp->dest; @@ -500,10 +502,10 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par) else if (!sk) /* no there's no established connection, check if * there's a listener on the redirected addr/port */ - sk = nf_tproxy_get_sock_v6(par->net, skb, thoff, hp, + sk = nf_tproxy_get_sock_v6(xt_net(par), skb, thoff, hp, tproto, &iph->saddr, laddr, hp->source, lport, - par->in, NFT_LOOKUP_LISTENER); + xt_in(par), NFT_LOOKUP_LISTENER); /* NOTE: assign_sock consumes our sk reference */ if (sk && tproxy_sk_is_transparent(sk)) { diff --git a/net/netfilter/xt_addrtype.c b/net/netfilter/xt_addrtype.c index 11d6091991a4..e329dabde35f 100644 --- a/net/netfilter/xt_addrtype.c +++ b/net/netfilter/xt_addrtype.c @@ -125,7 +125,7 @@ static inline bool match_type(struct net *net, const struct net_device *dev, static bool addrtype_mt_v0(const struct sk_buff *skb, struct xt_action_param *par) { - struct net *net = par->net; + struct net *net = xt_net(par); const struct xt_addrtype_info *info = par->matchinfo; const struct iphdr *iph = ip_hdr(skb); bool ret = true; @@ -143,19 +143,19 @@ addrtype_mt_v0(const struct sk_buff *skb, struct xt_action_param *par) static bool addrtype_mt_v1(const struct sk_buff *skb, struct xt_action_param *par) { - struct net *net = par->net; + struct net *net = xt_net(par); const struct xt_addrtype_info_v1 *info = par->matchinfo; const struct iphdr *iph; const struct net_device *dev = NULL; bool ret = true; if (info->flags & XT_ADDRTYPE_LIMIT_IFACE_IN) - dev = par->in; + dev = xt_in(par); else if (info->flags & XT_ADDRTYPE_LIMIT_IFACE_OUT) - dev = par->out; + dev = xt_out(par); #if 
IS_ENABLED(CONFIG_IP6_NF_IPTABLES) - if (par->family == NFPROTO_IPV6) + if (xt_family(par) == NFPROTO_IPV6) return addrtype_mt6(net, dev, skb, info); #endif iph = ip_hdr(skb); diff --git a/net/netfilter/xt_cluster.c b/net/netfilter/xt_cluster.c index 96fa26b20b67..9a9884a39c0e 100644 --- a/net/netfilter/xt_cluster.c +++ b/net/netfilter/xt_cluster.c @@ -112,7 +112,7 @@ xt_cluster_mt(const struct sk_buff *skb, struct xt_action_param *par) * know, matches should not alter packets, but we are doing this here * because we would need to add a PKTTYPE target for this sole purpose. */ - if (!xt_cluster_is_multicast_addr(skb, par->family) && + if (!xt_cluster_is_multicast_addr(skb, xt_family(par)) && skb->pkt_type == PACKET_MULTICAST) { pskb->pkt_type = PACKET_HOST; } diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index b6dc322593a3..bb3845339efd 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -317,7 +317,7 @@ static int count_them(struct net *net, static bool connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) { - struct net *net = par->net; + struct net *net = xt_net(par); const struct xt_connlimit_info *info = par->matchinfo; union nf_inet_addr addr; struct nf_conntrack_tuple tuple; @@ -332,11 +332,11 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) tuple_ptr = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; zone = nf_ct_zone(ct); } else if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), - par->family, net, &tuple)) { + xt_family(par), net, &tuple)) { goto hotdrop; } - if (par->family == NFPROTO_IPV6) { + if (xt_family(par) == NFPROTO_IPV6) { const struct ipv6hdr *iph = ipv6_hdr(skb); memcpy(&addr.ip6, (info->flags & XT_CONNLIMIT_DADDR) ? 
&iph->daddr : &iph->saddr, sizeof(addr.ip6)); @@ -347,7 +347,7 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) } connections = count_them(net, info->data, tuple_ptr, &addr, - &info->mask, par->family, zone); + &info->mask, xt_family(par), zone); if (connections == 0) /* kmalloc failed, drop it entirely */ goto hotdrop; diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c index a3b8f697cfc5..2dea15ebc55b 100644 --- a/net/netfilter/xt_conntrack.c +++ b/net/netfilter/xt_conntrack.c @@ -200,22 +200,22 @@ conntrack_mt(const struct sk_buff *skb, struct xt_action_param *par, return false; if (info->match_flags & XT_CONNTRACK_ORIGSRC) - if (conntrack_mt_origsrc(ct, info, par->family) ^ + if (conntrack_mt_origsrc(ct, info, xt_family(par)) ^ !(info->invert_flags & XT_CONNTRACK_ORIGSRC)) return false; if (info->match_flags & XT_CONNTRACK_ORIGDST) - if (conntrack_mt_origdst(ct, info, par->family) ^ + if (conntrack_mt_origdst(ct, info, xt_family(par)) ^ !(info->invert_flags & XT_CONNTRACK_ORIGDST)) return false; if (info->match_flags & XT_CONNTRACK_REPLSRC) - if (conntrack_mt_replsrc(ct, info, par->family) ^ + if (conntrack_mt_replsrc(ct, info, xt_family(par)) ^ !(info->invert_flags & XT_CONNTRACK_REPLSRC)) return false; if (info->match_flags & XT_CONNTRACK_REPLDST) - if (conntrack_mt_repldst(ct, info, par->family) ^ + if (conntrack_mt_repldst(ct, info, xt_family(par)) ^ !(info->invert_flags & XT_CONNTRACK_REPLDST)) return false; diff --git a/net/netfilter/xt_devgroup.c b/net/netfilter/xt_devgroup.c index d9202cdd25c9..96ebe1cdefec 100644 --- a/net/netfilter/xt_devgroup.c +++ b/net/netfilter/xt_devgroup.c @@ -24,12 +24,12 @@ static bool devgroup_mt(const struct sk_buff *skb, struct xt_action_param *par) const struct xt_devgroup_info *info = par->matchinfo; if (info->flags & XT_DEVGROUP_MATCH_SRC && - (((info->src_group ^ par->in->group) & info->src_mask ? 1 : 0) ^ + (((info->src_group ^ xt_in(par)->group) & info->src_mask ? 
1 : 0) ^ ((info->flags & XT_DEVGROUP_INVERT_SRC) ? 1 : 0))) return false; if (info->flags & XT_DEVGROUP_MATCH_DST && - (((info->dst_group ^ par->out->group) & info->dst_mask ? 1 : 0) ^ + (((info->dst_group ^ xt_out(par)->group) & info->dst_mask ? 1 : 0) ^ ((info->flags & XT_DEVGROUP_INVERT_DST) ? 1 : 0))) return false; diff --git a/net/netfilter/xt_dscp.c b/net/netfilter/xt_dscp.c index 64670fc5d0e1..236ac8008909 100644 --- a/net/netfilter/xt_dscp.c +++ b/net/netfilter/xt_dscp.c @@ -58,7 +58,7 @@ static bool tos_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_tos_match_info *info = par->matchinfo; - if (par->family == NFPROTO_IPV4) + if (xt_family(par) == NFPROTO_IPV4) return ((ip_hdr(skb)->tos & info->tos_mask) == info->tos_value) ^ !!info->invert; else diff --git a/net/netfilter/xt_ipvs.c b/net/netfilter/xt_ipvs.c index 71a9d95e0a81..0fdc89064488 100644 --- a/net/netfilter/xt_ipvs.c +++ b/net/netfilter/xt_ipvs.c @@ -48,9 +48,9 @@ static bool ipvs_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_ipvs_mtinfo *data = par->matchinfo; - struct netns_ipvs *ipvs = net_ipvs(par->net); + struct netns_ipvs *ipvs = net_ipvs(xt_net(par)); /* ipvs_mt_check ensures that family is only NFPROTO_IPV[46]. */ - const u_int8_t family = par->family; + const u_int8_t family = xt_family(par); struct ip_vs_iphdr iph; struct ip_vs_protocol *pp; struct ip_vs_conn *cp; diff --git a/net/netfilter/xt_nfacct.c b/net/netfilter/xt_nfacct.c index cf327593852a..cc0518fe598e 100644 --- a/net/netfilter/xt_nfacct.c +++ b/net/netfilter/xt_nfacct.c @@ -26,7 +26,7 @@ static bool nfacct_mt(const struct sk_buff *skb, struct xt_action_param *par) nfnl_acct_update(skb, info->nfacct); - overquota = nfnl_acct_overquota(par->net, skb, info->nfacct); + overquota = nfnl_acct_overquota(xt_net(par), skb, info->nfacct); return overquota == NFACCT_UNDERQUOTA ? 
false : true; } diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c index 2455b69b5810..c05fefcec238 100644 --- a/net/netfilter/xt_osf.c +++ b/net/netfilter/xt_osf.c @@ -201,7 +201,7 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p) unsigned char opts[MAX_IPOPTLEN]; const struct xt_osf_finger *kf; const struct xt_osf_user_finger *f; - struct net *net = p->net; + struct net *net = xt_net(p); if (!info) return false; @@ -326,8 +326,8 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p) fcount++; if (info->flags & XT_OSF_LOG) - nf_log_packet(net, p->family, p->hooknum, skb, - p->in, p->out, NULL, + nf_log_packet(net, xt_family(p), xt_hooknum(p), skb, + xt_in(p), xt_out(p), NULL, "%s [%s:%s] : %pI4:%d -> %pI4:%d hops=%d\n", f->genre, f->version, f->subtype, &ip->saddr, ntohs(tcp->source), @@ -341,8 +341,8 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p) rcu_read_unlock(); if (!fcount && (info->flags & XT_OSF_LOG)) - nf_log_packet(net, p->family, p->hooknum, skb, p->in, - p->out, NULL, + nf_log_packet(net, xt_family(p), xt_hooknum(p), skb, xt_in(p), + xt_out(p), NULL, "Remote OS is not known: %pI4:%u -> %pI4:%u\n", &ip->saddr, ntohs(tcp->source), &ip->daddr, ntohs(tcp->dest)); diff --git a/net/netfilter/xt_owner.c b/net/netfilter/xt_owner.c index a20e731b5b6c..16477df45b3b 100644 --- a/net/netfilter/xt_owner.c +++ b/net/netfilter/xt_owner.c @@ -63,7 +63,7 @@ owner_mt(const struct sk_buff *skb, struct xt_action_param *par) const struct xt_owner_match_info *info = par->matchinfo; const struct file *filp; struct sock *sk = skb_to_full_sk(skb); - struct net *net = par->net; + struct net *net = xt_net(par); if (sk == NULL || sk->sk_socket == NULL) return (info->match ^ info->invert) == 0; diff --git a/net/netfilter/xt_pkttype.c b/net/netfilter/xt_pkttype.c index 5b645cb598fc..57efb703ff18 100644 --- a/net/netfilter/xt_pkttype.c +++ b/net/netfilter/xt_pkttype.c @@ -30,10 +30,10 @@ 
pkttype_mt(const struct sk_buff *skb, struct xt_action_param *par) if (skb->pkt_type != PACKET_LOOPBACK) type = skb->pkt_type; - else if (par->family == NFPROTO_IPV4 && + else if (xt_family(par) == NFPROTO_IPV4 && ipv4_is_multicast(ip_hdr(skb)->daddr)) type = PACKET_MULTICAST; - else if (par->family == NFPROTO_IPV6 && + else if (xt_family(par) == NFPROTO_IPV6 && ipv6_hdr(skb)->daddr.s6_addr[0] == 0xFF) type = PACKET_MULTICAST; else diff --git a/net/netfilter/xt_policy.c b/net/netfilter/xt_policy.c index f23e97bb42d7..2b4ab189bba7 100644 --- a/net/netfilter/xt_policy.c +++ b/net/netfilter/xt_policy.c @@ -116,9 +116,9 @@ policy_mt(const struct sk_buff *skb, struct xt_action_param *par) int ret; if (info->flags & XT_POLICY_MATCH_IN) - ret = match_policy_in(skb, info, par->family); + ret = match_policy_in(skb, info, xt_family(par)); else - ret = match_policy_out(skb, info, par->family); + ret = match_policy_out(skb, info, xt_family(par)); if (ret < 0) ret = info->flags & XT_POLICY_MATCH_NONE ? 
true : false; diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index e3b7a09b103e..bf250000e084 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -236,7 +236,7 @@ static void recent_table_flush(struct recent_table *t) static bool recent_mt(const struct sk_buff *skb, struct xt_action_param *par) { - struct net *net = par->net; + struct net *net = xt_net(par); struct recent_net *recent_net = recent_pernet(net); const struct xt_recent_mtinfo_v1 *info = par->matchinfo; struct recent_table *t; @@ -245,7 +245,7 @@ recent_mt(const struct sk_buff *skb, struct xt_action_param *par) u_int8_t ttl; bool ret = info->invert; - if (par->family == NFPROTO_IPV4) { + if (xt_family(par) == NFPROTO_IPV4) { const struct iphdr *iph = ip_hdr(skb); if (info->side == XT_RECENT_DEST) @@ -266,7 +266,7 @@ recent_mt(const struct sk_buff *skb, struct xt_action_param *par) } /* use TTL as seen before forwarding */ - if (par->out != NULL && skb->sk == NULL) + if (xt_out(par) != NULL && skb->sk == NULL) ttl++; spin_lock_bh(&recent_lock); @@ -274,12 +274,12 @@ recent_mt(const struct sk_buff *skb, struct xt_action_param *par) nf_inet_addr_mask(&addr, &addr_mask, &t->mask); - e = recent_entry_lookup(t, &addr_mask, par->family, + e = recent_entry_lookup(t, &addr_mask, xt_family(par), (info->check_set & XT_RECENT_TTL) ? 
ttl : 0); if (e == NULL) { if (!(info->check_set & XT_RECENT_SET)) goto out; - e = recent_entry_init(t, &addr_mask, par->family, ttl); + e = recent_entry_init(t, &addr_mask, xt_family(par), ttl); if (e == NULL) par->hotdrop = true; ret = !ret; diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c index 5669e5b453f4..1bfede7be418 100644 --- a/net/netfilter/xt_set.c +++ b/net/netfilter/xt_set.c @@ -55,7 +55,7 @@ set_match_v0(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_set_info_match_v0 *info = par->matchinfo; - ADT_OPT(opt, par->family, info->match_set.u.compat.dim, + ADT_OPT(opt, xt_family(par), info->match_set.u.compat.dim, info->match_set.u.compat.flags, 0, UINT_MAX); return match_set(info->match_set.index, skb, par, &opt, @@ -118,7 +118,7 @@ set_match_v1(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_set_info_match_v1 *info = par->matchinfo; - ADT_OPT(opt, par->family, info->match_set.dim, + ADT_OPT(opt, xt_family(par), info->match_set.dim, info->match_set.flags, 0, UINT_MAX); if (opt.flags & IPSET_RETURN_NOMATCH) @@ -184,7 +184,7 @@ set_match_v3(const struct sk_buff *skb, struct xt_action_param *par) const struct xt_set_info_match_v3 *info = par->matchinfo; int ret; - ADT_OPT(opt, par->family, info->match_set.dim, + ADT_OPT(opt, xt_family(par), info->match_set.dim, info->match_set.flags, info->flags, UINT_MAX); if (info->packets.op != IPSET_COUNTER_NONE || @@ -231,7 +231,7 @@ set_match_v4(const struct sk_buff *skb, struct xt_action_param *par) const struct xt_set_info_match_v4 *info = par->matchinfo; int ret; - ADT_OPT(opt, par->family, info->match_set.dim, + ADT_OPT(opt, xt_family(par), info->match_set.dim, info->match_set.flags, info->flags, UINT_MAX); if (info->packets.op != IPSET_COUNTER_NONE || @@ -259,9 +259,9 @@ set_target_v0(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_set_info_target_v0 *info = par->targinfo; - ADT_OPT(add_opt, par->family, 
info->add_set.u.compat.dim, + ADT_OPT(add_opt, xt_family(par), info->add_set.u.compat.dim, info->add_set.u.compat.flags, 0, UINT_MAX); - ADT_OPT(del_opt, par->family, info->del_set.u.compat.dim, + ADT_OPT(del_opt, xt_family(par), info->del_set.u.compat.dim, info->del_set.u.compat.flags, 0, UINT_MAX); if (info->add_set.index != IPSET_INVALID_ID) @@ -332,9 +332,9 @@ set_target_v1(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_set_info_target_v1 *info = par->targinfo; - ADT_OPT(add_opt, par->family, info->add_set.dim, + ADT_OPT(add_opt, xt_family(par), info->add_set.dim, info->add_set.flags, 0, UINT_MAX); - ADT_OPT(del_opt, par->family, info->del_set.dim, + ADT_OPT(del_opt, xt_family(par), info->del_set.dim, info->del_set.flags, 0, UINT_MAX); if (info->add_set.index != IPSET_INVALID_ID) @@ -401,9 +401,9 @@ set_target_v2(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_set_info_target_v2 *info = par->targinfo; - ADT_OPT(add_opt, par->family, info->add_set.dim, + ADT_OPT(add_opt, xt_family(par), info->add_set.dim, info->add_set.flags, info->flags, info->timeout); - ADT_OPT(del_opt, par->family, info->del_set.dim, + ADT_OPT(del_opt, xt_family(par), info->del_set.dim, info->del_set.flags, 0, UINT_MAX); /* Normalize to fit into jiffies */ @@ -429,11 +429,11 @@ set_target_v3(struct sk_buff *skb, const struct xt_action_param *par) const struct xt_set_info_target_v3 *info = par->targinfo; int ret; - ADT_OPT(add_opt, par->family, info->add_set.dim, + ADT_OPT(add_opt, xt_family(par), info->add_set.dim, info->add_set.flags, info->flags, info->timeout); - ADT_OPT(del_opt, par->family, info->del_set.dim, + ADT_OPT(del_opt, xt_family(par), info->del_set.dim, info->del_set.flags, 0, UINT_MAX); - ADT_OPT(map_opt, par->family, info->map_set.dim, + ADT_OPT(map_opt, xt_family(par), info->map_set.dim, info->map_set.flags, 0, UINT_MAX); /* Normalize to fit into jiffies */ diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c 
index 018c369c9f0d..2198914707f5 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -57,7 +57,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, struct sock *sk = skb->sk; if (!sk) - sk = nf_sk_lookup_slow_v4(par->net, skb, par->in); + sk = nf_sk_lookup_slow_v4(xt_net(par), skb, xt_in(par)); if (sk) { bool wildcard; bool transparent = true; @@ -114,7 +114,7 @@ socket_mt6_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par) struct sock *sk = skb->sk; if (!sk) - sk = nf_sk_lookup_slow_v6(par->net, skb, par->in); + sk = nf_sk_lookup_slow_v6(xt_net(par), skb, xt_in(par)); if (sk) { bool wildcard; bool transparent = true; diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 378c1c976058..ce7ea6c1c50d 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -213,6 +213,12 @@ static int tcf_ipt(struct sk_buff *skb, const struct tc_action *a, int ret = 0, result = 0; struct tcf_ipt *ipt = to_ipt(a); struct xt_action_param par; + struct nf_hook_state state = { + .net = dev_net(skb->dev), + .in = skb->dev, + .hook = ipt->tcfi_hook, + .pf = NFPROTO_IPV4, + }; if (skb_unclone(skb, GFP_ATOMIC)) return TC_ACT_UNSPEC; @@ -226,13 +232,9 @@ static int tcf_ipt(struct sk_buff *skb, const struct tc_action *a, * worry later - danger - this API seems to have changed * from earlier kernels */ - par.net = dev_net(skb->dev); - par.in = skb->dev; - par.out = NULL; - par.hooknum = ipt->tcfi_hook; + par.state = &state; par.target = ipt->tcfi_t->u.kernel.target; par.targinfo = ipt->tcfi_t->data; - par.family = NFPROTO_IPV4; ret = par.target->target(skb, &par); switch (ret) { diff --git a/net/sched/em_ipset.c b/net/sched/em_ipset.c index c66ca9400ab4..c1b23e3060b8 100644 --- a/net/sched/em_ipset.c +++ b/net/sched/em_ipset.c @@ -57,17 +57,20 @@ static int em_ipset_match(struct sk_buff *skb, struct tcf_ematch *em, struct xt_action_param acpar; const struct xt_set_info *set = (const void *) em->data; struct net_device *dev, 
*indev = NULL; + struct nf_hook_state state = { + .net = em->net, + }; int ret, network_offset; switch (tc_skb_protocol(skb)) { case htons(ETH_P_IP): - acpar.family = NFPROTO_IPV4; + state.pf = NFPROTO_IPV4; if (!pskb_network_may_pull(skb, sizeof(struct iphdr))) return 0; acpar.thoff = ip_hdrlen(skb); break; case htons(ETH_P_IPV6): - acpar.family = NFPROTO_IPV6; + state.pf = NFPROTO_IPV6; if (!pskb_network_may_pull(skb, sizeof(struct ipv6hdr))) return 0; /* doesn't call ipv6_find_hdr() because ipset doesn't use thoff, yet */ @@ -77,9 +80,7 @@ static int em_ipset_match(struct sk_buff *skb, struct tcf_ematch *em, return 0; } - acpar.hooknum = 0; - - opt.family = acpar.family; + opt.family = state.pf; opt.dim = set->dim; opt.flags = set->flags; opt.cmdflags = 0; @@ -95,9 +96,9 @@ static int em_ipset_match(struct sk_buff *skb, struct tcf_ematch *em, if (skb->skb_iif) indev = dev_get_by_index_rcu(em->net, skb->skb_iif); - acpar.net = em->net; - acpar.in = indev ? indev : dev; - acpar.out = dev; + state.in = indev ? indev : dev; + state.out = dev; + acpar.state = &state; ret = ip_set_test(set->index, skb, &acpar, &opt); -- cgit v1.2.3 From 01886bd91f1ba418ce669dfe97a06ca9504e482a Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 3 Nov 2016 10:56:35 +0100 Subject: netfilter: remove hook_entries field from nf_hook_state This field is only useful for nf_queue, so store it in the nf_queue_entry structure instead, away from the core path. Pass hook_head to nf_hook_slow(). Since we always have a valid entry on the first iteration in nf_iterate(), we can use 'do { ... } while (entry)' loop instead. 
Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter.h | 10 ++++------ include/linux/netfilter_ingress.h | 4 ++-- include/net/netfilter/nf_queue.h | 1 + net/bridge/br_netfilter_hooks.c | 4 ++-- net/bridge/netfilter/ebtable_broute.c | 2 +- net/netfilter/core.c | 9 ++++----- net/netfilter/nf_queue.c | 13 +++++-------- net/netfilter/nfnetlink_queue.c | 2 +- 8 files changed, 20 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index e0d000f6c9bf..69230140215b 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -54,7 +54,6 @@ struct nf_hook_state { struct net_device *out; struct sock *sk; struct net *net; - struct nf_hook_entry __rcu *hook_entries; int (*okfn)(struct net *, struct sock *, struct sk_buff *); }; @@ -81,7 +80,6 @@ struct nf_hook_entry { }; static inline void nf_hook_state_init(struct nf_hook_state *p, - struct nf_hook_entry *hook_entry, unsigned int hook, u_int8_t pf, struct net_device *indev, @@ -96,7 +94,6 @@ static inline void nf_hook_state_init(struct nf_hook_state *p, p->out = outdev; p->sk = sk; p->net = net; - RCU_INIT_POINTER(p->hook_entries, hook_entry); p->okfn = okfn; } @@ -150,7 +147,8 @@ void nf_unregister_sockopt(struct nf_sockopt_ops *reg); extern struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; #endif -int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state); +int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state, + struct nf_hook_entry *entry); /** * nf_hook - call a netfilter hook @@ -179,10 +177,10 @@ static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net, if (hook_head) { struct nf_hook_state state; - nf_hook_state_init(&state, hook_head, hook, pf, indev, outdev, + nf_hook_state_init(&state, hook, pf, indev, outdev, sk, net, okfn); - ret = nf_hook_slow(skb, &state); + ret = nf_hook_slow(skb, &state, hook_head); } rcu_read_unlock(); diff --git 
a/include/linux/netfilter_ingress.h b/include/linux/netfilter_ingress.h index fd44e4131710..2dc3b49b804a 100644 --- a/include/linux/netfilter_ingress.h +++ b/include/linux/netfilter_ingress.h @@ -26,10 +26,10 @@ static inline int nf_hook_ingress(struct sk_buff *skb) if (unlikely(!e)) return 0; - nf_hook_state_init(&state, e, NF_NETDEV_INGRESS, + nf_hook_state_init(&state, NF_NETDEV_INGRESS, NFPROTO_NETDEV, skb->dev, NULL, NULL, dev_net(skb->dev), NULL); - return nf_hook_slow(skb, &state); + return nf_hook_slow(skb, &state, e); } static inline void nf_hook_ingress_init(struct net_device *dev) diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h index 2280cfe86c56..09948d10e38e 100644 --- a/include/net/netfilter/nf_queue.h +++ b/include/net/netfilter/nf_queue.h @@ -12,6 +12,7 @@ struct nf_queue_entry { unsigned int id; struct nf_hook_state state; + struct nf_hook_entry *hook; u16 size; /* sizeof(entry) + saved route keys */ /* extra space to store route keys */ diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 7e3645fa6339..8155bd2a5138 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -1018,10 +1018,10 @@ int br_nf_hook_thresh(unsigned int hook, struct net *net, /* We may already have this, but read-locks nest anyway */ rcu_read_lock(); - nf_hook_state_init(&state, elem, hook, NFPROTO_BRIDGE, indev, outdev, + nf_hook_state_init(&state, hook, NFPROTO_BRIDGE, indev, outdev, sk, net, okfn); - ret = nf_hook_slow(skb, &state); + ret = nf_hook_slow(skb, &state, elem); rcu_read_unlock(); if (ret == 1) ret = okfn(net, sk, skb); diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c index 599679e3498d..8fe36dc3aab2 100644 --- a/net/bridge/netfilter/ebtable_broute.c +++ b/net/bridge/netfilter/ebtable_broute.c @@ -53,7 +53,7 @@ static int ebt_broute(struct sk_buff *skb) struct nf_hook_state state; int ret; - nf_hook_state_init(&state, NULL, 
NF_BR_BROUTING, + nf_hook_state_init(&state, NF_BR_BROUTING, NFPROTO_BRIDGE, skb->dev, NULL, NULL, dev_net(skb->dev), NULL); diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 64623374bc5f..ebece48b8392 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -308,7 +308,7 @@ unsigned int nf_iterate(struct sk_buff *skb, { unsigned int verdict; - while (*entryp) { + do { repeat: verdict = (*entryp)->ops.hook((*entryp)->ops.priv, skb, state); if (verdict != NF_ACCEPT) { @@ -317,20 +317,19 @@ repeat: goto repeat; } *entryp = rcu_dereference((*entryp)->next); - } + } while (*entryp); return NF_ACCEPT; } /* Returns 1 if okfn() needs to be executed by the caller, * -EPERM for NF_DROP, 0 otherwise. Caller must hold rcu_read_lock. */ -int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state) +int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state, + struct nf_hook_entry *entry) { - struct nf_hook_entry *entry; unsigned int verdict; int ret; - entry = rcu_dereference(state->hook_entries); next_hook: verdict = nf_iterate(skb, state, &entry); switch (verdict & NF_VERDICT_MASK) { diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 0fb38966e5bf..2e39e38ae1c7 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -108,7 +108,7 @@ void nf_queue_nf_hook_drop(struct net *net, const struct nf_hook_entry *entry) } static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state, - unsigned int queuenum) + struct nf_hook_entry *hook_entry, unsigned int queuenum) { int status = -ENOENT; struct nf_queue_entry *entry = NULL; @@ -136,6 +136,7 @@ static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state, *entry = (struct nf_queue_entry) { .skb = skb, .state = *state, + .hook = hook_entry, .size = sizeof(*entry) + afinfo->route_key_size, }; @@ -163,8 +164,7 @@ int nf_queue(struct sk_buff *skb, struct nf_hook_state *state, struct nf_hook_entry *entry = *entryp; int ret; - 
RCU_INIT_POINTER(state->hook_entries, entry); - ret = __nf_queue(skb, state, verdict >> NF_VERDICT_QBITS); + ret = __nf_queue(skb, state, entry, verdict >> NF_VERDICT_QBITS); if (ret < 0) { if (ret == -ESRCH && (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS)) { @@ -179,15 +179,12 @@ int nf_queue(struct sk_buff *skb, struct nf_hook_state *state, void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) { - struct nf_hook_entry *hook_entry; + struct nf_hook_entry *hook_entry = entry->hook; + struct nf_hook_ops *elem = &hook_entry->ops; struct sk_buff *skb = entry->skb; const struct nf_afinfo *afinfo; - struct nf_hook_ops *elem; int err; - hook_entry = rcu_dereference(entry->state.hook_entries); - elem = &hook_entry->ops; - nf_queue_entry_release_refs(entry); /* Continue traversal iff userspace said ok... */ diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 5379f788a372..1e33115b399f 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -919,7 +919,7 @@ static struct notifier_block nfqnl_dev_notifier = { static int nf_hook_cmp(struct nf_queue_entry *entry, unsigned long entry_ptr) { - return rcu_access_pointer(entry->state.hook_entries) == + return rcu_access_pointer(entry->hook) == (struct nf_hook_entry *)entry_ptr; } -- cgit v1.2.3 From 0cc0aa614b4c24b21b2492c0a1753035ee8c6edb Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Wed, 2 Nov 2016 11:02:17 -0400 Subject: ipv6: add IPV6_RECVFRAGSIZE cmsg When reading a datagram or raw packet that arrived fragmented, expose the maximum fragment size if recorded to allow applications to estimate receive path MTU. At this point, the field is only recorded when ipv6 connection tracking is enabled. A follow-up patch will record this field also in the ipv6 input path. 
Tested using the test for IP_RECVFRAGSIZE plus ip netns exec to ip addr add dev veth1 fc07::1/64 ip netns exec from ip addr add dev veth0 fc07::2/64 ip netns exec to ./recv_cmsg_recvfragsize -6 -u -p 6000 & ip netns exec from nc -q 1 -u fc07::1 6000 < payload Both with and without enabling connection tracking ip6tables -A INPUT -m state --state NEW -p udp -j LOG Signed-off-by: Willem de Bruijn Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/ipv6.h | 5 +++-- include/uapi/linux/in6.h | 1 + net/ipv6/datagram.c | 5 +++++ net/ipv6/ipv6_sockglue.c | 8 ++++++++ 4 files changed, 17 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index ca1ad9ebbc92..1afb6e8d35c3 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -229,8 +229,9 @@ struct ipv6_pinfo { rxflow:1, rxtclass:1, rxpmtu:1, - rxorigdstaddr:1; - /* 2 bits hole */ + rxorigdstaddr:1, + recvfragsize:1; + /* 1 bits hole */ } bits; __u16 all; } rxopt; diff --git a/include/uapi/linux/in6.h b/include/uapi/linux/in6.h index b39ea4f2e701..46444f8fbee4 100644 --- a/include/uapi/linux/in6.h +++ b/include/uapi/linux/in6.h @@ -283,6 +283,7 @@ struct in6_flowlabel_req { #define IPV6_RECVORIGDSTADDR IPV6_ORIGDSTADDR #define IPV6_TRANSPARENT 75 #define IPV6_UNICAST_IF 76 +#define IPV6_RECVFRAGSIZE 77 /* * Multicast Routing: diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 37874e2f30ed..620c79a0130a 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -715,6 +715,11 @@ void ip6_datagram_recv_specific_ctl(struct sock *sk, struct msghdr *msg, put_cmsg(msg, SOL_IPV6, IPV6_ORIGDSTADDR, sizeof(sin6), &sin6); } } + if (np->rxopt.bits.recvfragsize && opt->frag_max_size) { + int val = opt->frag_max_size; + + put_cmsg(msg, SOL_IPV6, IPV6_RECVFRAGSIZE, sizeof(val), &val); + } } void ip6_datagram_recv_ctl(struct sock *sk, struct msghdr *msg, diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 
636ec56f5f50..6c126780fcf2 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -868,6 +868,10 @@ pref_skip_coa: np->autoflowlabel = valbool; retv = 0; break; + case IPV6_RECVFRAGSIZE: + np->rxopt.bits.recvfragsize = valbool; + retv = 0; + break; } release_sock(sk); @@ -1310,6 +1314,10 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, val = np->autoflowlabel; break; + case IPV6_RECVFRAGSIZE: + val = np->rxopt.bits.recvfragsize; + break; + default: return -ENOPROTOOPT; } -- cgit v1.2.3 From 50d24c34403c62ad29e8b6db559d491bae20b4b7 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Thu, 3 Nov 2016 17:03:53 -0700 Subject: block: immediately dispatch big size request Currently block plug holds up to 16 non-mergeable requests. This makes sense if the request size is small, e.g., to reduce lock contention. But if the request size is big enough, we don't need to worry about lock contention. Holding such a request makes no sense and it lowers the disk utilization. In practice, this improves throughput by 10% for my raid5 sequential write workload. The size (128k) is arbitrary right now, but it makes sure lock contention is small. This probably could be more intelligent, e.g., check the average size of the held requests. Since this is mainly for sequential IO, it is probably not worth it. V2: check the last request instead of the first request, so as long as there is one big size request we flush the plug.
Signed-off-by: Shaohua Li Signed-off-by: Jens Axboe --- block/blk-core.c | 4 +++- include/linux/blkdev.h | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/block/blk-core.c b/block/blk-core.c index 0bfaa54d3e9f..2deca48a4a05 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1746,7 +1746,9 @@ get_rq: if (!request_count) trace_block_plug(q); else { - if (request_count >= BLK_MAX_REQUEST_COUNT) { + struct request *last = list_entry_rq(plug->list.prev); + if (request_count >= BLK_MAX_REQUEST_COUNT || + blk_rq_bytes(last) >= BLK_PLUG_FLUSH_SIZE) { blk_flush_plug_list(plug, false); trace_block_plug(q); } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 13d893a69b46..9189a2d5c392 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1173,6 +1173,7 @@ struct blk_plug { struct list_head cb_list; /* md requires an unplug callback */ }; #define BLK_MAX_REQUEST_COUNT 16 +#define BLK_PLUG_FLUSH_SIZE (128 * 1024) struct blk_plug_cb; typedef void (*blk_plug_cb_fn)(struct blk_plug_cb *, bool); -- cgit v1.2.3 From 1b5b42216469b05ef4b5916cb40b127dfab1da88 Mon Sep 17 00:00:00 2001 From: Axel Haslam Date: Thu, 3 Nov 2016 12:11:42 +0100 Subject: regulator: core: Add new API to poll for error conditions Regulator consumers can receive event notifications when errors are reported to the driver, but currently, there is no way for a regulator consumer to know when the error is over. To allow a regulator consumer to poll for error conditions add a new API: regulator_get_error_flags. 
Signed-off-by: Axel Haslam Signed-off-by: Mark Brown --- drivers/regulator/core.c | 33 +++++++++++++++++++++++++++++++++ include/linux/regulator/consumer.h | 26 ++++++++++++++++++++++++++ include/linux/regulator/driver.h | 4 ++++ 3 files changed, 63 insertions(+) (limited to 'include/linux') diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 67426c0477d3..08260c215895 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -3359,6 +3359,39 @@ unsigned int regulator_get_mode(struct regulator *regulator) } EXPORT_SYMBOL_GPL(regulator_get_mode); +static int _regulator_get_error_flags(struct regulator_dev *rdev, + unsigned int *flags) +{ + int ret; + + mutex_lock(&rdev->mutex); + + /* sanity check */ + if (!rdev->desc->ops->get_error_flags) { + ret = -EINVAL; + goto out; + } + + ret = rdev->desc->ops->get_error_flags(rdev, flags); +out: + mutex_unlock(&rdev->mutex); + return ret; +} + +/** + * regulator_get_error_flags - get regulator error information + * @regulator: regulator source + * @flags: pointer to store error flags + * + * Get the current regulator error information. + */ +int regulator_get_error_flags(struct regulator *regulator, + unsigned int *flags) +{ + return _regulator_get_error_flags(regulator->rdev, flags); +} +EXPORT_SYMBOL_GPL(regulator_get_error_flags); + /** * regulator_set_load - set regulator load * @regulator: regulator source diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h index 692108222271..528eb1f5273e 100644 --- a/include/linux/regulator/consumer.h +++ b/include/linux/regulator/consumer.h @@ -120,6 +120,25 @@ struct regmap; #define REGULATOR_EVENT_PRE_DISABLE 0x400 #define REGULATOR_EVENT_ABORT_DISABLE 0x800 +/* + * Regulator errors that can be queried using regulator_get_error_flags + * + * UNDER_VOLTAGE Regulator output is under voltage. + * OVER_CURRENT Regulator output current is too high. + * REGULATION_OUT Regulator output is out of regulation. 
+ * FAIL Regulator output has failed. + * OVER_TEMP Regulator over temp. + * + * NOTE: These errors can be OR'ed together. + */ + +#define REGULATOR_ERROR_UNDER_VOLTAGE BIT(1) +#define REGULATOR_ERROR_OVER_CURRENT BIT(2) +#define REGULATOR_ERROR_REGULATION_OUT BIT(3) +#define REGULATOR_ERROR_FAIL BIT(4) +#define REGULATOR_ERROR_OVER_TEMP BIT(5) + + /** * struct pre_voltage_change_data - Data sent with PRE_VOLTAGE_CHANGE event * @@ -237,6 +256,8 @@ int regulator_get_current_limit(struct regulator *regulator); int regulator_set_mode(struct regulator *regulator, unsigned int mode); unsigned int regulator_get_mode(struct regulator *regulator); +int regulator_get_error_flags(struct regulator *regulator, + unsigned int *flags); int regulator_set_load(struct regulator *regulator, int load_uA); int regulator_allow_bypass(struct regulator *regulator, bool allow); @@ -477,6 +498,11 @@ static inline unsigned int regulator_get_mode(struct regulator *regulator) return REGULATOR_MODE_NORMAL; } +static inline int regulator_get_error_flags(struct regulator *regulator) +{ + return -EINVAL; +} + static inline int regulator_set_load(struct regulator *regulator, int load_uA) { return REGULATOR_MODE_NORMAL; diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index 37b532410528..dac8e7b16bc6 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -100,6 +100,7 @@ struct regulator_linear_range { * * @set_mode: Set the configured operating mode for the regulator. * @get_mode: Get the configured operating mode for the regulator. + * @get_error_flags: Get the current error(s) for the regulator. 
* @get_status: Return actual (not as-configured) status of regulator, as a * REGULATOR_STATUS value (or negative errno) * @get_optimum_mode: Get the most efficient operating mode for the regulator @@ -169,6 +170,9 @@ struct regulator_ops { int (*set_mode) (struct regulator_dev *, unsigned int mode); unsigned int (*get_mode) (struct regulator_dev *); + /* retrieve current error flags on the regulator */ + int (*get_error_flags)(struct regulator_dev *, unsigned int *flags); + /* Time taken to enable or set voltage on the regulator */ int (*enable_time) (struct regulator_dev *); int (*set_ramp_delay) (struct regulator_dev *, int ramp_delay); -- cgit v1.2.3 From 68f929ff2654bced015ccb9b5555667f46f88dfa Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 3 Nov 2016 17:12:06 +0000 Subject: debugfs: constify argument to debugfs_real_fops() seq_file users can only access const version of file pointer, because the ->file member of struct seq_operations is marked as such. Make parameter to debugfs_real_fops() const. CC: Greg Kroah-Hartman CC: Nicolai Stange CC: Christian Lamparter CC: LKML Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/linux/debugfs.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index 4d3f0d1aec73..bf1907d96097 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -52,7 +52,8 @@ extern struct srcu_struct debugfs_srcu; * Must only be called under the protection established by * debugfs_use_file_start(). 
*/ -static inline const struct file_operations *debugfs_real_fops(struct file *filp) +static inline const struct file_operations * +debugfs_real_fops(const struct file *filp) __must_hold(&debugfs_srcu) { /* -- cgit v1.2.3 From 29f3ad7d8380364c86556eedf4eedd3b3d4921dc Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 4 Nov 2016 18:08:11 +0100 Subject: fs: Provide function to unmap metadata for a range of blocks Provide function equivalent to unmap_underlying_metadata() for a range of blocks. We somewhat optimize the function to use pagevec lookups instead of looking up buffer heads one by one and use page lock to pin buffer heads instead of mapping's private_lock to improve scalability. Signed-off-by: Jan Kara Signed-off-by: Jens Axboe --- fs/buffer.c | 76 +++++++++++++++++++++++++++++++++++++++++++++ include/linux/buffer_head.h | 2 ++ 2 files changed, 78 insertions(+) (limited to 'include/linux') diff --git a/fs/buffer.c b/fs/buffer.c index af5776da814a..f8beca55240a 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -43,6 +43,7 @@ #include #include #include +#include #include static int fsync_buffers_list(spinlock_t *lock, struct list_head *list); @@ -1636,6 +1637,81 @@ void unmap_underlying_metadata(struct block_device *bdev, sector_t block) } EXPORT_SYMBOL(unmap_underlying_metadata); +/** + * clean_bdev_aliases: clean a range of buffers in block device + * @bdev: Block device to clean buffers in + * @block: Start of a range of blocks to clean + * @len: Number of blocks to clean + * + * We are taking a range of blocks for data and we don't want writeback of any + * buffer-cache aliases starting from return from this function and until the + * moment when something will explicitly mark the buffer dirty (hopefully that + * will not happen until we will free that block ;-) We don't even need to mark + * it not-uptodate - nobody can expect anything from a newly allocated buffer + * anyway. We used to use unmap_buffer() for such invalidation, but that was + * wrong. 
We definitely don't want to mark the alias unmapped, for example - it + * would confuse anyone who might pick it with bread() afterwards... + * + * Also.. Note that bforget() doesn't lock the buffer. So there can be + * writeout I/O going on against recently-freed buffers. We don't wait on that + * I/O in bforget() - it's more efficient to wait on the I/O only if we really + * need to. That happens here. + */ +void clean_bdev_aliases(struct block_device *bdev, sector_t block, sector_t len) +{ + struct inode *bd_inode = bdev->bd_inode; + struct address_space *bd_mapping = bd_inode->i_mapping; + struct pagevec pvec; + pgoff_t index = block >> (PAGE_SHIFT - bd_inode->i_blkbits); + pgoff_t end; + int i; + struct buffer_head *bh; + struct buffer_head *head; + + end = (block + len - 1) >> (PAGE_SHIFT - bd_inode->i_blkbits); + pagevec_init(&pvec, 0); + while (index <= end && pagevec_lookup(&pvec, bd_mapping, index, + min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) { + for (i = 0; i < pagevec_count(&pvec); i++) { + struct page *page = pvec.pages[i]; + + index = page->index; + if (index > end) + break; + if (!page_has_buffers(page)) + continue; + /* + * We use page lock instead of bd_mapping->private_lock + * to pin buffers here since we can afford to sleep and + * it scales better than a global spinlock lock. + */ + lock_page(page); + /* Recheck when the page is locked which pins bhs */ + if (!page_has_buffers(page)) + goto unlock_page; + head = page_buffers(page); + bh = head; + do { + if (!buffer_mapped(bh)) + goto next; + if (bh->b_blocknr >= block + len) + break; + clear_buffer_dirty(bh); + wait_on_buffer(bh); + clear_buffer_req(bh); +next: + bh = bh->b_this_page; + } while (bh != head); +unlock_page: + unlock_page(page); + } + pagevec_release(&pvec); + cond_resched(); + index++; + } +} +EXPORT_SYMBOL(clean_bdev_aliases); + /* * Size is a power-of-two in the range 512..PAGE_SIZE, * and the case we care about most is PAGE_SIZE. 
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index ebbacd14d450..9c9c73ce7d4f 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -169,6 +169,8 @@ void invalidate_inode_buffers(struct inode *); int remove_inode_buffers(struct inode *inode); int sync_mapping_buffers(struct address_space *mapping); void unmap_underlying_metadata(struct block_device *bdev, sector_t block); +void clean_bdev_aliases(struct block_device *bdev, sector_t block, + sector_t len); void mark_buffer_async_write(struct buffer_head *bh); void __wait_on_buffer(struct buffer_head *); -- cgit v1.2.3 From e64855c6cfaa0a80c1b71c5f647cb792dc436668 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 4 Nov 2016 18:08:15 +0100 Subject: fs: Add helper to clean bdev aliases under a bh and use it Add a helper function that clears buffer heads from a block device aliasing passed bh. Use this helper function from filesystems instead of the original unmap_underlying_metadata() to save some boilerplate code and also have a better name for the functionality since it has not been unmapping anything for a *long* time.
Signed-off-by: Jan Kara Signed-off-by: Jens Axboe --- fs/buffer.c | 8 +++----- fs/ext4/inode.c | 3 +-- fs/ext4/page-io.c | 2 +- fs/mpage.c | 3 +-- fs/ntfs/aops.c | 2 +- fs/ntfs/file.c | 5 ++--- fs/ocfs2/aops.c | 2 +- fs/ufs/balloc.c | 3 +-- fs/ufs/inode.c | 3 +-- include/linux/buffer_head.h | 4 ++++ 10 files changed, 16 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/fs/buffer.c b/fs/buffer.c index f8beca55240a..912d70169fca 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1821,8 +1821,7 @@ int __block_write_full_page(struct inode *inode, struct page *page, if (buffer_new(bh)) { /* blockdev mappings never come here */ clear_buffer_new(bh); - unmap_underlying_metadata(bh->b_bdev, - bh->b_blocknr); + clean_bdev_bh_alias(bh); } } bh = bh->b_this_page; @@ -2068,8 +2067,7 @@ int __block_write_begin_int(struct page *page, loff_t pos, unsigned len, } if (buffer_new(bh)) { - unmap_underlying_metadata(bh->b_bdev, - bh->b_blocknr); + clean_bdev_bh_alias(bh); if (PageUptodate(page)) { clear_buffer_new(bh); set_buffer_uptodate(bh); @@ -2709,7 +2707,7 @@ int nobh_write_begin(struct address_space *mapping, if (!buffer_mapped(bh)) is_mapped_to_disk = 0; if (buffer_new(bh)) - unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr); + clean_bdev_bh_alias(bh); if (PageUptodate(page)) { set_buffer_uptodate(bh); continue; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 7c7cc4ae4b8e..2f8127601bef 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1123,8 +1123,7 @@ static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len, if (err) break; if (buffer_new(bh)) { - unmap_underlying_metadata(bh->b_bdev, - bh->b_blocknr); + clean_bdev_bh_alias(bh); if (PageUptodate(page)) { clear_buffer_new(bh); set_buffer_uptodate(bh); diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index e0b3b54cdef3..f28fd6483e04 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -457,7 +457,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io, } if 
(buffer_new(bh)) { clear_buffer_new(bh); - unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr); + clean_bdev_bh_alias(bh); } set_buffer_async_write(bh); nr_to_submit++; diff --git a/fs/mpage.c b/fs/mpage.c index 98fc11aa7e0b..28af984a3d96 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -555,8 +555,7 @@ static int __mpage_writepage(struct page *page, struct writeback_control *wbc, if (mpd->get_block(inode, block_in_file, &map_bh, 1)) goto confused; if (buffer_new(&map_bh)) - unmap_underlying_metadata(map_bh.b_bdev, - map_bh.b_blocknr); + clean_bdev_bh_alias(&map_bh); if (buffer_boundary(&map_bh)) { boundary_block = map_bh.b_blocknr; boundary_bdev = map_bh.b_bdev; diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c index d0cf6fee5c77..cc91856b5e2d 100644 --- a/fs/ntfs/aops.c +++ b/fs/ntfs/aops.c @@ -765,7 +765,7 @@ lock_retry_remap: } // TODO: Instantiate the hole. // clear_buffer_new(bh); - // unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr); + // clean_bdev_bh_alias(bh); ntfs_error(vol->sb, "Writing into sparse regions is " "not supported yet. Sorry."); err = -EOPNOTSUPP; diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index bf72a2c58b75..99510d811a8c 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -740,8 +740,7 @@ map_buffer_cached: set_buffer_uptodate(bh); if (unlikely(was_hole)) { /* We allocated the buffer. */ - unmap_underlying_metadata(bh->b_bdev, - bh->b_blocknr); + clean_bdev_bh_alias(bh); if (bh_end <= pos || bh_pos >= end) mark_buffer_dirty(bh); else @@ -784,7 +783,7 @@ map_buffer_cached: continue; } /* We allocated the buffer. 
*/ - unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr); + clean_bdev_bh_alias(bh); /* * If the buffer is fully outside the write, zero it, * set it uptodate, and mark it dirty so it gets diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index c5c5b9748ea3..e8f65eefffca 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -630,7 +630,7 @@ int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno, if (!buffer_mapped(bh)) { map_bh(bh, inode->i_sb, *p_blkno); - unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr); + clean_bdev_bh_alias(bh); } if (PageUptodate(page)) { diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c index b035af54f538..a0376a2c1c29 100644 --- a/fs/ufs/balloc.c +++ b/fs/ufs/balloc.c @@ -307,8 +307,7 @@ static void ufs_change_blocknr(struct inode *inode, sector_t beg, (unsigned long long)(pos + newb), pos); bh->b_blocknr = newb + pos; - unmap_underlying_metadata(bh->b_bdev, - bh->b_blocknr); + clean_bdev_bh_alias(bh); mark_buffer_dirty(bh); ++j; bh = bh->b_this_page; diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index 190d64be22ed..45ceb94e89e4 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -1070,8 +1070,7 @@ static int ufs_alloc_lastblock(struct inode *inode, loff_t size) if (buffer_new(bh)) { clear_buffer_new(bh); - unmap_underlying_metadata(bh->b_bdev, - bh->b_blocknr); + clean_bdev_bh_alias(bh); /* * we do not zeroize fragment, because of * if it maped to hole, it already contains zeroes diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 9c9c73ce7d4f..d1ab91fc6d43 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -171,6 +171,10 @@ int sync_mapping_buffers(struct address_space *mapping); void unmap_underlying_metadata(struct block_device *bdev, sector_t block); void clean_bdev_aliases(struct block_device *bdev, sector_t block, sector_t len); +static inline void clean_bdev_bh_alias(struct buffer_head *bh) +{ + clean_bdev_aliases(bh->b_bdev, bh->b_blocknr, 1); +} void 
mark_buffer_async_write(struct buffer_head *bh); void __wait_on_buffer(struct buffer_head *); -- cgit v1.2.3 From ce98321bf7d274a470642ef99e1d82512673ce7c Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 4 Nov 2016 18:08:16 +0100 Subject: fs: Remove unmap_underlying_metadata Nobody is using this function anymore. Remove it. Signed-off-by: Jan Kara Signed-off-by: Jens Axboe --- fs/buffer.c | 32 -------------------------------- include/linux/buffer_head.h | 1 - 2 files changed, 33 deletions(-) (limited to 'include/linux') diff --git a/fs/buffer.c b/fs/buffer.c index 912d70169fca..1104ce8b4536 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1605,38 +1605,6 @@ void create_empty_buffers(struct page *page, } EXPORT_SYMBOL(create_empty_buffers); -/* - * We are taking a block for data and we don't want any output from any - * buffer-cache aliases starting from return from that function and - * until the moment when something will explicitly mark the buffer - * dirty (hopefully that will not happen until we will free that block ;-) - * We don't even need to mark it not-uptodate - nobody can expect - * anything from a newly allocated buffer anyway. We used to used - * unmap_buffer() for such invalidation, but that was wrong. We definitely - * don't want to mark the alias unmapped, for example - it would confuse - * anyone who might pick it with bread() afterwards... - * - * Also.. Note that bforget() doesn't lock the buffer. So there can - * be writeout I/O going on against recently-freed buffers. We don't - * wait on that I/O in bforget() - it's more efficient to wait on the I/O - * only if we really need to. That happens here. 
- */ -void unmap_underlying_metadata(struct block_device *bdev, sector_t block) -{ - struct buffer_head *old_bh; - - might_sleep(); - - old_bh = __find_get_block_slow(bdev, block); - if (old_bh) { - clear_buffer_dirty(old_bh); - wait_on_buffer(old_bh); - clear_buffer_req(old_bh); - __brelse(old_bh); - } -} -EXPORT_SYMBOL(unmap_underlying_metadata); - /** * clean_bdev_aliases: clean a range of buffers in block device * @bdev: Block device to clean buffers in diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index d1ab91fc6d43..d67ab83823ad 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -168,7 +168,6 @@ int inode_has_buffers(struct inode *); void invalidate_inode_buffers(struct inode *); int remove_inode_buffers(struct inode *inode); int sync_mapping_buffers(struct address_space *mapping); -void unmap_underlying_metadata(struct block_device *bdev, sector_t block); void clean_bdev_aliases(struct block_device *bdev, sector_t block, sector_t len); static inline void clean_bdev_bh_alias(struct buffer_head *bh) -- cgit v1.2.3 From c9329d8638cfa1a86faf4fb8bd4922a3d9c6c437 Mon Sep 17 00:00:00 2001 From: Mugunthan V N Date: Wed, 5 Oct 2016 14:34:40 +0530 Subject: mfd: ti_am335x_tscadc: store physical address store the physical address of the device in its priv to use it for DMA addressing in the client drivers. 
Signed-off-by: Mugunthan V N Acked-for-MFD-by: Lee Jones Signed-off-by: Jonathan Cameron --- drivers/mfd/ti_am335x_tscadc.c | 1 + include/linux/mfd/ti_am335x_tscadc.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/drivers/mfd/ti_am335x_tscadc.c b/drivers/mfd/ti_am335x_tscadc.c index c8f027b4ea4c..0f3fab47fe48 100644 --- a/drivers/mfd/ti_am335x_tscadc.c +++ b/drivers/mfd/ti_am335x_tscadc.c @@ -183,6 +183,7 @@ static int ti_tscadc_probe(struct platform_device *pdev) tscadc->irq = err; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + tscadc->tscadc_phys_base = res->start; tscadc->tscadc_base = devm_ioremap_resource(&pdev->dev, res); if (IS_ERR(tscadc->tscadc_base)) return PTR_ERR(tscadc->tscadc_base); diff --git a/include/linux/mfd/ti_am335x_tscadc.h b/include/linux/mfd/ti_am335x_tscadc.h index 7f55b8b41032..e45a208d9944 100644 --- a/include/linux/mfd/ti_am335x_tscadc.h +++ b/include/linux/mfd/ti_am335x_tscadc.h @@ -155,6 +155,7 @@ struct ti_tscadc_dev { struct device *dev; struct regmap *regmap; void __iomem *tscadc_base; + phys_addr_t tscadc_phys_base; int irq; int used_cells; /* 1-2 */ int tsc_wires; -- cgit v1.2.3 From f438b9da75eb80eb6c4095a5b75324cc9a7f0570 Mon Sep 17 00:00:00 2001 From: Mugunthan V N Date: Wed, 5 Oct 2016 14:34:41 +0530 Subject: drivers: iio: ti_am335x_adc: add dma support This patch adds the required pieces to ti_am335x_adc driver for DMA support Signed-off-by: Mugunthan V N Reviewed-by: Peter Ujfalusi Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ti_am335x_adc.c | 148 ++++++++++++++++++++++++++++++++++- include/linux/mfd/ti_am335x_tscadc.h | 7 ++ 2 files changed, 152 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/iio/adc/ti_am335x_adc.c b/drivers/iio/adc/ti_am335x_adc.c index c3cfacca2541..ad9dec30bb30 100644 --- a/drivers/iio/adc/ti_am335x_adc.c +++ b/drivers/iio/adc/ti_am335x_adc.c @@ -30,10 +30,28 @@ #include #include +#include +#include + +#define 
DMA_BUFFER_SIZE SZ_2K + +struct tiadc_dma { + struct dma_slave_config conf; + struct dma_chan *chan; + dma_addr_t addr; + dma_cookie_t cookie; + u8 *buf; + int current_period; + int period_size; + u8 fifo_thresh; +}; + struct tiadc_device { struct ti_tscadc_dev *mfd_tscadc; + struct tiadc_dma dma; struct mutex fifo1_lock; /* to protect fifo access */ int channels; + int total_ch_enabled; u8 channel_line[8]; u8 channel_step[8]; int buffer_en_ch_steps; @@ -198,6 +216,67 @@ static irqreturn_t tiadc_worker_h(int irq, void *private) return IRQ_HANDLED; } +static void tiadc_dma_rx_complete(void *param) +{ + struct iio_dev *indio_dev = param; + struct tiadc_device *adc_dev = iio_priv(indio_dev); + struct tiadc_dma *dma = &adc_dev->dma; + u8 *data; + int i; + + data = dma->buf + dma->current_period * dma->period_size; + dma->current_period = 1 - dma->current_period; /* swap the buffer ID */ + + for (i = 0; i < dma->period_size; i += indio_dev->scan_bytes) { + iio_push_to_buffers(indio_dev, data); + data += indio_dev->scan_bytes; + } +} + +static int tiadc_start_dma(struct iio_dev *indio_dev) +{ + struct tiadc_device *adc_dev = iio_priv(indio_dev); + struct tiadc_dma *dma = &adc_dev->dma; + struct dma_async_tx_descriptor *desc; + + dma->current_period = 0; /* We start to fill period 0 */ + /* + * Make the fifo thresh as the multiple of total number of + * channels enabled, so make sure that cyclic DMA period + * length is also a multiple of total number of channels + * enabled. This ensures that no invalid data is reported + * to the stack via iio_push_to_buffers(). 
+ */ + dma->fifo_thresh = rounddown(FIFO1_THRESHOLD + 1, + adc_dev->total_ch_enabled) - 1; + /* Make sure that period length is multiple of fifo thresh level */ + dma->period_size = rounddown(DMA_BUFFER_SIZE / 2, + (dma->fifo_thresh + 1) * sizeof(u16)); + + dma->conf.src_maxburst = dma->fifo_thresh + 1; + dmaengine_slave_config(dma->chan, &dma->conf); + + desc = dmaengine_prep_dma_cyclic(dma->chan, dma->addr, + dma->period_size * 2, + dma->period_size, DMA_DEV_TO_MEM, + DMA_PREP_INTERRUPT); + if (!desc) + return -EBUSY; + + desc->callback = tiadc_dma_rx_complete; + desc->callback_param = indio_dev; + + dma->cookie = dmaengine_submit(desc); + + dma_async_issue_pending(dma->chan); + + tiadc_writel(adc_dev, REG_FIFO1THR, dma->fifo_thresh); + tiadc_writel(adc_dev, REG_DMA1REQ, dma->fifo_thresh); + tiadc_writel(adc_dev, REG_DMAENABLE_SET, DMA_FIFO1); + + return 0; +} + static int tiadc_buffer_preenable(struct iio_dev *indio_dev) { struct tiadc_device *adc_dev = iio_priv(indio_dev); @@ -218,20 +297,30 @@ static int tiadc_buffer_preenable(struct iio_dev *indio_dev) static int tiadc_buffer_postenable(struct iio_dev *indio_dev) { struct tiadc_device *adc_dev = iio_priv(indio_dev); + struct tiadc_dma *dma = &adc_dev->dma; + unsigned int irq_enable; unsigned int enb = 0; u8 bit; tiadc_step_config(indio_dev); - for_each_set_bit(bit, indio_dev->active_scan_mask, adc_dev->channels) + for_each_set_bit(bit, indio_dev->active_scan_mask, adc_dev->channels) { enb |= (get_adc_step_bit(adc_dev, bit) << 1); + adc_dev->total_ch_enabled++; + } adc_dev->buffer_en_ch_steps = enb; + if (dma->chan) + tiadc_start_dma(indio_dev); + am335x_tsc_se_set_cache(adc_dev->mfd_tscadc, enb); tiadc_writel(adc_dev, REG_IRQSTATUS, IRQENB_FIFO1THRES | IRQENB_FIFO1OVRRUN | IRQENB_FIFO1UNDRFLW); - tiadc_writel(adc_dev, REG_IRQENABLE, IRQENB_FIFO1THRES - | IRQENB_FIFO1OVRRUN); + + irq_enable = IRQENB_FIFO1OVRRUN; + if (!dma->chan) + irq_enable |= IRQENB_FIFO1THRES; + tiadc_writel(adc_dev, REG_IRQENABLE, 
irq_enable); return 0; } @@ -239,12 +328,18 @@ static int tiadc_buffer_postenable(struct iio_dev *indio_dev) static int tiadc_buffer_predisable(struct iio_dev *indio_dev) { struct tiadc_device *adc_dev = iio_priv(indio_dev); + struct tiadc_dma *dma = &adc_dev->dma; int fifo1count, i, read; tiadc_writel(adc_dev, REG_IRQCLR, (IRQENB_FIFO1THRES | IRQENB_FIFO1OVRRUN | IRQENB_FIFO1UNDRFLW)); am335x_tsc_se_clr(adc_dev->mfd_tscadc, adc_dev->buffer_en_ch_steps); adc_dev->buffer_en_ch_steps = 0; + adc_dev->total_ch_enabled = 0; + if (dma->chan) { + tiadc_writel(adc_dev, REG_DMAENABLE_CLEAR, 0x2); + dmaengine_terminate_async(dma->chan); + } /* Flush FIFO of leftover data in the time it takes to disable adc */ fifo1count = tiadc_readl(adc_dev, REG_FIFO1CNT); @@ -430,6 +525,41 @@ static const struct iio_info tiadc_info = { .driver_module = THIS_MODULE, }; +static int tiadc_request_dma(struct platform_device *pdev, + struct tiadc_device *adc_dev) +{ + struct tiadc_dma *dma = &adc_dev->dma; + dma_cap_mask_t mask; + + /* Default slave configuration parameters */ + dma->conf.direction = DMA_DEV_TO_MEM; + dma->conf.src_addr_width = DMA_SLAVE_BUSWIDTH_2_BYTES; + dma->conf.src_addr = adc_dev->mfd_tscadc->tscadc_phys_base + REG_FIFO1; + + dma_cap_zero(mask); + dma_cap_set(DMA_CYCLIC, mask); + + /* Get a channel for RX */ + dma->chan = dma_request_chan(adc_dev->mfd_tscadc->dev, "fifo1"); + if (IS_ERR(dma->chan)) { + int ret = PTR_ERR(dma->chan); + + dma->chan = NULL; + return ret; + } + + /* RX buffer */ + dma->buf = dma_alloc_coherent(dma->chan->device->dev, DMA_BUFFER_SIZE, + &dma->addr, GFP_KERNEL); + if (!dma->buf) + goto err; + + return 0; +err: + dma_release_channel(dma->chan); + return -ENOMEM; +} + static int tiadc_parse_dt(struct platform_device *pdev, struct tiadc_device *adc_dev) { @@ -512,8 +642,14 @@ static int tiadc_probe(struct platform_device *pdev) platform_set_drvdata(pdev, indio_dev); + err = tiadc_request_dma(pdev, adc_dev); + if (err && err == -EPROBE_DEFER) + goto 
err_dma; + return 0; +err_dma: + iio_device_unregister(indio_dev); err_buffer_unregister: tiadc_iio_buffered_hardware_remove(indio_dev); err_free_channels: @@ -525,8 +661,14 @@ static int tiadc_remove(struct platform_device *pdev) { struct iio_dev *indio_dev = platform_get_drvdata(pdev); struct tiadc_device *adc_dev = iio_priv(indio_dev); + struct tiadc_dma *dma = &adc_dev->dma; u32 step_en; + if (dma->chan) { + dma_free_coherent(dma->chan->device->dev, DMA_BUFFER_SIZE, + dma->buf, dma->addr); + dma_release_channel(dma->chan); + } iio_device_unregister(indio_dev); tiadc_iio_buffered_hardware_remove(indio_dev); tiadc_channels_remove(indio_dev); diff --git a/include/linux/mfd/ti_am335x_tscadc.h b/include/linux/mfd/ti_am335x_tscadc.h index e45a208d9944..b9a53e013bff 100644 --- a/include/linux/mfd/ti_am335x_tscadc.h +++ b/include/linux/mfd/ti_am335x_tscadc.h @@ -23,6 +23,8 @@ #define REG_IRQENABLE 0x02C #define REG_IRQCLR 0x030 #define REG_IRQWAKEUP 0x034 +#define REG_DMAENABLE_SET 0x038 +#define REG_DMAENABLE_CLEAR 0x03c #define REG_CTRL 0x040 #define REG_ADCFSM 0x044 #define REG_CLKDIV 0x04C @@ -36,6 +38,7 @@ #define REG_FIFO0THR 0xE8 #define REG_FIFO1CNT 0xF0 #define REG_FIFO1THR 0xF4 +#define REG_DMA1REQ 0xF8 #define REG_FIFO0 0x100 #define REG_FIFO1 0x200 @@ -126,6 +129,10 @@ #define FIFOREAD_DATA_MASK (0xfff << 0) #define FIFOREAD_CHNLID_MASK (0xf << 16) +/* DMA ENABLE/CLEAR Register */ +#define DMA_FIFO0 BIT(0) +#define DMA_FIFO1 BIT(1) + /* Sequencer Status */ #define SEQ_STATUS BIT(5) #define CHARGE_STEP 0x11 -- cgit v1.2.3 From d278d4a8892f13b6a9eb6102b356402f0e062324 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 30 Mar 2016 10:21:08 -0600 Subject: block: add code to track actual device queue depth For blk-mq, ->nr_requests does track queue depth, at least at init time. But for the older queue paths, it's simply a soft setting. On top of that, it's generally larger than the hardware setting on purpose, to allow backup of requests for merging. 
Fill a hole in struct request_queue with a 'queue_depth' member, which drivers can set via blk_set_queue_depth() to more closely inform the block layer of the real queue depth. Signed-off-by: Jens Axboe Reviewed-by: Jan Kara --- block/blk-settings.c | 12 ++++++++++++ drivers/scsi/scsi.c | 3 +++ include/linux/blkdev.h | 11 +++++++++++ 3 files changed, 26 insertions(+) (limited to 'include/linux') diff --git a/block/blk-settings.c b/block/blk-settings.c index 55369a65dea2..9cf053759363 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -836,6 +836,18 @@ void blk_queue_flush_queueable(struct request_queue *q, bool queueable) } EXPORT_SYMBOL_GPL(blk_queue_flush_queueable); +/** + * blk_set_queue_depth - tell the block layer about the device queue depth + * @q: the request queue for the device + * @depth: queue depth + * + */ +void blk_set_queue_depth(struct request_queue *q, unsigned int depth) +{ + q->queue_depth = depth; +} +EXPORT_SYMBOL(blk_set_queue_depth); + /** * blk_queue_write_cache - configure queue's write cache * @q: the request queue for the device diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index 1deb6adc411f..75455d4dab68 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c @@ -621,6 +621,9 @@ int scsi_change_queue_depth(struct scsi_device *sdev, int depth) wmb(); } + if (sdev->request_queue) + blk_set_queue_depth(sdev->request_queue, depth); + return sdev->queue_depth; } EXPORT_SYMBOL(scsi_change_queue_depth); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 9189a2d5c392..d364be6e6959 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -405,6 +405,8 @@ struct request_queue { struct blk_mq_ctx __percpu *queue_ctx; unsigned int nr_queues; + unsigned int queue_depth; + /* hw dispatch queues */ struct blk_mq_hw_ctx **queue_hw_ctx; unsigned int nr_hw_queues; @@ -777,6 +779,14 @@ static inline bool blk_write_same_mergeable(struct bio *a, struct bio *b) return false; } +static inline unsigned int blk_queue_depth(struct
request_queue *q) +{ + if (q->queue_depth) + return q->queue_depth; + + return q->nr_requests; +} + /* * q->prep_rq_fn return values */ @@ -1094,6 +1104,7 @@ extern void blk_limits_io_min(struct queue_limits *limits, unsigned int min); extern void blk_queue_io_min(struct request_queue *q, unsigned int min); extern void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt); extern void blk_queue_io_opt(struct request_queue *q, unsigned int opt); +extern void blk_set_queue_depth(struct request_queue *q, unsigned int depth); extern void blk_set_default_limits(struct queue_limits *lim); extern void blk_set_stacking_limits(struct queue_limits *lim); extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, -- cgit v1.2.3 From fa32ff6576623616c1751562edaed8c164ca5199 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Sun, 6 Nov 2016 13:14:05 -0800 Subject: Drivers: hv: ring_buffer: count on wrap around mappings in get_next_pkt_raw() (v2) With wrap around mappings in place we can always provide drivers with direct links to packets on the ring buffer, even when they wrap around. Do the required updates to get_next_pkt_raw()/put_pkt_raw() The first version of this commit was reverted (65a532f3d50a) to deal with cross-tree merge issues which are (hopefully) resolved now. Signed-off-by: Vitaly Kuznetsov Signed-off-by: K. Y. 
Srinivasan Tested-by: Dexuan Cui Signed-off-by: Greg Kroah-Hartman --- include/linux/hyperv.h | 32 +++++++++++--------------------- 1 file changed, 11 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 6824556d37ed..42ae6a5a2538 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1526,31 +1526,23 @@ static inline struct vmpacket_descriptor * get_next_pkt_raw(struct vmbus_channel *channel) { struct hv_ring_buffer_info *ring_info = &channel->inbound; - u32 read_loc = ring_info->priv_read_index; + u32 priv_read_loc = ring_info->priv_read_index; void *ring_buffer = hv_get_ring_buffer(ring_info); - struct vmpacket_descriptor *cur_desc; - u32 packetlen; u32 dsize = ring_info->ring_datasize; - u32 delta = read_loc - ring_info->ring_buffer->read_index; + /* + * delta is the difference between what is available to read and + * what was already consumed in place. We commit read index after + * the whole batch is processed. + */ + u32 delta = priv_read_loc >= ring_info->ring_buffer->read_index ? + priv_read_loc - ring_info->ring_buffer->read_index : + (dsize - ring_info->ring_buffer->read_index) + priv_read_loc; u32 bytes_avail_toread = (hv_get_bytes_to_read(ring_info) - delta); if (bytes_avail_toread < sizeof(struct vmpacket_descriptor)) return NULL; - if ((read_loc + sizeof(*cur_desc)) > dsize) - return NULL; - - cur_desc = ring_buffer + read_loc; - packetlen = cur_desc->len8 << 3; - - /* - * If the packet under consideration is wrapping around, - * return failure. 
- */ - if ((read_loc + packetlen + VMBUS_PKT_TRAILER) > (dsize - 1)) - return NULL; - - return cur_desc; + return ring_buffer + priv_read_loc; } /* @@ -1562,16 +1554,14 @@ static inline void put_pkt_raw(struct vmbus_channel *channel, struct vmpacket_descriptor *desc) { struct hv_ring_buffer_info *ring_info = &channel->inbound; - u32 read_loc = ring_info->priv_read_index; u32 packetlen = desc->len8 << 3; u32 dsize = ring_info->ring_datasize; - if ((read_loc + packetlen + VMBUS_PKT_TRAILER) > dsize) - BUG(); /* * Include the packet trailer. */ ring_info->priv_read_index += packetlen + VMBUS_PKT_TRAILER; + ring_info->priv_read_index %= dsize; } /* -- cgit v1.2.3 From 1f6ee4e7d83586c8b10bd4f2f4346353d04ce884 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Sun, 6 Nov 2016 13:14:17 -0800 Subject: Drivers: hv: vmbus: On write cleanup the logic to interrupt the host Signal the host when we determine the host is to be signaled. The current code determines the need to signal in the ringbuffer code and actually issues the signal elsewhere. This can result in the host viewing this interrupt as spurious since the host may also poll the channel. Make the necessary adjustments. Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/channel.c | 99 +++++------------------------------------------ drivers/hv/hyperv_vmbus.h | 6 +-- drivers/hv/ring_buffer.c | 30 +++++++++----- include/linux/hyperv.h | 1 + 4 files changed, 35 insertions(+), 101 deletions(-) (limited to 'include/linux') diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c index 5e482d7f60cb..8a8148f7b842 100644 --- a/drivers/hv/channel.c +++ b/drivers/hv/channel.c @@ -39,7 +39,7 @@ * vmbus_setevent- Trigger an event notification on the specified * channel.
*/ -static void vmbus_setevent(struct vmbus_channel *channel) +void vmbus_setevent(struct vmbus_channel *channel) { struct hv_monitor_page *monitorpage; @@ -65,6 +65,7 @@ static void vmbus_setevent(struct vmbus_channel *channel) vmbus_set_event(channel); } } +EXPORT_SYMBOL_GPL(vmbus_setevent); /* * vmbus_open - Open the specified channel. @@ -635,8 +636,6 @@ int vmbus_sendpacket_ctl(struct vmbus_channel *channel, void *buffer, u32 packetlen_aligned = ALIGN(packetlen, sizeof(u64)); struct kvec bufferlist[3]; u64 aligned_data = 0; - int ret; - bool signal = false; bool lock = channel->acquire_ring_lock; int num_vecs = ((bufferlen != 0) ? 3 : 1); @@ -656,41 +655,9 @@ int vmbus_sendpacket_ctl(struct vmbus_channel *channel, void *buffer, bufferlist[2].iov_base = &aligned_data; bufferlist[2].iov_len = (packetlen_aligned - packetlen); - ret = hv_ringbuffer_write(&channel->outbound, bufferlist, num_vecs, - &signal, lock, channel->signal_policy); - - /* - * Signalling the host is conditional on many factors: - * 1. The ring state changed from being empty to non-empty. - * This is tracked by the variable "signal". - * 2. The variable kick_q tracks if more data will be placed - * on the ring. We will not signal if more data is - * to be placed. - * - * Based on the channel signal state, we will decide - * which signaling policy will be applied. - * - * If we cannot write to the ring-buffer; signal the host - * even if we may not have written anything. This is a rare - * enough condition that it should not matter. - * NOTE: in this case, the hvsock channel is an exception, because - * it looks the host side's hvsock implementation has a throttling - * mechanism which can hurt the performance otherwise. - * - * KYS: Oct. 30, 2016: - * It looks like Windows hosts have logic to deal with DOS attacks that - * can be triggered if it receives interrupts when it is not expecting - * the interrupt. 
The host expects interrupts only when the ring - * transitions from empty to non-empty (or full to non full on the guest - * to host ring). - * So, base the signaling decision solely on the ring state until the - * host logic is fixed. - */ - - if (((ret == 0) && signal)) - vmbus_setevent(channel); + return hv_ringbuffer_write(channel, bufferlist, num_vecs, + lock, kick_q); - return ret; } EXPORT_SYMBOL(vmbus_sendpacket_ctl); @@ -731,7 +698,6 @@ int vmbus_sendpacket_pagebuffer_ctl(struct vmbus_channel *channel, u32 flags, bool kick_q) { - int ret; int i; struct vmbus_channel_packet_page_buffer desc; u32 descsize; @@ -739,7 +705,6 @@ int vmbus_sendpacket_pagebuffer_ctl(struct vmbus_channel *channel, u32 packetlen_aligned; struct kvec bufferlist[3]; u64 aligned_data = 0; - bool signal = false; bool lock = channel->acquire_ring_lock; if (pagecount > MAX_PAGE_BUFFER_COUNT) @@ -777,38 +742,8 @@ int vmbus_sendpacket_pagebuffer_ctl(struct vmbus_channel *channel, bufferlist[2].iov_base = &aligned_data; bufferlist[2].iov_len = (packetlen_aligned - packetlen); - ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3, - &signal, lock, channel->signal_policy); - - /* - * Signalling the host is conditional on many factors: - * 1. The ring state changed from being empty to non-empty. - * This is tracked by the variable "signal". - * 2. The variable kick_q tracks if more data will be placed - * on the ring. We will not signal if more data is - * to be placed. - * - * Based on the channel signal state, we will decide - * which signaling policy will be applied. - * - * If we cannot write to the ring-buffer; signal the host - * even if we may not have written anything. This is a rare - * enough condition that it should not matter. - * - * KYS: Oct. 30, 2016: - * It looks like Windows hosts have logic to deal with DOS attacks that - * can be triggered if it receives interrupts when it is not expecting - * the interrupt. 
The host expects interrupts only when the ring - * transitions from empty to non-empty (or full to non full on the guest - * to host ring). - * So, base the signaling decision solely on the ring state until the - * host logic is fixed. - */ - - if (((ret == 0) && signal)) - vmbus_setevent(channel); - - return ret; + return hv_ringbuffer_write(channel, bufferlist, 3, + lock, kick_q); } EXPORT_SYMBOL_GPL(vmbus_sendpacket_pagebuffer_ctl); @@ -839,12 +774,10 @@ int vmbus_sendpacket_mpb_desc(struct vmbus_channel *channel, u32 desc_size, void *buffer, u32 bufferlen, u64 requestid) { - int ret; u32 packetlen; u32 packetlen_aligned; struct kvec bufferlist[3]; u64 aligned_data = 0; - bool signal = false; bool lock = channel->acquire_ring_lock; packetlen = desc_size + bufferlen; @@ -865,13 +798,8 @@ int vmbus_sendpacket_mpb_desc(struct vmbus_channel *channel, bufferlist[2].iov_base = &aligned_data; bufferlist[2].iov_len = (packetlen_aligned - packetlen); - ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3, - &signal, lock, channel->signal_policy); - - if (ret == 0 && signal) - vmbus_setevent(channel); - - return ret; + return hv_ringbuffer_write(channel, bufferlist, 3, + lock, true); } EXPORT_SYMBOL_GPL(vmbus_sendpacket_mpb_desc); @@ -883,14 +811,12 @@ int vmbus_sendpacket_multipagebuffer(struct vmbus_channel *channel, struct hv_multipage_buffer *multi_pagebuffer, void *buffer, u32 bufferlen, u64 requestid) { - int ret; struct vmbus_channel_packet_multipage_buffer desc; u32 descsize; u32 packetlen; u32 packetlen_aligned; struct kvec bufferlist[3]; u64 aligned_data = 0; - bool signal = false; bool lock = channel->acquire_ring_lock; u32 pfncount = NUM_PAGES_SPANNED(multi_pagebuffer->offset, multi_pagebuffer->len); @@ -930,13 +856,8 @@ int vmbus_sendpacket_multipagebuffer(struct vmbus_channel *channel, bufferlist[2].iov_base = &aligned_data; bufferlist[2].iov_len = (packetlen_aligned - packetlen); - ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3, - 
&signal, lock, channel->signal_policy); - - if (ret == 0 && signal) - vmbus_setevent(channel); - - return ret; + return hv_ringbuffer_write(channel, bufferlist, 3, + lock, true); } EXPORT_SYMBOL_GPL(vmbus_sendpacket_multipagebuffer); diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h index bdab7e7f2a15..2d42ebe2af6c 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h @@ -527,10 +527,10 @@ int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info, void hv_ringbuffer_cleanup(struct hv_ring_buffer_info *ring_info); -int hv_ringbuffer_write(struct hv_ring_buffer_info *ring_info, +int hv_ringbuffer_write(struct vmbus_channel *channel, struct kvec *kv_list, - u32 kv_count, bool *signal, bool lock, - enum hv_signal_policy policy); + u32 kv_count, bool lock, + bool kick_q); int hv_ringbuffer_read(struct hv_ring_buffer_info *inring_info, void *buffer, u32 buflen, u32 *buffer_actual_len, diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c index 5d11d93eedf4..4af71306d0ff 100644 --- a/drivers/hv/ring_buffer.c +++ b/drivers/hv/ring_buffer.c @@ -66,14 +66,25 @@ u32 hv_end_read(struct hv_ring_buffer_info *rbi) * once the ring buffer is empty, it will clear the * interrupt_mask and re-check to see if new data has * arrived. + * + * KYS: Oct. 30, 2016: + * It looks like Windows hosts have logic to deal with DOS attacks that + * can be triggered if it receives interrupts when it is not expecting + * the interrupt. The host expects interrupts only when the ring + * transitions from empty to non-empty (or full to non full on the guest + * to host ring). + * So, base the signaling decision solely on the ring state until the + * host logic is fixed. 
*/ -static bool hv_need_to_signal(u32 old_write, struct hv_ring_buffer_info *rbi, - enum hv_signal_policy policy) +static void hv_signal_on_write(u32 old_write, struct vmbus_channel *channel, + bool kick_q) { + struct hv_ring_buffer_info *rbi = &channel->outbound; + virt_mb(); if (READ_ONCE(rbi->ring_buffer->interrupt_mask)) - return false; + return; /* check interrupt_mask before read_index */ virt_rmb(); @@ -82,9 +93,9 @@ static bool hv_need_to_signal(u32 old_write, struct hv_ring_buffer_info *rbi, * ring transitions from being empty to non-empty. */ if (old_write == READ_ONCE(rbi->ring_buffer->read_index)) - return true; + vmbus_setevent(channel); - return false; + return; } /* Get the next write location for the specified ring buffer. */ @@ -273,9 +284,9 @@ void hv_ringbuffer_cleanup(struct hv_ring_buffer_info *ring_info) } /* Write to the ring buffer. */ -int hv_ringbuffer_write(struct hv_ring_buffer_info *outring_info, - struct kvec *kv_list, u32 kv_count, bool *signal, bool lock, - enum hv_signal_policy policy) +int hv_ringbuffer_write(struct vmbus_channel *channel, + struct kvec *kv_list, u32 kv_count, bool lock, + bool kick_q) { int i = 0; u32 bytes_avail_towrite; @@ -285,6 +296,7 @@ int hv_ringbuffer_write(struct hv_ring_buffer_info *outring_info, u32 old_write; u64 prev_indices = 0; unsigned long flags = 0; + struct hv_ring_buffer_info *outring_info = &channel->outbound; for (i = 0; i < kv_count; i++) totalbytes_towrite += kv_list[i].iov_len; @@ -337,7 +349,7 @@ int hv_ringbuffer_write(struct hv_ring_buffer_info *outring_info, if (lock) spin_unlock_irqrestore(&outring_info->ring_lock, flags); - *signal = hv_need_to_signal(old_write, outring_info, policy); + hv_signal_on_write(old_write, channel, kick_q); return 0; } diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 42ae6a5a2538..8cf78ed96747 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1454,6 +1454,7 @@ void hv_event_tasklet_enable(struct vmbus_channel *channel); 
void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid); +void vmbus_setevent(struct vmbus_channel *channel); /* * Negotiated version with the Host. */ -- cgit v1.2.3 From 3372592a140db69fd63837e81f048ab4abf8111e Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Sun, 6 Nov 2016 13:14:18 -0800 Subject: Drivers: hv: vmbus: On the read path cleanup the logic to interrupt the host Signal the host when we determine the host is to be signaled - on the read path. The current code determines the need to signal in the ringbuffer code and actually issues the signal elsewhere. This can result in the host viewing this interrupt as spurious since the host may also poll the channel. Make the necessary adjustments. Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/channel.c | 11 ++--------- drivers/hv/hyperv_vmbus.h | 4 ++-- drivers/hv/ring_buffer.c | 7 ++++--- include/linux/hyperv.h | 12 ++++++------ 4 files changed, 14 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c index 8a8148f7b842..5fb4c6d9209b 100644 --- a/drivers/hv/channel.c +++ b/drivers/hv/channel.c @@ -879,16 +879,9 @@ __vmbus_recvpacket(struct vmbus_channel *channel, void *buffer, u32 bufferlen, u32 *buffer_actual_len, u64 *requestid, bool raw) { - int ret; - bool signal = false; - - ret = hv_ringbuffer_read(&channel->inbound, buffer, bufferlen, - buffer_actual_len, requestid, &signal, raw); + return hv_ringbuffer_read(channel, buffer, bufferlen, + buffer_actual_len, requestid, raw); - if (signal) - vmbus_setevent(channel); - - return ret; } int vmbus_recvpacket(struct vmbus_channel *channel, void *buffer, diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h index 2d42ebe2af6c..0675b395ce5c 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h @@ -532,9 +532,9 @@ int hv_ringbuffer_write(struct vmbus_channel *channel, u32 kv_count, bool lock, bool kick_q); -int
hv_ringbuffer_read(struct hv_ring_buffer_info *inring_info, +int hv_ringbuffer_read(struct vmbus_channel *channel, void *buffer, u32 buflen, u32 *buffer_actual_len, - u64 *requestid, bool *signal, bool raw); + u64 *requestid, bool raw); void hv_ringbuffer_get_debuginfo(struct hv_ring_buffer_info *ring_info, struct hv_ring_buffer_debug_info *debug_info); diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c index 4af71306d0ff..cd49cb17eb7f 100644 --- a/drivers/hv/ring_buffer.c +++ b/drivers/hv/ring_buffer.c @@ -353,9 +353,9 @@ int hv_ringbuffer_write(struct vmbus_channel *channel, return 0; } -int hv_ringbuffer_read(struct hv_ring_buffer_info *inring_info, +int hv_ringbuffer_read(struct vmbus_channel *channel, void *buffer, u32 buflen, u32 *buffer_actual_len, - u64 *requestid, bool *signal, bool raw) + u64 *requestid, bool raw) { u32 bytes_avail_toread; u32 next_read_location = 0; @@ -364,6 +364,7 @@ int hv_ringbuffer_read(struct hv_ring_buffer_info *inring_info, u32 offset; u32 packetlen; int ret = 0; + struct hv_ring_buffer_info *inring_info = &channel->inbound; if (buflen <= 0) return -EINVAL; @@ -421,7 +422,7 @@ int hv_ringbuffer_read(struct hv_ring_buffer_info *inring_info, /* Update the read index */ hv_set_next_read_location(inring_info, next_read_location); - *signal = hv_need_to_signal_on_read(inring_info); + hv_signal_on_read(channel); return ret; } diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 8cf78ed96747..fdb0a87323f3 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1487,10 +1487,11 @@ hv_get_ring_buffer(struct hv_ring_buffer_info *ring_info) * there is room for the producer to send the pending packet. 
*/ -static inline bool hv_need_to_signal_on_read(struct hv_ring_buffer_info *rbi) +static inline void hv_signal_on_read(struct vmbus_channel *channel) { u32 cur_write_sz; u32 pending_sz; + struct hv_ring_buffer_info *rbi = &channel->inbound; /* * Issue a full memory barrier before making the signaling decision. @@ -1508,14 +1509,14 @@ static inline bool hv_need_to_signal_on_read(struct hv_ring_buffer_info *rbi) pending_sz = READ_ONCE(rbi->ring_buffer->pending_send_sz); /* If the other end is not blocked on write don't bother. */ if (pending_sz == 0) - return false; + return; cur_write_sz = hv_get_bytes_to_write(rbi); if (cur_write_sz >= pending_sz) - return true; + vmbus_setevent(channel); - return false; + return; } /* @@ -1587,8 +1588,7 @@ static inline void commit_rd_index(struct vmbus_channel *channel) virt_rmb(); ring_info->ring_buffer->read_index = ring_info->priv_read_index; - if (hv_need_to_signal_on_read(ring_info)) - vmbus_set_event(channel); + hv_signal_on_read(channel); } -- cgit v1.2.3 From e9f66ae23c209eec617130126a23bf547bf7a6d8 Mon Sep 17 00:00:00 2001 From: Sergio Prado Date: Thu, 20 Oct 2016 19:42:44 -0200 Subject: mtd: s3c2410: make ecc mode configurable via platform data Removing CONFIG_MTD_NAND_S3C2410_HWECC option and adding an ecc_mode field in the driver's platform data structure so it can be selected via platform data. Also setting this field to NAND_ECC_SOFT in all boards using this driver, since none of them had CONFIG_MTD_NAND_S3C2410_HWECC enabled.
Signed-off-by: Sergio Prado Acked-by: Krzysztof Kozlowski Signed-off-by: Boris Brezillon --- arch/arm/mach-s3c24xx/common-smdk.c | 1 + arch/arm/mach-s3c24xx/mach-anubis.c | 1 + arch/arm/mach-s3c24xx/mach-at2440evb.c | 1 + arch/arm/mach-s3c24xx/mach-bast.c | 1 + arch/arm/mach-s3c24xx/mach-gta02.c | 1 + arch/arm/mach-s3c24xx/mach-jive.c | 1 + arch/arm/mach-s3c24xx/mach-mini2440.c | 1 + arch/arm/mach-s3c24xx/mach-osiris.c | 1 + arch/arm/mach-s3c24xx/mach-qt2410.c | 1 + arch/arm/mach-s3c24xx/mach-rx1950.c | 1 + arch/arm/mach-s3c24xx/mach-rx3715.c | 1 + arch/arm/mach-s3c24xx/mach-vstms.c | 1 + arch/arm/mach-s3c64xx/mach-hmt.c | 1 + arch/arm/mach-s3c64xx/mach-mini6410.c | 1 + arch/arm/mach-s3c64xx/mach-real6410.c | 1 + drivers/mtd/nand/Kconfig | 9 -- drivers/mtd/nand/s3c2410.c | 123 +++++++++++++------------ include/linux/platform_data/mtd-nand-s3c2410.h | 6 +- 18 files changed, 82 insertions(+), 71 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-s3c24xx/common-smdk.c b/arch/arm/mach-s3c24xx/common-smdk.c index e9fbcc91c5c0..9e0bc46e90ec 100644 --- a/arch/arm/mach-s3c24xx/common-smdk.c +++ b/arch/arm/mach-s3c24xx/common-smdk.c @@ -171,6 +171,7 @@ static struct s3c2410_platform_nand smdk_nand_info = { .twrph1 = 20, .nr_sets = ARRAY_SIZE(smdk_nand_sets), .sets = smdk_nand_sets, + .ecc_mode = NAND_ECC_SOFT, }; /* devices we initialise */ diff --git a/arch/arm/mach-s3c24xx/mach-anubis.c b/arch/arm/mach-s3c24xx/mach-anubis.c index d03df0df01fa..029ef1b58925 100644 --- a/arch/arm/mach-s3c24xx/mach-anubis.c +++ b/arch/arm/mach-s3c24xx/mach-anubis.c @@ -223,6 +223,7 @@ static struct s3c2410_platform_nand __initdata anubis_nand_info = { .nr_sets = ARRAY_SIZE(anubis_nand_sets), .sets = anubis_nand_sets, .select_chip = anubis_nand_select, + .ecc_mode = NAND_ECC_SOFT, }; /* IDE channels */ diff --git a/arch/arm/mach-s3c24xx/mach-at2440evb.c b/arch/arm/mach-s3c24xx/mach-at2440evb.c index 9ae170fef2a7..7b28eb623fc1 100644 --- 
a/arch/arm/mach-s3c24xx/mach-at2440evb.c +++ b/arch/arm/mach-s3c24xx/mach-at2440evb.c @@ -114,6 +114,7 @@ static struct s3c2410_platform_nand __initdata at2440evb_nand_info = { .twrph1 = 40, .nr_sets = ARRAY_SIZE(at2440evb_nand_sets), .sets = at2440evb_nand_sets, + .ecc_mode = NAND_ECC_SOFT, }; /* DM9000AEP 10/100 ethernet controller */ diff --git a/arch/arm/mach-s3c24xx/mach-bast.c b/arch/arm/mach-s3c24xx/mach-bast.c index ed07cf392d4b..5185036765db 100644 --- a/arch/arm/mach-s3c24xx/mach-bast.c +++ b/arch/arm/mach-s3c24xx/mach-bast.c @@ -299,6 +299,7 @@ static struct s3c2410_platform_nand __initdata bast_nand_info = { .nr_sets = ARRAY_SIZE(bast_nand_sets), .sets = bast_nand_sets, .select_chip = bast_nand_select, + .ecc_mode = NAND_ECC_SOFT, }; /* DM9000 */ diff --git a/arch/arm/mach-s3c24xx/mach-gta02.c b/arch/arm/mach-s3c24xx/mach-gta02.c index 27ae6877550f..b0ed401da3a3 100644 --- a/arch/arm/mach-s3c24xx/mach-gta02.c +++ b/arch/arm/mach-s3c24xx/mach-gta02.c @@ -443,6 +443,7 @@ static struct s3c2410_platform_nand __initdata gta02_nand_info = { .twrph1 = 15, .nr_sets = ARRAY_SIZE(gta02_nand_sets), .sets = gta02_nand_sets, + .ecc_mode = NAND_ECC_SOFT, }; diff --git a/arch/arm/mach-s3c24xx/mach-jive.c b/arch/arm/mach-s3c24xx/mach-jive.c index 7d99fe8f6157..895aca225952 100644 --- a/arch/arm/mach-s3c24xx/mach-jive.c +++ b/arch/arm/mach-s3c24xx/mach-jive.c @@ -232,6 +232,7 @@ static struct s3c2410_platform_nand __initdata jive_nand_info = { .twrph1 = 40, .sets = jive_nand_sets, .nr_sets = ARRAY_SIZE(jive_nand_sets), + .ecc_mode = NAND_ECC_SOFT, }; static int __init jive_mtdset(char *options) diff --git a/arch/arm/mach-s3c24xx/mach-mini2440.c b/arch/arm/mach-s3c24xx/mach-mini2440.c index ec60bd4a1646..71af8d2fd320 100644 --- a/arch/arm/mach-s3c24xx/mach-mini2440.c +++ b/arch/arm/mach-s3c24xx/mach-mini2440.c @@ -287,6 +287,7 @@ static struct s3c2410_platform_nand mini2440_nand_info __initdata = { .nr_sets = ARRAY_SIZE(mini2440_nand_sets), .sets = mini2440_nand_sets, 
.ignore_unset_ecc = 1, + .ecc_mode = NAND_ECC_SOFT, }; /* DM9000AEP 10/100 ethernet controller */ diff --git a/arch/arm/mach-s3c24xx/mach-osiris.c b/arch/arm/mach-s3c24xx/mach-osiris.c index 2f6fdc326835..70b0eb7d3134 100644 --- a/arch/arm/mach-s3c24xx/mach-osiris.c +++ b/arch/arm/mach-s3c24xx/mach-osiris.c @@ -238,6 +238,7 @@ static struct s3c2410_platform_nand __initdata osiris_nand_info = { .nr_sets = ARRAY_SIZE(osiris_nand_sets), .sets = osiris_nand_sets, .select_chip = osiris_nand_select, + .ecc_mode = NAND_ECC_SOFT, }; /* PCMCIA control and configuration */ diff --git a/arch/arm/mach-s3c24xx/mach-qt2410.c b/arch/arm/mach-s3c24xx/mach-qt2410.c index 984516e8307a..868c82087403 100644 --- a/arch/arm/mach-s3c24xx/mach-qt2410.c +++ b/arch/arm/mach-s3c24xx/mach-qt2410.c @@ -284,6 +284,7 @@ static struct s3c2410_platform_nand __initdata qt2410_nand_info = { .twrph1 = 20, .nr_sets = ARRAY_SIZE(qt2410_nand_sets), .sets = qt2410_nand_sets, + .ecc_mode = NAND_ECC_SOFT, }; /* UDC */ diff --git a/arch/arm/mach-s3c24xx/mach-rx1950.c b/arch/arm/mach-s3c24xx/mach-rx1950.c index 25a139bb9826..e86ad6a68a0b 100644 --- a/arch/arm/mach-s3c24xx/mach-rx1950.c +++ b/arch/arm/mach-s3c24xx/mach-rx1950.c @@ -611,6 +611,7 @@ static struct s3c2410_platform_nand rx1950_nand_info = { .twrph1 = 15, .nr_sets = ARRAY_SIZE(rx1950_nand_sets), .sets = rx1950_nand_sets, + .ecc_mode = NAND_ECC_SOFT, }; static struct s3c2410_udc_mach_info rx1950_udc_cfg __initdata = { diff --git a/arch/arm/mach-s3c24xx/mach-rx3715.c b/arch/arm/mach-s3c24xx/mach-rx3715.c index cf55196f89ca..a39fb9780dd3 100644 --- a/arch/arm/mach-s3c24xx/mach-rx3715.c +++ b/arch/arm/mach-s3c24xx/mach-rx3715.c @@ -164,6 +164,7 @@ static struct s3c2410_platform_nand __initdata rx3715_nand_info = { .twrph1 = 15, .nr_sets = ARRAY_SIZE(rx3715_nand_sets), .sets = rx3715_nand_sets, + .ecc_mode = NAND_ECC_SOFT, }; static struct platform_device *rx3715_devices[] __initdata = { diff --git a/arch/arm/mach-s3c24xx/mach-vstms.c 
b/arch/arm/mach-s3c24xx/mach-vstms.c index b4460d5f7011..f5e6322145fa 100644 --- a/arch/arm/mach-s3c24xx/mach-vstms.c +++ b/arch/arm/mach-s3c24xx/mach-vstms.c @@ -117,6 +117,7 @@ static struct s3c2410_platform_nand __initdata vstms_nand_info = { .twrph1 = 20, .nr_sets = ARRAY_SIZE(vstms_nand_sets), .sets = vstms_nand_sets, + .ecc_mode = NAND_ECC_SOFT, }; static struct platform_device *vstms_devices[] __initdata = { diff --git a/arch/arm/mach-s3c64xx/mach-hmt.c b/arch/arm/mach-s3c64xx/mach-hmt.c index bc7dc1fcbf7d..59b5531f1987 100644 --- a/arch/arm/mach-s3c64xx/mach-hmt.c +++ b/arch/arm/mach-s3c64xx/mach-hmt.c @@ -204,6 +204,7 @@ static struct s3c2410_platform_nand hmt_nand_info = { .twrph1 = 40, .nr_sets = ARRAY_SIZE(hmt_nand_sets), .sets = hmt_nand_sets, + .ecc_mode = NAND_ECC_SOFT, }; static struct gpio_led hmt_leds[] = { diff --git a/arch/arm/mach-s3c64xx/mach-mini6410.c b/arch/arm/mach-s3c64xx/mach-mini6410.c index ae999fb3fe6d..a3e3e25728b4 100644 --- a/arch/arm/mach-s3c64xx/mach-mini6410.c +++ b/arch/arm/mach-s3c64xx/mach-mini6410.c @@ -142,6 +142,7 @@ static struct s3c2410_platform_nand mini6410_nand_info = { .twrph1 = 40, .nr_sets = ARRAY_SIZE(mini6410_nand_sets), .sets = mini6410_nand_sets, + .ecc_mode = NAND_ECC_SOFT, }; static struct s3c_fb_pd_win mini6410_lcd_type0_fb_win = { diff --git a/arch/arm/mach-s3c64xx/mach-real6410.c b/arch/arm/mach-s3c64xx/mach-real6410.c index 4e240ffa7ac7..d6b3ffd7704b 100644 --- a/arch/arm/mach-s3c64xx/mach-real6410.c +++ b/arch/arm/mach-s3c64xx/mach-real6410.c @@ -194,6 +194,7 @@ static struct s3c2410_platform_nand real6410_nand_info = { .twrph1 = 40, .nr_sets = ARRAY_SIZE(real6410_nand_sets), .sets = real6410_nand_sets, + .ecc_mode = NAND_ECC_SOFT, }; static struct platform_device *real6410_devices[] __initdata = { diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig index c023125989cf..60757af314d3 100644 --- a/drivers/mtd/nand/Kconfig +++ b/drivers/mtd/nand/Kconfig @@ -179,15 +179,6 @@ config 
MTD_NAND_S3C2410_DEBUG help Enable debugging of the S3C NAND driver -config MTD_NAND_S3C2410_HWECC - bool "Samsung S3C NAND Hardware ECC" - depends on MTD_NAND_S3C2410 - help - Enable the use of the controller's internal ECC generator when - using NAND. Early versions of the chips have had problems with - incorrect ECC generation, and if using these, the default of - software ECC is preferable. - config MTD_NAND_NDFC tristate "NDFC NanD Flash Controller" depends on 4xx diff --git a/drivers/mtd/nand/s3c2410.c b/drivers/mtd/nand/s3c2410.c index d459c19d78de..bb4ac3179d17 100644 --- a/drivers/mtd/nand/s3c2410.c +++ b/drivers/mtd/nand/s3c2410.c @@ -497,7 +497,6 @@ static int s3c2412_nand_devready(struct mtd_info *mtd) /* ECC handling functions */ -#ifdef CONFIG_MTD_NAND_S3C2410_HWECC static int s3c2410_nand_correct_data(struct mtd_info *mtd, u_char *dat, u_char *read_ecc, u_char *calc_ecc) { @@ -649,7 +648,6 @@ static int s3c2440_nand_calculate_ecc(struct mtd_info *mtd, const u_char *dat, return 0; } -#endif /* over-ride the standard functions for a little more speed. 
We can * use read/write block to move the data buffers to/from the controller @@ -858,50 +856,7 @@ static void s3c2410_nand_init_chip(struct s3c2410_nand_info *info, nmtd->info = info; nmtd->set = set; -#ifdef CONFIG_MTD_NAND_S3C2410_HWECC - chip->ecc.calculate = s3c2410_nand_calculate_ecc; - chip->ecc.correct = s3c2410_nand_correct_data; - chip->ecc.mode = NAND_ECC_HW; - chip->ecc.strength = 1; - - switch (info->cpu_type) { - case TYPE_S3C2410: - chip->ecc.hwctl = s3c2410_nand_enable_hwecc; - chip->ecc.calculate = s3c2410_nand_calculate_ecc; - break; - - case TYPE_S3C2412: - chip->ecc.hwctl = s3c2412_nand_enable_hwecc; - chip->ecc.calculate = s3c2412_nand_calculate_ecc; - break; - - case TYPE_S3C2440: - chip->ecc.hwctl = s3c2440_nand_enable_hwecc; - chip->ecc.calculate = s3c2440_nand_calculate_ecc; - break; - } -#else - chip->ecc.mode = NAND_ECC_SOFT; - chip->ecc.algo = NAND_ECC_HAMMING; -#endif - - if (set->disable_ecc) - chip->ecc.mode = NAND_ECC_NONE; - - switch (chip->ecc.mode) { - case NAND_ECC_NONE: - dev_info(info->device, "NAND ECC disabled\n"); - break; - case NAND_ECC_SOFT: - dev_info(info->device, "NAND soft ECC\n"); - break; - case NAND_ECC_HW: - dev_info(info->device, "NAND hardware ECC\n"); - break; - default: - dev_info(info->device, "NAND ECC UNKNOWN\n"); - break; - } + chip->ecc.mode = info->platform->ecc_mode; /* If you use u-boot BBT creation code, specifying this flag will * let the kernel fish out the BBT from the NAND, and also skip the @@ -923,28 +878,74 @@ static void s3c2410_nand_init_chip(struct s3c2410_nand_info *info, * * The internal state is currently limited to the ECC state information. 
*/ -static void s3c2410_nand_update_chip(struct s3c2410_nand_info *info, - struct s3c2410_nand_mtd *nmtd) +static int s3c2410_nand_update_chip(struct s3c2410_nand_info *info, + struct s3c2410_nand_mtd *nmtd) { struct nand_chip *chip = &nmtd->chip; - dev_dbg(info->device, "chip %p => page shift %d\n", - chip, chip->page_shift); + switch (chip->ecc.mode) { - if (chip->ecc.mode != NAND_ECC_HW) - return; + case NAND_ECC_NONE: + dev_info(info->device, "ECC disabled\n"); + break; + + case NAND_ECC_SOFT: + /* + * This driver expects Hamming based ECC when ecc_mode is set + * to NAND_ECC_SOFT. Force ecc.algo to NAND_ECC_HAMMING to + * avoid adding an extra ecc_algo field to + * s3c2410_platform_nand. + */ + chip->ecc.algo = NAND_ECC_HAMMING; + dev_info(info->device, "soft ECC\n"); + break; + + case NAND_ECC_HW: + chip->ecc.calculate = s3c2410_nand_calculate_ecc; + chip->ecc.correct = s3c2410_nand_correct_data; + chip->ecc.strength = 1; + + switch (info->cpu_type) { + case TYPE_S3C2410: + chip->ecc.hwctl = s3c2410_nand_enable_hwecc; + chip->ecc.calculate = s3c2410_nand_calculate_ecc; + break; + + case TYPE_S3C2412: + chip->ecc.hwctl = s3c2412_nand_enable_hwecc; + chip->ecc.calculate = s3c2412_nand_calculate_ecc; + break; + + case TYPE_S3C2440: + chip->ecc.hwctl = s3c2440_nand_enable_hwecc; + chip->ecc.calculate = s3c2440_nand_calculate_ecc; + break; + } + + dev_dbg(info->device, "chip %p => page shift %d\n", + chip, chip->page_shift); /* change the behaviour depending on whether we are using * the large or small page nand device */ + if (chip->page_shift > 10) { + chip->ecc.size = 256; + chip->ecc.bytes = 3; + } else { + chip->ecc.size = 512; + chip->ecc.bytes = 3; + mtd_set_ooblayout(nand_to_mtd(chip), + &s3c2410_ooblayout_ops); + } - if (chip->page_shift > 10) { - chip->ecc.size = 256; - chip->ecc.bytes = 3; - } else { - chip->ecc.size = 512; - chip->ecc.bytes = 3; - mtd_set_ooblayout(nand_to_mtd(chip), &s3c2410_ooblayout_ops); + dev_info(info->device, "hardware ECC\n"); 
+ break; + + default: + dev_err(info->device, "invalid ECC mode!\n"); + return -EINVAL; } + + return 0; } /* s3c24xx_nand_probe @@ -1046,7 +1047,9 @@ static int s3c24xx_nand_probe(struct platform_device *pdev) NULL); if (nmtd->scan_res == 0) { - s3c2410_nand_update_chip(info, nmtd); + err = s3c2410_nand_update_chip(info, nmtd); + if (err < 0) + goto exit_error; nand_scan_tail(mtd); s3c2410_nand_add_partition(info, nmtd, sets); } diff --git a/include/linux/platform_data/mtd-nand-s3c2410.h b/include/linux/platform_data/mtd-nand-s3c2410.h index c55e42ee57fa..729af13d1773 100644 --- a/include/linux/platform_data/mtd-nand-s3c2410.h +++ b/include/linux/platform_data/mtd-nand-s3c2410.h @@ -12,9 +12,10 @@ #ifndef __MTD_NAND_S3C2410_H #define __MTD_NAND_S3C2410_H +#include + /** * struct s3c2410_nand_set - define a set of one or more nand chips - * @disable_ecc: Entirely disable ECC - Dangerous * @flash_bbt: Openmoko u-boot can create a Bad Block Table * Setting this flag will allow the kernel to * look for it at boot time and also skip the NAND @@ -31,7 +32,6 @@ * a warning at boot time. */ struct s3c2410_nand_set { - unsigned int disable_ecc:1; unsigned int flash_bbt:1; unsigned int options; @@ -51,6 +51,8 @@ struct s3c2410_platform_nand { unsigned int ignore_unset_ecc:1; + nand_ecc_modes_t ecc_mode; + int nr_sets; struct s3c2410_nand_set *sets; -- cgit v1.2.3 From 1c825ad1b8cfe12ccc145dcdba360c52c0272c04 Mon Sep 17 00:00:00 2001 From: Sergio Prado Date: Wed, 26 Oct 2016 21:59:55 -0200 Subject: mtd: s3c2410: parse the device configuration from OF node Allows configuring Samsung's s3c2410 memory controller using a devicetree. 
Signed-off-by: Sergio Prado Acked-by: Krzysztof Kozlowski Signed-off-by: Boris Brezillon --- drivers/mtd/nand/s3c2410.c | 163 ++++++++++++++++++++++--- include/linux/platform_data/mtd-nand-s3c2410.h | 1 + 2 files changed, 147 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/drivers/mtd/nand/s3c2410.c b/drivers/mtd/nand/s3c2410.c index bb4ac3179d17..f0b030d44f71 100644 --- a/drivers/mtd/nand/s3c2410.c +++ b/drivers/mtd/nand/s3c2410.c @@ -39,6 +39,8 @@ #include #include #include +#include +#include #include #include @@ -185,6 +187,22 @@ struct s3c2410_nand_info { #endif }; +struct s3c24XX_nand_devtype_data { + enum s3c_cpu_type type; +}; + +static const struct s3c24XX_nand_devtype_data s3c2410_nand_devtype_data = { + .type = TYPE_S3C2410, +}; + +static const struct s3c24XX_nand_devtype_data s3c2412_nand_devtype_data = { + .type = TYPE_S3C2412, +}; + +static const struct s3c24XX_nand_devtype_data s3c2440_nand_devtype_data = { + .type = TYPE_S3C2440, +}; + /* conversion functions */ static struct s3c2410_nand_mtd *s3c2410_nand_mtd_toours(struct mtd_info *mtd) @@ -794,6 +812,30 @@ static int s3c2410_nand_add_partition(struct s3c2410_nand_info *info, return -ENODEV; } +static int s3c2410_nand_setup_data_interface(struct mtd_info *mtd, + const struct nand_data_interface *conf, + bool check_only) +{ + struct s3c2410_nand_info *info = s3c2410_nand_mtd_toinfo(mtd); + struct s3c2410_platform_nand *pdata = info->platform; + const struct nand_sdr_timings *timings; + int tacls; + + timings = nand_get_sdr_timings(conf); + if (IS_ERR(timings)) + return -ENOTSUPP; + + tacls = timings->tCLS_min - timings->tWP_min; + if (tacls < 0) + tacls = 0; + + pdata->tacls = DIV_ROUND_UP(tacls, 1000); + pdata->twrph0 = DIV_ROUND_UP(timings->tWP_min, 1000); + pdata->twrph1 = DIV_ROUND_UP(timings->tCLH_min, 1000); + + return s3c2410_nand_setrate(info); +} + /** * s3c2410_nand_init_chip - initialise a single instance of an chip * @info: The base NAND controller the chip is 
on. @@ -808,9 +850,12 @@ static void s3c2410_nand_init_chip(struct s3c2410_nand_info *info, struct s3c2410_nand_mtd *nmtd, struct s3c2410_nand_set *set) { + struct device_node *np = info->device->of_node; struct nand_chip *chip = &nmtd->chip; void __iomem *regs = info->regs; + nand_set_flash_node(chip, set->of_node); + chip->write_buf = s3c2410_nand_write_buf; chip->read_buf = s3c2410_nand_read_buf; chip->select_chip = s3c2410_nand_select_chip; @@ -819,6 +864,13 @@ static void s3c2410_nand_init_chip(struct s3c2410_nand_info *info, chip->options = set->options; chip->controller = &info->controller; + /* + * let's keep behavior unchanged for legacy boards booting via pdata and + * auto-detect timings only when booting with a device tree. + */ + if (np) + chip->setup_data_interface = s3c2410_nand_setup_data_interface; + switch (info->cpu_type) { case TYPE_S3C2410: chip->IO_ADDR_W = regs + S3C2410_NFDATA; @@ -858,13 +910,12 @@ static void s3c2410_nand_init_chip(struct s3c2410_nand_info *info, chip->ecc.mode = info->platform->ecc_mode; - /* If you use u-boot BBT creation code, specifying this flag will - * let the kernel fish out the BBT from the NAND, and also skip the - * full NAND scan that can take 1/2s or so. Little things... */ - if (set->flash_bbt) { + /* + * If you use u-boot BBT creation code, specifying this flag will + * let the kernel fish out the BBT from the NAND. 
+ */ + if (set->flash_bbt) chip->bbt_options |= NAND_BBT_USE_FLASH; - chip->options |= NAND_SKIP_BBTSCAN; - } } /** @@ -945,6 +996,78 @@ static int s3c2410_nand_update_chip(struct s3c2410_nand_info *info, return -EINVAL; } + if (chip->bbt_options & NAND_BBT_USE_FLASH) + chip->options |= NAND_SKIP_BBTSCAN; + + return 0; +} + +static const struct of_device_id s3c24xx_nand_dt_ids[] = { + { + .compatible = "samsung,s3c2410-nand", + .data = &s3c2410_nand_devtype_data, + }, { + /* also compatible with s3c6400 */ + .compatible = "samsung,s3c2412-nand", + .data = &s3c2412_nand_devtype_data, + }, { + .compatible = "samsung,s3c2440-nand", + .data = &s3c2440_nand_devtype_data, + }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, s3c24xx_nand_dt_ids); + +static int s3c24xx_nand_probe_dt(struct platform_device *pdev) +{ + const struct s3c24XX_nand_devtype_data *devtype_data; + struct s3c2410_platform_nand *pdata; + struct s3c2410_nand_info *info = platform_get_drvdata(pdev); + struct device_node *np = pdev->dev.of_node, *child; + struct s3c2410_nand_set *sets; + + devtype_data = of_device_get_match_data(&pdev->dev); + if (!devtype_data) + return -ENODEV; + + info->cpu_type = devtype_data->type; + + pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL); + if (!pdata) + return -ENOMEM; + + pdev->dev.platform_data = pdata; + + pdata->nr_sets = of_get_child_count(np); + if (!pdata->nr_sets) + return 0; + + sets = devm_kzalloc(&pdev->dev, sizeof(*sets) * pdata->nr_sets, + GFP_KERNEL); + if (!sets) + return -ENOMEM; + + pdata->sets = sets; + + for_each_available_child_of_node(np, child) { + sets->name = (char *)child->name; + sets->of_node = child; + sets->nr_chips = 1; + + of_node_get(child); + + sets++; + } + + return 0; +} + +static int s3c24xx_nand_probe_pdata(struct platform_device *pdev) +{ + struct s3c2410_nand_info *info = platform_get_drvdata(pdev); + + info->cpu_type = platform_get_device_id(pdev)->driver_data; + return 0; } @@ -957,8 +1080,7 @@ static int 
s3c2410_nand_update_chip(struct s3c2410_nand_info *info, */ static int s3c24xx_nand_probe(struct platform_device *pdev) { - struct s3c2410_platform_nand *plat = to_nand_plat(pdev); - enum s3c_cpu_type cpu_type; + struct s3c2410_platform_nand *plat; struct s3c2410_nand_info *info; struct s3c2410_nand_mtd *nmtd; struct s3c2410_nand_set *sets; @@ -968,8 +1090,6 @@ static int s3c24xx_nand_probe(struct platform_device *pdev) int nr_sets; int setno; - cpu_type = platform_get_device_id(pdev)->driver_data; - info = devm_kzalloc(&pdev->dev, sizeof(*info), GFP_KERNEL); if (info == NULL) { err = -ENOMEM; @@ -991,6 +1111,16 @@ static int s3c24xx_nand_probe(struct platform_device *pdev) s3c2410_nand_clk_set_state(info, CLOCK_ENABLE); + if (pdev->dev.of_node) + err = s3c24xx_nand_probe_dt(pdev); + else + err = s3c24xx_nand_probe_pdata(pdev); + + if (err) + goto exit_error; + + plat = to_nand_plat(pdev); + /* allocate and map the resource */ /* currently we assume we have the one resource */ @@ -999,7 +1129,6 @@ static int s3c24xx_nand_probe(struct platform_device *pdev) info->device = &pdev->dev; info->platform = plat; - info->cpu_type = cpu_type; info->regs = devm_ioremap_resource(&pdev->dev, res); if (IS_ERR(info->regs)) { @@ -1009,12 +1138,6 @@ static int s3c24xx_nand_probe(struct platform_device *pdev) dev_dbg(&pdev->dev, "mapped registers at %p\n", info->regs); - /* initialise the hardware */ - - err = s3c2410_nand_inithw(info); - if (err != 0) - goto exit_error; - sets = (plat != NULL) ? plat->sets : NULL; nr_sets = (plat != NULL) ? 
plat->nr_sets : 1; @@ -1058,6 +1181,11 @@ static int s3c24xx_nand_probe(struct platform_device *pdev) sets++; } + /* initialise the hardware */ + err = s3c2410_nand_inithw(info); + if (err != 0) + goto exit_error; + err = s3c2410_nand_cpufreq_register(info); if (err < 0) { dev_err(&pdev->dev, "failed to init cpufreq support\n"); @@ -1158,6 +1286,7 @@ static struct platform_driver s3c24xx_nand_driver = { .id_table = s3c24xx_driver_ids, .driver = { .name = "s3c24xx-nand", + .of_match_table = s3c24xx_nand_dt_ids, }, }; diff --git a/include/linux/platform_data/mtd-nand-s3c2410.h b/include/linux/platform_data/mtd-nand-s3c2410.h index 729af13d1773..f01659026b26 100644 --- a/include/linux/platform_data/mtd-nand-s3c2410.h +++ b/include/linux/platform_data/mtd-nand-s3c2410.h @@ -40,6 +40,7 @@ struct s3c2410_nand_set { char *name; int *nr_map; struct mtd_partition *partitions; + struct device_node *of_node; }; struct s3c2410_platform_nand { -- cgit v1.2.3 From 204e7ecd47e26cc12d9e8e8a7e7a2eeb9573f0ba Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Sat, 1 Oct 2016 10:24:02 +0200 Subject: mtd: nand: Add a few more timings to nand_sdr_timings Add the tR_max, tBERS_max, tPROG_max and tCCS_min timings to the nand_sdr_timings struct. Assign default/safe values for the statically defined timings, and extract them from the ONFI parameter table if the NAND is ONFI compliant. 
Signed-off-by: Boris Brezillon Tested-by: Marc Gonzalez --- drivers/mtd/nand/nand_timings.c | 26 +++++++++++++++++++++++++- include/linux/mtd/nand.h | 8 ++++++++ 2 files changed, 33 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/mtd/nand/nand_timings.c b/drivers/mtd/nand/nand_timings.c index 13a587407be3..f06312df3669 100644 --- a/drivers/mtd/nand/nand_timings.c +++ b/drivers/mtd/nand/nand_timings.c @@ -18,6 +18,8 @@ static const struct nand_data_interface onfi_sdr_timings[] = { { .type = NAND_SDR_IFACE, .timings.sdr = { + .tCCS_min = 500000, + .tR_max = 200000000, .tADL_min = 400000, .tALH_min = 20000, .tALS_min = 50000, @@ -58,6 +60,8 @@ static const struct nand_data_interface onfi_sdr_timings[] = { { .type = NAND_SDR_IFACE, .timings.sdr = { + .tCCS_min = 500000, + .tR_max = 200000000, .tADL_min = 400000, .tALH_min = 10000, .tALS_min = 25000, @@ -98,6 +102,8 @@ static const struct nand_data_interface onfi_sdr_timings[] = { { .type = NAND_SDR_IFACE, .timings.sdr = { + .tCCS_min = 500000, + .tR_max = 200000000, .tADL_min = 400000, .tALH_min = 10000, .tALS_min = 15000, @@ -138,6 +144,8 @@ static const struct nand_data_interface onfi_sdr_timings[] = { { .type = NAND_SDR_IFACE, .timings.sdr = { + .tCCS_min = 500000, + .tR_max = 200000000, .tADL_min = 400000, .tALH_min = 5000, .tALS_min = 10000, @@ -178,6 +186,8 @@ static const struct nand_data_interface onfi_sdr_timings[] = { { .type = NAND_SDR_IFACE, .timings.sdr = { + .tCCS_min = 500000, + .tR_max = 200000000, .tADL_min = 400000, .tALH_min = 5000, .tALS_min = 10000, @@ -218,6 +228,8 @@ static const struct nand_data_interface onfi_sdr_timings[] = { { .type = NAND_SDR_IFACE, .timings.sdr = { + .tCCS_min = 500000, + .tR_max = 200000000, .tADL_min = 400000, .tALH_min = 5000, .tALS_min = 10000, @@ -290,10 +302,22 @@ int onfi_init_data_interface(struct nand_chip *chip, *iface = onfi_sdr_timings[timing_mode]; /* - * TODO: initialize timings that cannot be deduced from timing mode: + * 
Initialize timings that cannot be deduced from timing mode: * tR, tPROG, tCCS, ... * These information are part of the ONFI parameter page. */ + if (chip->onfi_version) { + struct nand_onfi_params *params = &chip->onfi_params; + struct nand_sdr_timings *timings = &iface->timings.sdr; + + /* microseconds -> picoseconds */ + timings->tPROG_max = 1000000UL * le16_to_cpu(params->t_prog); + timings->tBERS_max = 1000000UL * le16_to_cpu(params->t_bers); + timings->tR_max = 1000000UL * le16_to_cpu(params->t_r); + + /* nanoseconds -> picoseconds */ + timings->tCCS_min = 1000UL * le16_to_cpu(params->t_ccs); + } return 0; } diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index c5d3d5024fc8..6fe83bce83a6 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -584,6 +584,10 @@ struct nand_buffers { * * All these timings are expressed in picoseconds. * + * @tBERS_max: Block erase time + * @tCCS_min: Change column setup time + * @tPROG_max: Page program time + * @tR_max: Page read time * @tALH_min: ALE hold time * @tADL_min: ALE to data loading time * @tALS_min: ALE setup time @@ -621,6 +625,10 @@ struct nand_buffers { * @tWW_min: WP# transition to WE# low */ struct nand_sdr_timings { + u32 tBERS_max; + u32 tCCS_min; + u32 tPROG_max; + u32 tR_max; u32 tALH_min; u32 tADL_min; u32 tALS_min; -- cgit v1.2.3 From 6ea40a3ba93e1b14ffb349e276f9dfefc4334b99 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Sat, 1 Oct 2016 10:24:03 +0200 Subject: mtd: nand: Wait tCCS after a column change Drivers implementing ->cmd_ctrl() and relying on the default ->cmdfunc() implementation usually don't wait tCCS when a column change (RNDIN or RNDOUT) is requested. Add an option flag to ask the core to do so (note that we keep this as an opt-in to avoid breaking existing implementations), and make use of the ->data_interface information if available (otherwise, wait 500ns).
Signed-off-by: Boris Brezillon Tested-by: Marc Gonzalez --- drivers/mtd/nand/nand_base.c | 26 +++++++++++++++++++++++++- include/linux/mtd/nand.h | 10 ++++++++++ 2 files changed, 35 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index e5718e5ecf92..0acb0070280a 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -709,6 +709,25 @@ static void nand_command(struct mtd_info *mtd, unsigned int command, nand_wait_ready(mtd); } +static void nand_ccs_delay(struct nand_chip *chip) +{ + /* + * The controller already takes care of waiting for tCCS when the RNDIN + * or RNDOUT command is sent, return directly. + */ + if (!(chip->options & NAND_WAIT_TCCS)) + return; + + /* + * Wait tCCS_min if it is correctly defined, otherwise wait 500ns + * (which should be safe for all NANDs). + */ + if (chip->data_interface && chip->data_interface->timings.sdr.tCCS_min) + ndelay(chip->data_interface->timings.sdr.tCCS_min / 1000); + else + ndelay(500); +} + /** * nand_command_lp - [DEFAULT] Send command to NAND large page device * @mtd: MTD device structure @@ -773,10 +792,13 @@ static void nand_command_lp(struct mtd_info *mtd, unsigned int command, case NAND_CMD_ERASE1: case NAND_CMD_ERASE2: case NAND_CMD_SEQIN: - case NAND_CMD_RNDIN: case NAND_CMD_STATUS: return; + case NAND_CMD_RNDIN: + nand_ccs_delay(chip); + return; + case NAND_CMD_RESET: if (chip->dev_ready) break; @@ -795,6 +817,8 @@ static void nand_command_lp(struct mtd_info *mtd, unsigned int command, NAND_NCE | NAND_CLE | NAND_CTRL_CHANGE); chip->cmd_ctrl(mtd, NAND_CMD_NONE, NAND_NCE | NAND_CTRL_CHANGE); + + nand_ccs_delay(chip); return; case NAND_CMD_READ0: diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 6fe83bce83a6..970ceb948835 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -210,6 +210,16 @@ enum nand_ecc_algo { */ #define NAND_USE_BOUNCE_BUFFER 0x00100000 +/* + * In 
case your controller is implementing ->cmd_ctrl() and is relying on the + * default ->cmdfunc() implementation, you may want to let the core handle the + * tCCS delay which is required when a column change (RNDIN or RNDOUT) is + * requested. + * If your controller already takes care of this delay, you don't need to set + * this flag. + */ +#define NAND_WAIT_TCCS 0x00200000 + /* Options set by nand scan */ /* Nand scan has allocated controller struct */ #define NAND_CONTROLLER_ALLOC 0x80000000 -- cgit v1.2.3 From 5b4e2900512321435a5cd7dd77f58f23f3109950 Mon Sep 17 00:00:00 2001 From: Jon Mason Date: Fri, 4 Nov 2016 01:10:56 -0400 Subject: net: phy: broadcom: add bcm54xx_auxctl_read Add a helper function to read the AUXCTL register for the BCM54xx. This mirrors the bcm54xx_auxctl_write function already present in the code. Signed-off-by: Jon Mason Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/broadcom.c | 10 ++++++++++ include/linux/brcmphy.h | 1 + 2 files changed, 11 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c index 583ef8a2ec8d..3a64b3d8eca8 100644 --- a/drivers/net/phy/broadcom.c +++ b/drivers/net/phy/broadcom.c @@ -30,6 +30,16 @@ MODULE_DESCRIPTION("Broadcom PHY driver"); MODULE_AUTHOR("Maciej W. 
Rozycki"); MODULE_LICENSE("GPL"); +static int bcm54xx_auxctl_read(struct phy_device *phydev, u16 regnum) +{ + /* The register must be written to both the Shadow Register Select and + * the Shadow Read Register Selector + */ + phy_write(phydev, MII_BCM54XX_AUX_CTL, regnum | + regnum << MII_BCM54XX_AUXCTL_SHDWSEL_READ_SHIFT); + return phy_read(phydev, MII_BCM54XX_AUX_CTL); +} + static int bcm54xx_auxctl_write(struct phy_device *phydev, u16 regnum, u16 val) { return phy_write(phydev, MII_BCM54XX_AUX_CTL, regnum | val); diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index 60def78c4e12..0ed66914b61c 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -110,6 +110,7 @@ #define MII_BCM54XX_AUXCTL_MISC_FORCE_AMDIX 0x0200 #define MII_BCM54XX_AUXCTL_MISC_RDSEL_MISC 0x7000 #define MII_BCM54XX_AUXCTL_SHDWSEL_MISC 0x0007 +#define MII_BCM54XX_AUXCTL_SHDWSEL_READ_SHIFT 12 #define MII_BCM54XX_AUXCTL_SHDWSEL_MASK 0x0007 -- cgit v1.2.3 From b14995ac2527b43a75c9190fbd4efd43fb1f4562 Mon Sep 17 00:00:00 2001 From: Jon Mason Date: Fri, 4 Nov 2016 01:10:58 -0400 Subject: net: phy: broadcom: Add BCM54810 PHY entry The BCM54810 PHY requires some semi-unique configuration, which results in some additional configuration in addition to the standard config. Also, some users of the BCM54810 require the PHY lanes to be swapped. Since there is no way to detect this, add a device tree query to see if it is applicable. Inspired-by: Vikas Soni Signed-off-by: Jon Mason Reviewed-by: Florian Fainelli Reviewed-by: Andrew Lunn Signed-off-by: David S. 
Miller --- drivers/net/phy/Kconfig | 2 +- drivers/net/phy/broadcom.c | 58 +++++++++++++++++++++++++++++++++++++++++++++- include/linux/brcmphy.h | 9 +++++++ 3 files changed, 67 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig index ff31c10a3485..d3fcfd291913 100644 --- a/drivers/net/phy/Kconfig +++ b/drivers/net/phy/Kconfig @@ -217,7 +217,7 @@ config BROADCOM_PHY select BCM_NET_PHYLIB ---help--- Currently supports the BCM5411, BCM5421, BCM5461, BCM54616S, BCM5464, - BCM5481 and BCM5482 PHYs. + BCM5481, BCM54810 and BCM5482 PHYs. config CICADA_PHY tristate "Cicada PHYs" diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c index 3a64b3d8eca8..b1e32e9be1b3 100644 --- a/drivers/net/phy/broadcom.c +++ b/drivers/net/phy/broadcom.c @@ -18,7 +18,7 @@ #include #include #include - +#include #define BRCM_PHY_MODEL(phydev) \ ((phydev)->drv->phy_id & (phydev)->drv->phy_id_mask) @@ -45,6 +45,34 @@ static int bcm54xx_auxctl_write(struct phy_device *phydev, u16 regnum, u16 val) return phy_write(phydev, MII_BCM54XX_AUX_CTL, regnum | val); } +static int bcm54810_config(struct phy_device *phydev) +{ + int rc, val; + + val = bcm_phy_read_exp(phydev, BCM54810_EXP_BROADREACH_LRE_MISC_CTL); + val &= ~BCM54810_EXP_BROADREACH_LRE_MISC_CTL_EN; + rc = bcm_phy_write_exp(phydev, BCM54810_EXP_BROADREACH_LRE_MISC_CTL, + val); + if (rc < 0) + return rc; + + val = bcm54xx_auxctl_read(phydev, MII_BCM54XX_AUXCTL_SHDWSEL_MISC); + val &= ~MII_BCM54XX_AUXCTL_SHDWSEL_MISC_RGMII_SKEW_EN; + val |= MII_BCM54XX_AUXCTL_MISC_WREN; + rc = bcm54xx_auxctl_write(phydev, MII_BCM54XX_AUXCTL_SHDWSEL_MISC, + val); + if (rc < 0) + return rc; + + val = bcm_phy_read_shadow(phydev, BCM54810_SHD_CLK_CTL); + val &= ~BCM54810_SHD_CLK_CTL_GTXCLK_EN; + rc = bcm_phy_write_shadow(phydev, BCM54810_SHD_CLK_CTL, val); + if (rc < 0) + return rc; + + return 0; +} + /* Needs SMDSP clock enabled via bcm54xx_phydsp_config() */ static int 
bcm50610_a0_workaround(struct phy_device *phydev) { @@ -217,6 +245,12 @@ static int bcm54xx_config_init(struct phy_device *phydev) (phydev->dev_flags & PHY_BRCM_AUTO_PWRDWN_ENABLE)) bcm54xx_adjust_rxrefclk(phydev); + if (BRCM_PHY_MODEL(phydev) == PHY_ID_BCM54810) { + err = bcm54810_config(phydev); + if (err) + return err; + } + bcm54xx_phydsp_config(phydev); return 0; @@ -314,6 +348,7 @@ static int bcm5482_read_status(struct phy_device *phydev) static int bcm5481_config_aneg(struct phy_device *phydev) { + struct device_node *np = phydev->mdio.dev.of_node; int ret; /* Aneg firsly. */ @@ -344,6 +379,14 @@ static int bcm5481_config_aneg(struct phy_device *phydev) phy_write(phydev, 0x18, reg); } + if (of_property_read_bool(np, "enet-phy-lane-swap")) { + /* Lane Swap - Undocumented register...magic! */ + ret = bcm_phy_write_exp(phydev, MII_BCM54XX_EXP_SEL_ER + 0x9, + 0x11B); + if (ret < 0) + return ret; + } + return ret; } @@ -577,6 +620,18 @@ static struct phy_driver broadcom_drivers[] = { .read_status = genphy_read_status, .ack_interrupt = bcm_phy_ack_intr, .config_intr = bcm_phy_config_intr, +}, { + .phy_id = PHY_ID_BCM54810, + .phy_id_mask = 0xfffffff0, + .name = "Broadcom BCM54810", + .features = PHY_GBIT_FEATURES | + SUPPORTED_Pause | SUPPORTED_Asym_Pause, + .flags = PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT, + .config_init = bcm54xx_config_init, + .config_aneg = bcm5481_config_aneg, + .read_status = genphy_read_status, + .ack_interrupt = bcm_phy_ack_intr, + .config_intr = bcm_phy_config_intr, }, { .phy_id = PHY_ID_BCM5482, .phy_id_mask = 0xfffffff0, @@ -661,6 +716,7 @@ static struct mdio_device_id __maybe_unused broadcom_tbl[] = { { PHY_ID_BCM54616S, 0xfffffff0 }, { PHY_ID_BCM5464, 0xfffffff0 }, { PHY_ID_BCM5481, 0xfffffff0 }, + { PHY_ID_BCM54810, 0xfffffff0 }, { PHY_ID_BCM5482, 0xfffffff0 }, { PHY_ID_BCM50610, 0xfffffff0 }, { PHY_ID_BCM50610M, 0xfffffff0 }, diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index 0ed66914b61c..848dc508ef57 100644 --- 
a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -13,6 +13,7 @@ #define PHY_ID_BCM5241 0x0143bc30 #define PHY_ID_BCMAC131 0x0143bc70 #define PHY_ID_BCM5481 0x0143bca0 +#define PHY_ID_BCM54810 0x03625d00 #define PHY_ID_BCM5482 0x0143bcb0 #define PHY_ID_BCM5411 0x00206070 #define PHY_ID_BCM5421 0x002060e0 @@ -56,6 +57,7 @@ #define PHY_BRCM_EXT_IBND_TX_ENABLE 0x00002000 #define PHY_BRCM_CLEAR_RGMII_MODE 0x00004000 #define PHY_BRCM_DIS_TXCRXC_NOENRGY 0x00008000 + /* Broadcom BCM7xxx specific workarounds */ #define PHY_BRCM_7XXX_REV(x) (((x) >> 8) & 0xff) #define PHY_BRCM_7XXX_PATCH(x) ((x) & 0xff) @@ -111,6 +113,7 @@ #define MII_BCM54XX_AUXCTL_MISC_RDSEL_MISC 0x7000 #define MII_BCM54XX_AUXCTL_SHDWSEL_MISC 0x0007 #define MII_BCM54XX_AUXCTL_SHDWSEL_READ_SHIFT 12 +#define MII_BCM54XX_AUXCTL_SHDWSEL_MISC_RGMII_SKEW_EN (1 << 8) #define MII_BCM54XX_AUXCTL_SHDWSEL_MASK 0x0007 @@ -192,6 +195,12 @@ #define BCM5482_SSD_SGMII_SLAVE_EN 0x0002 /* Slave mode enable */ #define BCM5482_SSD_SGMII_SLAVE_AD 0x0001 /* Slave auto-detection */ +/* BCM54810 Registers */ +#define BCM54810_EXP_BROADREACH_LRE_MISC_CTL (MII_BCM54XX_EXP_SEL_ER + 0x90) +#define BCM54810_EXP_BROADREACH_LRE_MISC_CTL_EN (1 << 0) +#define BCM54810_SHD_CLK_CTL 0x3 +#define BCM54810_SHD_CLK_CTL_GTXCLK_EN (1 << 9) + /*****************************************************************************/ /* Fast Ethernet Transceiver definitions. */ -- cgit v1.2.3 From 7c13f97ffde63cc792c49ec1513f3974f2f05229 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 4 Nov 2016 11:28:59 +0100 Subject: udp: do fwd memory scheduling on dequeue A new argument is added to __skb_recv_datagram to provide an explicit skb destructor, invoked under the receive queue lock. The UDP protocol uses such argument to perform memory reclaiming on dequeue, so that the UDP protocol no longer sets skb->destructor. Instead explicit memory reclaiming is performed at close() time and when skbs are removed from the receive queue.
The in-kernel UDP protocol users now need to call a skb_recv_udp() variant instead of skb_recv_datagram() to properly perform memory accounting on dequeue. Overall, this allows acquiring only once the receive queue lock on dequeue. Tested using pktgen with random src port, 64 bytes packet, wire-speed on a 10G link as sender and udp_sink as the receiver, using an l4 tuple rxhash to stress the contention, and one or more udp_sink instances with reuseport. nr sinks vanilla patched 1 440 560 3 2150 2300 6 3650 3800 9 4450 4600 12 6250 6450 v1 -> v2: - do rmem and allocated memory scheduling under the receive lock - do bulk scheduling in first_packet_length() and in udp_destruct_sock() - avoid the typedef for the dequeue callback Suggested-by: Eric Dumazet Acked-by: Hannes Frederic Sowa Signed-off-by: Paolo Abeni Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/skbuff.h | 4 ++++ include/net/udp.h | 15 +++++++++++++++ net/core/datagram.c | 17 ++++++++++++----- net/ipv4/udp.c | 42 ++++++++++++++++++++++++------------------ net/ipv6/udp.c | 3 +-- net/rxrpc/input.c | 7 +++---- net/sunrpc/svcsock.c | 2 +- net/sunrpc/xprtsock.c | 2 +- net/unix/af_unix.c | 4 ++-- 9 files changed, 63 insertions(+), 33 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index cc6e23eaac91..a4aeeca7e805 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3033,9 +3033,13 @@ static inline void skb_frag_list_init(struct sk_buff *skb) int __skb_wait_for_more_packets(struct sock *sk, int *err, long *timeo_p, const struct sk_buff *skb); struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned flags, + void (*destructor)(struct sock *sk, + struct sk_buff *skb), int *peeked, int *off, int *err, struct sk_buff **last); struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned flags, + void (*destructor)(struct sock *sk, + struct sk_buff *skb), int *peeked, int *off, int *err); struct sk_buff 
*skb_recv_datagram(struct sock *sk, unsigned flags, int noblock, int *err); diff --git a/include/net/udp.h b/include/net/udp.h index 6134f37ba3ab..e6e4e19be387 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -248,6 +248,21 @@ static inline __be16 udp_flow_src_port(struct net *net, struct sk_buff *skb, /* net/ipv4/udp.c */ void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len); int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb); +void udp_skb_destructor(struct sock *sk, struct sk_buff *skb); +static inline struct sk_buff * +__skb_recv_udp(struct sock *sk, unsigned int flags, int noblock, int *peeked, + int *off, int *err) +{ + return __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), + udp_skb_destructor, peeked, off, err); +} +static inline struct sk_buff *skb_recv_udp(struct sock *sk, unsigned int flags, + int noblock, int *err) +{ + int peeked, off = 0; + + return __skb_recv_udp(sk, flags, noblock, &peeked, &off, err); +} void udp_v4_early_demux(struct sk_buff *skb); int udp_get_port(struct sock *sk, unsigned short snum, diff --git a/net/core/datagram.c b/net/core/datagram.c index bfb973aebb5b..49816af8586b 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -165,6 +165,7 @@ done: * __skb_try_recv_datagram - Receive a datagram skbuff * @sk: socket * @flags: MSG_ flags + * @destructor: invoked under the receive lock on successful dequeue * @peeked: returns non-zero if this packet has been seen before * @off: an offset in bytes to peek skb from. Returns an offset * within an skb where data actually starts @@ -197,6 +198,8 @@ done: * the standard around please. 
*/ struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags, + void (*destructor)(struct sock *sk, + struct sk_buff *skb), int *peeked, int *off, int *err, struct sk_buff **last) { @@ -241,9 +244,11 @@ struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags, } atomic_inc(&skb->users); - } else + } else { __skb_unlink(skb, queue); - + if (destructor) + destructor(sk, skb); + } spin_unlock_irqrestore(&queue->lock, cpu_flags); *off = _off; return skb; @@ -262,6 +267,8 @@ no_packet: EXPORT_SYMBOL(__skb_try_recv_datagram); struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, + void (*destructor)(struct sock *sk, + struct sk_buff *skb), int *peeked, int *off, int *err) { struct sk_buff *skb, *last; @@ -270,8 +277,8 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); do { - skb = __skb_try_recv_datagram(sk, flags, peeked, off, err, - &last); + skb = __skb_try_recv_datagram(sk, flags, destructor, peeked, + off, err, &last); if (skb) return skb; @@ -290,7 +297,7 @@ struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned int flags, int peeked, off = 0; return __skb_recv_datagram(sk, flags | (noblock ? 
MSG_DONTWAIT : 0), - &peeked, &off, err); + NULL, &peeked, &off, err); } EXPORT_SYMBOL(skb_recv_datagram); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 28a0165cb848..097b70628631 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1173,26 +1173,26 @@ out: return ret; } +/* fully reclaim rmem/fwd memory allocated for skb */ static void udp_rmem_release(struct sock *sk, int size, int partial) { int amt; atomic_sub(size, &sk->sk_rmem_alloc); - - spin_lock_bh(&sk->sk_receive_queue.lock); sk->sk_forward_alloc += size; amt = (sk->sk_forward_alloc - partial) & ~(SK_MEM_QUANTUM - 1); sk->sk_forward_alloc -= amt; - spin_unlock_bh(&sk->sk_receive_queue.lock); if (amt) __sk_mem_reduce_allocated(sk, amt >> SK_MEM_QUANTUM_SHIFT); } -static void udp_rmem_free(struct sk_buff *skb) +/* Note: called with sk_receive_queue.lock held */ +void udp_skb_destructor(struct sock *sk, struct sk_buff *skb) { - udp_rmem_release(skb->sk, skb->truesize, 1); + udp_rmem_release(sk, skb->truesize, 1); } +EXPORT_SYMBOL(udp_skb_destructor); int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb) { @@ -1229,9 +1229,9 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb) sk->sk_forward_alloc -= size; - /* the skb owner in now the udp socket */ - skb->sk = sk; - skb->destructor = udp_rmem_free; + /* no need to setup a destructor, we will explicitly release the + * forward allocated memory on dequeue + */ skb->dev = NULL; sock_skb_set_dropcount(sk, skb); @@ -1255,8 +1255,15 @@ EXPORT_SYMBOL_GPL(__udp_enqueue_schedule_skb); static void udp_destruct_sock(struct sock *sk) { /* reclaim completely the forward allocated memory */ - __skb_queue_purge(&sk->sk_receive_queue); - udp_rmem_release(sk, 0, 0); + unsigned int total = 0; + struct sk_buff *skb; + + while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) { + total += skb->truesize; + kfree_skb(skb); + } + udp_rmem_release(sk, total, 0); + inet_sock_destruct(sk); } @@ -1288,12 +1295,11 @@ 
EXPORT_SYMBOL_GPL(skb_consume_udp); */ static int first_packet_length(struct sock *sk) { - struct sk_buff_head list_kill, *rcvq = &sk->sk_receive_queue; + struct sk_buff_head *rcvq = &sk->sk_receive_queue; struct sk_buff *skb; + int total = 0; int res; - __skb_queue_head_init(&list_kill); - spin_lock_bh(&rcvq->lock); while ((skb = skb_peek(rcvq)) != NULL && udp_lib_checksum_complete(skb)) { @@ -1303,12 +1309,13 @@ static int first_packet_length(struct sock *sk) IS_UDPLITE(sk)); atomic_inc(&sk->sk_drops); __skb_unlink(skb, rcvq); - __skb_queue_tail(&list_kill, skb); + total += skb->truesize; + kfree_skb(skb); } res = skb ? skb->len : -1; + if (total) + udp_rmem_release(sk, total, 1); spin_unlock_bh(&rcvq->lock); - - __skb_queue_purge(&list_kill); return res; } @@ -1363,8 +1370,7 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, try_again: peeking = off = sk_peek_offset(sk, flags); - skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), - &peeked, &off, &err); + skb = __skb_recv_udp(sk, flags, noblock, &peeked, &off, &err); if (!skb) return err; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index b5a23ce8981d..5313818b7485 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -343,8 +343,7 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, try_again: peeking = off = sk_peek_offset(sk, flags); - skb = __skb_recv_datagram(sk, flags | (noblock ? 
MSG_DONTWAIT : 0), - &peeked, &off, &err); + skb = __skb_recv_udp(sk, flags, noblock, &peeked, &off, &err); if (!skb) return err; diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 44fb8d893c7d..1d87b5453ef7 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -1053,7 +1053,7 @@ void rxrpc_data_ready(struct sock *udp_sk) ASSERT(!irqs_disabled()); - skb = skb_recv_datagram(udp_sk, 0, 1, &ret); + skb = skb_recv_udp(udp_sk, 0, 1, &ret); if (!skb) { if (ret == -EAGAIN) return; @@ -1075,10 +1075,9 @@ void rxrpc_data_ready(struct sock *udp_sk) __UDP_INC_STATS(&init_net, UDP_MIB_INDATAGRAMS, 0); - /* The socket buffer we have is owned by UDP, with UDP's data all over - * it, but we really want our own data there. + /* The UDP protocol already released all skb resources; + * we are free to add our own data there. */ - skb_orphan(skb); sp = rxrpc_skb(skb); /* dig out the RxRPC connection details */ diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index e2a55dc787e6..78da4aee3543 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -547,7 +547,7 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp) err = kernel_recvmsg(svsk->sk_sock, &msg, NULL, 0, 0, MSG_PEEK | MSG_DONTWAIT); if (err >= 0) - skb = skb_recv_datagram(svsk->sk_sk, 0, 1, &err); + skb = skb_recv_udp(svsk->sk_sk, 0, 1, &err); if (skb == NULL) { if (err != -EAGAIN) { diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 1758665d609c..7178d0aa7861 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1080,7 +1080,7 @@ static void xs_udp_data_receive(struct sock_xprt *transport) if (sk == NULL) goto out; for (;;) { - skb = skb_recv_datagram(sk, 0, 1, &err); + skb = skb_recv_udp(sk, 0, 1, &err); if (skb != NULL) { xs_udp_data_read_skb(&transport->xprt, sk, skb); consume_skb(skb); diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 145082e2ba36..87620183910e 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2113,8 +2113,8 @@ static int 
unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, mutex_lock(&u->iolock); skip = sk_peek_offset(sk, flags); - skb = __skb_try_recv_datagram(sk, flags, &peeked, &skip, &err, - &last); + skb = __skb_try_recv_datagram(sk, flags, NULL, &peeked, &skip, + &err, &last); if (skb) break; -- cgit v1.2.3 From ebcf6f979d55f35dfe36956364f0dce8c738220b Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 2 Nov 2016 07:12:31 -0400 Subject: swiotlb: Drop unused functions swiotlb_map_sg and swiotlb_unmap_sg There are no users for swiotlb_map_sg or swiotlb_unmap_sg so we might as well just drop them. Signed-off-by: Alexander Duyck Acked-by: Christoph Hellwig Signed-off-by: Konrad Rzeszutek Wilk --- include/linux/swiotlb.h | 8 -------- lib/swiotlb.c | 16 ---------------- 2 files changed, 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 5f81f8a187f2..f0d258967869 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -72,14 +72,6 @@ extern void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr, size_t size, enum dma_data_direction dir, unsigned long attrs); -extern int -swiotlb_map_sg(struct device *hwdev, struct scatterlist *sg, int nents, - enum dma_data_direction dir); - -extern void -swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nents, - enum dma_data_direction dir); - extern int swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems, enum dma_data_direction dir, diff --git a/lib/swiotlb.c b/lib/swiotlb.c index 6ce764410ae4..bdcc0d8a7405 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -910,14 +910,6 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems, } EXPORT_SYMBOL(swiotlb_map_sg_attrs); -int -swiotlb_map_sg(struct device *hwdev, struct scatterlist *sgl, int nelems, - enum dma_data_direction dir) -{ - return swiotlb_map_sg_attrs(hwdev, sgl, nelems, dir, 0); -} -EXPORT_SYMBOL(swiotlb_map_sg); - /* * 
Unmap a set of streaming mode DMA translations. Again, cpu read rules * concerning calls here are the same as for swiotlb_unmap_page() above. @@ -938,14 +930,6 @@ swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl, } EXPORT_SYMBOL(swiotlb_unmap_sg_attrs); -void -swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nelems, - enum dma_data_direction dir) -{ - return swiotlb_unmap_sg_attrs(hwdev, sgl, nelems, dir, 0); -} -EXPORT_SYMBOL(swiotlb_unmap_sg); - /* * Make physical memory consistent for a set of streaming mode DMA translations * after a transfer. -- cgit v1.2.3 From 0443fa003fa199f41bfbed3012f314d02c5b1f24 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 2 Nov 2016 07:13:02 -0400 Subject: swiotlb: Add support for DMA_ATTR_SKIP_CPU_SYNC As a first step to making DMA_ATTR_SKIP_CPU_SYNC apply to architectures beyond just ARM I need to make it so that the swiotlb will respect the flag. In order to do that I also need to update the swiotlb-xen since it heavily makes use of the functionality. 
Cc: Konrad Rzeszutek Wilk Signed-off-by: Alexander Duyck Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/swiotlb-xen.c | 11 +++++---- include/linux/swiotlb.h | 6 +++-- lib/swiotlb.c | 59 ++++++++++++++++++++++++++++++----------------- 3 files changed, 49 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index b8014bf2b2ed..3d048afcee38 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -405,7 +405,8 @@ dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page, */ trace_swiotlb_bounced(dev, dev_addr, size, swiotlb_force); - map = swiotlb_tbl_map_single(dev, start_dma_addr, phys, size, dir); + map = swiotlb_tbl_map_single(dev, start_dma_addr, phys, size, dir, + attrs); if (map == SWIOTLB_MAP_ERROR) return DMA_ERROR_CODE; @@ -419,7 +420,8 @@ dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page, if (dma_capable(dev, dev_addr, size)) return dev_addr; - swiotlb_tbl_unmap_single(dev, map, size, dir); + swiotlb_tbl_unmap_single(dev, map, size, dir, + attrs | DMA_ATTR_SKIP_CPU_SYNC); return DMA_ERROR_CODE; } @@ -445,7 +447,7 @@ static void xen_unmap_single(struct device *hwdev, dma_addr_t dev_addr, /* NOTE: We use dev_addr here, not paddr! */ if (is_xen_swiotlb_buffer(dev_addr)) { - swiotlb_tbl_unmap_single(hwdev, paddr, size, dir); + swiotlb_tbl_unmap_single(hwdev, paddr, size, dir, attrs); return; } @@ -558,11 +560,12 @@ xen_swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, start_dma_addr, sg_phys(sg), sg->length, - dir); + dir, attrs); if (map == SWIOTLB_MAP_ERROR) { dev_warn(hwdev, "swiotlb buffer is full\n"); /* Don't panic here, we expect map_sg users to do proper error handling. 
*/ + attrs |= DMA_ATTR_SKIP_CPU_SYNC; xen_swiotlb_unmap_sg_attrs(hwdev, sgl, i, dir, attrs); sg_dma_len(sgl) = 0; diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index f0d258967869..183f37c8a5e1 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -44,11 +44,13 @@ enum dma_sync_target { extern phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, dma_addr_t tbl_dma_addr, phys_addr_t phys, size_t size, - enum dma_data_direction dir); + enum dma_data_direction dir, + unsigned long attrs); extern void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr, - size_t size, enum dma_data_direction dir); + size_t size, enum dma_data_direction dir, + unsigned long attrs); extern void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr, diff --git a/lib/swiotlb.c b/lib/swiotlb.c index bdcc0d8a7405..8e883c762728 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -425,7 +425,8 @@ static void swiotlb_bounce(phys_addr_t orig_addr, phys_addr_t tlb_addr, phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, dma_addr_t tbl_dma_addr, phys_addr_t orig_addr, size_t size, - enum dma_data_direction dir) + enum dma_data_direction dir, + unsigned long attrs) { unsigned long flags; phys_addr_t tlb_addr; @@ -526,7 +527,8 @@ found: */ for (i = 0; i < nslots; i++) io_tlb_orig_addr[index+i] = orig_addr + (i << IO_TLB_SHIFT); - if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL) + if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && + (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)) swiotlb_bounce(orig_addr, tlb_addr, size, DMA_TO_DEVICE); return tlb_addr; @@ -539,18 +541,20 @@ EXPORT_SYMBOL_GPL(swiotlb_tbl_map_single); static phys_addr_t map_single(struct device *hwdev, phys_addr_t phys, size_t size, - enum dma_data_direction dir) + enum dma_data_direction dir, unsigned long attrs) { dma_addr_t start_dma_addr = phys_to_dma(hwdev, io_tlb_start); - return swiotlb_tbl_map_single(hwdev, start_dma_addr, phys, size, dir); + return 
swiotlb_tbl_map_single(hwdev, start_dma_addr, phys, size, + dir, attrs); } /* * dma_addr is the kernel virtual address of the bounce buffer to unmap. */ void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr, - size_t size, enum dma_data_direction dir) + size_t size, enum dma_data_direction dir, + unsigned long attrs) { unsigned long flags; int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; @@ -561,6 +565,7 @@ void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr, * First, sync the memory before unmapping the entry */ if (orig_addr != INVALID_PHYS_ADDR && + !(attrs & DMA_ATTR_SKIP_CPU_SYNC) && ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL))) swiotlb_bounce(orig_addr, tlb_addr, size, DMA_FROM_DEVICE); @@ -654,7 +659,8 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size, * GFP_DMA memory; fall back on map_single(), which * will grab memory from the lowest available address range. */ - phys_addr_t paddr = map_single(hwdev, 0, size, DMA_FROM_DEVICE); + phys_addr_t paddr = map_single(hwdev, 0, size, + DMA_FROM_DEVICE, 0); if (paddr == SWIOTLB_MAP_ERROR) goto err_warn; @@ -667,9 +673,13 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size, (unsigned long long)dma_mask, (unsigned long long)dev_addr); - /* DMA_TO_DEVICE to avoid memcpy in unmap_single */ + /* + * DMA_TO_DEVICE to avoid memcpy in unmap_single. + * The DMA_ATTR_SKIP_CPU_SYNC is optional. + */ swiotlb_tbl_unmap_single(hwdev, paddr, - size, DMA_TO_DEVICE); + size, DMA_TO_DEVICE, + DMA_ATTR_SKIP_CPU_SYNC); goto err_warn; } } @@ -698,8 +708,12 @@ swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr, if (!is_swiotlb_buffer(paddr)) free_pages((unsigned long)vaddr, get_order(size)); else - /* DMA_TO_DEVICE to avoid memcpy in swiotlb_tbl_unmap_single */ - swiotlb_tbl_unmap_single(hwdev, paddr, size, DMA_TO_DEVICE); + /* + * DMA_TO_DEVICE to avoid memcpy in swiotlb_tbl_unmap_single. + * DMA_ATTR_SKIP_CPU_SYNC is optional. 
+ */ + swiotlb_tbl_unmap_single(hwdev, paddr, size, DMA_TO_DEVICE, + DMA_ATTR_SKIP_CPU_SYNC); } EXPORT_SYMBOL(swiotlb_free_coherent); @@ -755,7 +769,7 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, trace_swiotlb_bounced(dev, dev_addr, size, swiotlb_force); /* Oh well, have to allocate and map a bounce buffer. */ - map = map_single(dev, phys, size, dir); + map = map_single(dev, phys, size, dir, attrs); if (map == SWIOTLB_MAP_ERROR) { swiotlb_full(dev, size, dir, 1); return phys_to_dma(dev, io_tlb_overflow_buffer); @@ -764,12 +778,13 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, dev_addr = phys_to_dma(dev, map); /* Ensure that the address returned is DMA'ble */ - if (!dma_capable(dev, dev_addr, size)) { - swiotlb_tbl_unmap_single(dev, map, size, dir); - return phys_to_dma(dev, io_tlb_overflow_buffer); - } + if (dma_capable(dev, dev_addr, size)) + return dev_addr; + + swiotlb_tbl_unmap_single(dev, map, size, dir, + attrs | DMA_ATTR_SKIP_CPU_SYNC); - return dev_addr; + return phys_to_dma(dev, io_tlb_overflow_buffer); } EXPORT_SYMBOL_GPL(swiotlb_map_page); @@ -782,14 +797,15 @@ EXPORT_SYMBOL_GPL(swiotlb_map_page); * whatever the device wrote there. 
*/ static void unmap_single(struct device *hwdev, dma_addr_t dev_addr, - size_t size, enum dma_data_direction dir) + size_t size, enum dma_data_direction dir, + unsigned long attrs) { phys_addr_t paddr = dma_to_phys(hwdev, dev_addr); BUG_ON(dir == DMA_NONE); if (is_swiotlb_buffer(paddr)) { - swiotlb_tbl_unmap_single(hwdev, paddr, size, dir); + swiotlb_tbl_unmap_single(hwdev, paddr, size, dir, attrs); return; } @@ -809,7 +825,7 @@ void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr, size_t size, enum dma_data_direction dir, unsigned long attrs) { - unmap_single(hwdev, dev_addr, size, dir); + unmap_single(hwdev, dev_addr, size, dir, attrs); } EXPORT_SYMBOL_GPL(swiotlb_unmap_page); @@ -891,7 +907,7 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems, if (swiotlb_force || !dma_capable(hwdev, dev_addr, sg->length)) { phys_addr_t map = map_single(hwdev, sg_phys(sg), - sg->length, dir); + sg->length, dir, attrs); if (map == SWIOTLB_MAP_ERROR) { /* Don't panic here, we expect map_sg users to do proper error handling. */ @@ -925,7 +941,8 @@ swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl, BUG_ON(dir == DMA_NONE); for_each_sg(sgl, sg, nelems, i) - unmap_single(hwdev, sg->dma_address, sg_dma_len(sg), dir); + unmap_single(hwdev, sg->dma_address, sg_dma_len(sg), dir, + attrs); } EXPORT_SYMBOL(swiotlb_unmap_sg_attrs); -- cgit v1.2.3 From 0f78ba96bbcf30a78224fe56f8fd72f87915afdd Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Tue, 23 Feb 2016 15:32:14 -0800 Subject: Input: gpio_keys_polled - keep button data constant Commit 633a21d80b4a ("input: gpio_keys_polled: Add support for GPIO descriptors") placed gpio descriptor into gpio_keys_button structure, which is supposed to be part of platform data and not modifiable by the driver. To keep the data constant, let's move the descriptor to gpio_keys_button_data structure instead. 
Tested-by: Mika Westerberg Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/gpio_keys.c | 10 +-- drivers/input/keyboard/gpio_keys_polled.c | 105 +++++++++++++++++------------- include/linux/gpio_keys.h | 5 +- 3 files changed, 64 insertions(+), 56 deletions(-) (limited to 'include/linux') diff --git a/drivers/input/keyboard/gpio_keys.c b/drivers/input/keyboard/gpio_keys.c index 29093657f2ef..890eb397d987 100644 --- a/drivers/input/keyboard/gpio_keys.c +++ b/drivers/input/keyboard/gpio_keys.c @@ -624,7 +624,6 @@ gpio_keys_get_devtree_pdata(struct device *dev) struct gpio_keys_button *button; int error; int nbuttons; - int i; node = dev->of_node; if (!node) @@ -640,19 +639,18 @@ gpio_keys_get_devtree_pdata(struct device *dev) if (!pdata) return ERR_PTR(-ENOMEM); - pdata->buttons = (struct gpio_keys_button *)(pdata + 1); + button = (struct gpio_keys_button *)(pdata + 1); + + pdata->buttons = button; pdata->nbuttons = nbuttons; pdata->rep = !!of_get_property(node, "autorepeat", NULL); of_property_read_string(node, "label", &pdata->name); - i = 0; for_each_available_child_of_node(node, pp) { enum of_gpio_flags flags; - button = &pdata->buttons[i++]; - button->gpio = of_get_gpio_flags(pp, 0, &flags); if (button->gpio < 0) { error = button->gpio; @@ -694,6 +692,8 @@ gpio_keys_get_devtree_pdata(struct device *dev) if (of_property_read_u32(pp, "debounce-interval", &button->debounce_interval)) button->debounce_interval = 5; + + button++; } if (pdata->nbuttons == 0) diff --git a/drivers/input/keyboard/gpio_keys_polled.c b/drivers/input/keyboard/gpio_keys_polled.c index 62bdb1d48c49..2cf407831f06 100644 --- a/drivers/input/keyboard/gpio_keys_polled.c +++ b/drivers/input/keyboard/gpio_keys_polled.c @@ -30,6 +30,7 @@ #define DRV_NAME "gpio-keys-polled" struct gpio_keys_button_data { + struct gpio_desc *gpiod; int last_state; int count; int threshold; @@ -46,7 +47,7 @@ struct gpio_keys_polled_dev { }; static void gpio_keys_button_event(struct input_polled_dev *dev, - 
struct gpio_keys_button *button, + const struct gpio_keys_button *button, int state) { struct gpio_keys_polled_dev *bdev = dev->private; @@ -70,15 +71,15 @@ static void gpio_keys_button_event(struct input_polled_dev *dev, } static void gpio_keys_polled_check_state(struct input_polled_dev *dev, - struct gpio_keys_button *button, + const struct gpio_keys_button *button, struct gpio_keys_button_data *bdata) { int state; if (bdata->can_sleep) - state = !!gpiod_get_value_cansleep(button->gpiod); + state = !!gpiod_get_value_cansleep(bdata->gpiod); else - state = !!gpiod_get_value(button->gpiod); + state = !!gpiod_get_value(bdata->gpiod); gpio_keys_button_event(dev, button, state); @@ -142,48 +143,35 @@ static void gpio_keys_polled_close(struct input_polled_dev *dev) pdata->disable(bdev->dev); } -static struct gpio_keys_platform_data *gpio_keys_polled_get_devtree_pdata(struct device *dev) +static struct gpio_keys_platform_data * +gpio_keys_polled_get_devtree_pdata(struct device *dev) { struct gpio_keys_platform_data *pdata; struct gpio_keys_button *button; struct fwnode_handle *child; - int error; int nbuttons; nbuttons = device_get_child_node_count(dev); if (nbuttons == 0) - return NULL; + return ERR_PTR(-EINVAL); pdata = devm_kzalloc(dev, sizeof(*pdata) + nbuttons * sizeof(*button), GFP_KERNEL); if (!pdata) return ERR_PTR(-ENOMEM); - pdata->buttons = (struct gpio_keys_button *)(pdata + 1); + button = (struct gpio_keys_button *)(pdata + 1); + + pdata->buttons = button; + pdata->nbuttons = nbuttons; pdata->rep = device_property_present(dev, "autorepeat"); device_property_read_u32(dev, "poll-interval", &pdata->poll_interval); device_for_each_child_node(dev, child) { - struct gpio_desc *desc; - - desc = devm_get_gpiod_from_child(dev, NULL, child); - if (IS_ERR(desc)) { - error = PTR_ERR(desc); - if (error != -EPROBE_DEFER) - dev_err(dev, - "Failed to get gpio flags, error: %d\n", - error); - fwnode_handle_put(child); - return ERR_PTR(error); - } - - button = 
&pdata->buttons[pdata->nbuttons++]; - button->gpiod = desc; - - if (fwnode_property_read_u32(child, "linux,code", &button->code)) { - dev_err(dev, "Button without keycode: %d\n", - pdata->nbuttons - 1); + if (fwnode_property_read_u32(child, "linux,code", + &button->code)) { + dev_err(dev, "button without keycode\n"); fwnode_handle_put(child); return ERR_PTR(-EINVAL); } @@ -206,10 +194,9 @@ static struct gpio_keys_platform_data *gpio_keys_polled_get_devtree_pdata(struct if (fwnode_property_read_u32(child, "debounce-interval", &button->debounce_interval)) button->debounce_interval = 5; - } - if (pdata->nbuttons == 0) - return ERR_PTR(-EINVAL); + button++; + } return pdata; } @@ -220,7 +207,7 @@ static void gpio_keys_polled_set_abs_params(struct input_dev *input, int i, min = 0, max = 0; for (i = 0; i < pdata->nbuttons; i++) { - struct gpio_keys_button *button = &pdata->buttons[i]; + const struct gpio_keys_button *button = &pdata->buttons[i]; if (button->type != EV_ABS || button->code != code) continue; @@ -230,6 +217,7 @@ static void gpio_keys_polled_set_abs_params(struct input_dev *input, if (button->value > max) max = button->value; } + input_set_abs_params(input, code, min, max, 0, 0); } @@ -242,6 +230,7 @@ MODULE_DEVICE_TABLE(of, gpio_keys_polled_of_match); static int gpio_keys_polled_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; + struct fwnode_handle *child = NULL; const struct gpio_keys_platform_data *pdata = dev_get_platdata(dev); struct gpio_keys_polled_dev *bdev; struct input_polled_dev *poll_dev; @@ -254,10 +243,6 @@ static int gpio_keys_polled_probe(struct platform_device *pdev) pdata = gpio_keys_polled_get_devtree_pdata(dev); if (IS_ERR(pdata)) return PTR_ERR(pdata); - if (!pdata) { - dev_err(dev, "missing platform data\n"); - return -EINVAL; - } } if (!pdata->poll_interval) { @@ -300,20 +285,40 @@ static int gpio_keys_polled_probe(struct platform_device *pdev) __set_bit(EV_REP, input->evbit); for (i = 0; i < pdata->nbuttons; 
i++) { - struct gpio_keys_button *button = &pdata->buttons[i]; + const struct gpio_keys_button *button = &pdata->buttons[i]; struct gpio_keys_button_data *bdata = &bdev->data[i]; unsigned int type = button->type ?: EV_KEY; if (button->wakeup) { dev_err(dev, DRV_NAME " does not support wakeup\n"); + fwnode_handle_put(child); return -EINVAL; } - /* - * Legacy GPIO number so request the GPIO here and - * convert it to descriptor. - */ - if (!button->gpiod && gpio_is_valid(button->gpio)) { + if (!dev_get_platdata(dev)) { + /* No legacy static platform data */ + child = device_get_next_child_node(dev, child); + if (!child) { + dev_err(dev, "missing child device node\n"); + return -EINVAL; + } + + bdata->gpiod = devm_get_gpiod_from_child(dev, NULL, + child); + if (IS_ERR(bdata->gpiod)) { + error = PTR_ERR(bdata->gpiod); + if (error != -EPROBE_DEFER) + dev_err(dev, + "failed to get gpio: %d\n", + error); + fwnode_handle_put(child); + return error; + } + } else if (gpio_is_valid(button->gpio)) { + /* + * Legacy GPIO number so request the GPIO here and + * convert it to descriptor. + */ unsigned flags = GPIOF_IN; if (button->active_low) @@ -322,18 +327,22 @@ static int gpio_keys_polled_probe(struct platform_device *pdev) error = devm_gpio_request_one(&pdev->dev, button->gpio, flags, button->desc ? 
: DRV_NAME); if (error) { - dev_err(dev, "unable to claim gpio %u, err=%d\n", + dev_err(dev, + "unable to claim gpio %u, err=%d\n", button->gpio, error); return error; } - button->gpiod = gpio_to_desc(button->gpio); + bdata->gpiod = gpio_to_desc(button->gpio); + if (!bdata->gpiod) { + dev_err(dev, + "unable to convert gpio %u to descriptor\n", + button->gpio); + return -EINVAL; + } } - if (IS_ERR(button->gpiod)) - return PTR_ERR(button->gpiod); - - bdata->can_sleep = gpiod_cansleep(button->gpiod); + bdata->can_sleep = gpiod_cansleep(bdata->gpiod); bdata->last_state = -1; bdata->threshold = DIV_ROUND_UP(button->debounce_interval, pdata->poll_interval); @@ -344,6 +353,8 @@ static int gpio_keys_polled_probe(struct platform_device *pdev) button->code); } + fwnode_handle_put(child); + bdev->poll_dev = poll_dev; bdev->dev = dev; bdev->pdata = pdata; diff --git a/include/linux/gpio_keys.h b/include/linux/gpio_keys.h index ee2d8c6f9130..0b71024c082c 100644 --- a/include/linux/gpio_keys.h +++ b/include/linux/gpio_keys.h @@ -2,7 +2,6 @@ #define _GPIO_KEYS_H struct device; -struct gpio_desc; /** * struct gpio_keys_button - configuration parameters @@ -18,7 +17,6 @@ struct gpio_desc; * disable button via sysfs * @value: axis value for %EV_ABS * @irq: Irq number in case of interrupt keys - * @gpiod: GPIO descriptor */ struct gpio_keys_button { unsigned int code; @@ -31,7 +29,6 @@ struct gpio_keys_button { bool can_disable; int value; unsigned int irq; - struct gpio_desc *gpiod; }; /** @@ -46,7 +43,7 @@ struct gpio_keys_button { * @name: input device name */ struct gpio_keys_platform_data { - struct gpio_keys_button *buttons; + const struct gpio_keys_button *buttons; int nbuttons; unsigned int poll_interval; unsigned int rep:1; -- cgit v1.2.3 From 63e95b5c4f16e156b98adcf2f7d820ba941c82a3 Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Tue, 8 Nov 2016 11:32:20 +1100 Subject: dax: coordinate locking for offsets in PMD range DAX radix tree locking currently locks entries based on 
the unique combination of the 'mapping' pointer and the pgoff_t 'index' for the entry. This works for PTEs, but as we move to PMDs we will need to have all the offsets within the range covered by the PMD to map to the same bit lock. To accomplish this, for ranges covered by a PMD entry we will instead lock based on the page offset of the beginning of the PMD entry. The 'mapping' pointer is still used in the same way. Signed-off-by: Ross Zwisler Reviewed-by: Christoph Hellwig Reviewed-by: Jan Kara Signed-off-by: Dave Chinner --- fs/dax.c | 65 +++++++++++++++++++++++++++++++++-------------------- include/linux/dax.h | 2 +- mm/filemap.c | 2 +- 3 files changed, 43 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/fs/dax.c b/fs/dax.c index 835e7f082cff..72387023545e 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -64,14 +64,6 @@ static int __init init_dax_wait_table(void) } fs_initcall(init_dax_wait_table); -static wait_queue_head_t *dax_entry_waitqueue(struct address_space *mapping, - pgoff_t index) -{ - unsigned long hash = hash_long((unsigned long)mapping ^ index, - DAX_WAIT_TABLE_BITS); - return wait_table + hash; -} - static long dax_map_atomic(struct block_device *bdev, struct blk_dax_ctl *dax) { struct request_queue *q = bdev->bd_queue; @@ -285,7 +277,7 @@ EXPORT_SYMBOL_GPL(dax_do_io); */ struct exceptional_entry_key { struct address_space *mapping; - unsigned long index; + pgoff_t entry_start; }; struct wait_exceptional_entry_queue { @@ -293,6 +285,26 @@ struct wait_exceptional_entry_queue { struct exceptional_entry_key key; }; +static wait_queue_head_t *dax_entry_waitqueue(struct address_space *mapping, + pgoff_t index, void *entry, struct exceptional_entry_key *key) +{ + unsigned long hash; + + /* + * If 'entry' is a PMD, align the 'index' that we use for the wait + * queue to the start of that PMD. This ensures that all offsets in + * the range covered by the PMD map to the same bit lock. 
+ */ + if (RADIX_DAX_TYPE(entry) == RADIX_DAX_PMD) + index &= ~((1UL << (PMD_SHIFT - PAGE_SHIFT)) - 1); + + key->mapping = mapping; + key->entry_start = index; + + hash = hash_long((unsigned long)mapping ^ index, DAX_WAIT_TABLE_BITS); + return wait_table + hash; +} + static int wake_exceptional_entry_func(wait_queue_t *wait, unsigned int mode, int sync, void *keyp) { @@ -301,7 +313,7 @@ static int wake_exceptional_entry_func(wait_queue_t *wait, unsigned int mode, container_of(wait, struct wait_exceptional_entry_queue, wait); if (key->mapping != ewait->key.mapping || - key->index != ewait->key.index) + key->entry_start != ewait->key.entry_start) return 0; return autoremove_wake_function(wait, mode, sync, NULL); } @@ -359,12 +371,10 @@ static void *get_unlocked_mapping_entry(struct address_space *mapping, { void *entry, **slot; struct wait_exceptional_entry_queue ewait; - wait_queue_head_t *wq = dax_entry_waitqueue(mapping, index); + wait_queue_head_t *wq; init_wait(&ewait.wait); ewait.wait.func = wake_exceptional_entry_func; - ewait.key.mapping = mapping; - ewait.key.index = index; for (;;) { entry = __radix_tree_lookup(&mapping->page_tree, index, NULL, @@ -375,6 +385,8 @@ static void *get_unlocked_mapping_entry(struct address_space *mapping, *slotp = slot; return entry; } + + wq = dax_entry_waitqueue(mapping, index, entry, &ewait.key); prepare_to_wait_exclusive(wq, &ewait.wait, TASK_UNINTERRUPTIBLE); spin_unlock_irq(&mapping->tree_lock); @@ -447,10 +459,20 @@ restart: return entry; } +/* + * We do not necessarily hold the mapping->tree_lock when we call this + * function so it is possible that 'entry' is no longer a valid item in the + * radix tree. This is okay, though, because all we really need to do is to + * find the correct waitqueue where tasks might be sleeping waiting for that + * old 'entry' and wake them. 
+ */ void dax_wake_mapping_entry_waiter(struct address_space *mapping, - pgoff_t index, bool wake_all) + pgoff_t index, void *entry, bool wake_all) { - wait_queue_head_t *wq = dax_entry_waitqueue(mapping, index); + struct exceptional_entry_key key; + wait_queue_head_t *wq; + + wq = dax_entry_waitqueue(mapping, index, entry, &key); /* * Checking for locked entry and prepare_to_wait_exclusive() happens @@ -458,13 +480,8 @@ void dax_wake_mapping_entry_waiter(struct address_space *mapping, * So at this point all tasks that could have seen our entry locked * must be in the waitqueue and the following check will see them. */ - if (waitqueue_active(wq)) { - struct exceptional_entry_key key; - - key.mapping = mapping; - key.index = index; + if (waitqueue_active(wq)) __wake_up(wq, TASK_NORMAL, wake_all ? 0 : 1, &key); - } } void dax_unlock_mapping_entry(struct address_space *mapping, pgoff_t index) @@ -480,7 +497,7 @@ void dax_unlock_mapping_entry(struct address_space *mapping, pgoff_t index) } unlock_slot(mapping, slot); spin_unlock_irq(&mapping->tree_lock); - dax_wake_mapping_entry_waiter(mapping, index, false); + dax_wake_mapping_entry_waiter(mapping, index, entry, false); } static void put_locked_mapping_entry(struct address_space *mapping, @@ -505,7 +522,7 @@ static void put_unlocked_mapping_entry(struct address_space *mapping, return; /* We have to wake up next waiter for the radix tree entry lock */ - dax_wake_mapping_entry_waiter(mapping, index, false); + dax_wake_mapping_entry_waiter(mapping, index, entry, false); } /* @@ -532,7 +549,7 @@ int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index) radix_tree_delete(&mapping->page_tree, index); mapping->nrexceptional--; spin_unlock_irq(&mapping->tree_lock); - dax_wake_mapping_entry_waiter(mapping, index, true); + dax_wake_mapping_entry_waiter(mapping, index, entry, true); return 1; } diff --git a/include/linux/dax.h b/include/linux/dax.h index add6c4bc568f..a41a747d6112 100644 --- 
a/include/linux/dax.h +++ b/include/linux/dax.h @@ -22,7 +22,7 @@ int iomap_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf, int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t); int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index); void dax_wake_mapping_entry_waiter(struct address_space *mapping, - pgoff_t index, bool wake_all); + pgoff_t index, void *entry, bool wake_all); #ifdef CONFIG_FS_DAX struct page *read_dax_sector(struct block_device *bdev, sector_t n); diff --git a/mm/filemap.c b/mm/filemap.c index 849f459ad078..1ffb7dcd1b5d 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -143,7 +143,7 @@ static int page_cache_tree_insert(struct address_space *mapping, if (node) workingset_node_pages_dec(node); /* Wakeup waiters for exceptional entry lock */ - dax_wake_mapping_entry_waiter(mapping, page->index, + dax_wake_mapping_entry_waiter(mapping, page->index, p, false); } } -- cgit v1.2.3 From b9fde0462e34a05b25c3d68d344971865659abae Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Tue, 8 Nov 2016 11:32:35 +1100 Subject: dax: remove dax_pmd_fault() dax_pmd_fault() is the old struct buffer_head + get_block_t based 2 MiB DAX fault handler. This fault handler has been disabled for several kernel releases, and support for PMDs will be reintroduced using the struct iomap interface instead. Signed-off-by: Ross Zwisler Reviewed-by: Christoph Hellwig Reviewed-by: Jan Kara Signed-off-by: Dave Chinner --- fs/dax.c | 213 ---------------------------------------------------- include/linux/dax.h | 6 +- 2 files changed, 1 insertion(+), 218 deletions(-) (limited to 'include/linux') diff --git a/fs/dax.c b/fs/dax.c index 72387023545e..3d0b1032c555 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -915,219 +915,6 @@ int dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf, } EXPORT_SYMBOL_GPL(dax_fault); -#if defined(CONFIG_TRANSPARENT_HUGEPAGE) -/* - * The 'colour' (ie low bits) within a PMD of a page offset. 
This comes up - * more often than one might expect in the below function. - */ -#define PG_PMD_COLOUR ((PMD_SIZE >> PAGE_SHIFT) - 1) - -static void __dax_dbg(struct buffer_head *bh, unsigned long address, - const char *reason, const char *fn) -{ - if (bh) { - char bname[BDEVNAME_SIZE]; - bdevname(bh->b_bdev, bname); - pr_debug("%s: %s addr: %lx dev %s state %lx start %lld " - "length %zd fallback: %s\n", fn, current->comm, - address, bname, bh->b_state, (u64)bh->b_blocknr, - bh->b_size, reason); - } else { - pr_debug("%s: %s addr: %lx fallback: %s\n", fn, - current->comm, address, reason); - } -} - -#define dax_pmd_dbg(bh, address, reason) __dax_dbg(bh, address, reason, "dax_pmd") - -/** - * dax_pmd_fault - handle a PMD fault on a DAX file - * @vma: The virtual memory area where the fault occurred - * @vmf: The description of the fault - * @get_block: The filesystem method used to translate file offsets to blocks - * - * When a page fault occurs, filesystems may call this helper in their - * pmd_fault handler for DAX files. 
- */ -int dax_pmd_fault(struct vm_area_struct *vma, unsigned long address, - pmd_t *pmd, unsigned int flags, get_block_t get_block) -{ - struct file *file = vma->vm_file; - struct address_space *mapping = file->f_mapping; - struct inode *inode = mapping->host; - struct buffer_head bh; - unsigned blkbits = inode->i_blkbits; - unsigned long pmd_addr = address & PMD_MASK; - bool write = flags & FAULT_FLAG_WRITE; - struct block_device *bdev; - pgoff_t size, pgoff; - sector_t block; - int result = 0; - bool alloc = false; - - /* dax pmd mappings require pfn_t_devmap() */ - if (!IS_ENABLED(CONFIG_FS_DAX_PMD)) - return VM_FAULT_FALLBACK; - - /* Fall back to PTEs if we're going to COW */ - if (write && !(vma->vm_flags & VM_SHARED)) { - split_huge_pmd(vma, pmd, address); - dax_pmd_dbg(NULL, address, "cow write"); - return VM_FAULT_FALLBACK; - } - /* If the PMD would extend outside the VMA */ - if (pmd_addr < vma->vm_start) { - dax_pmd_dbg(NULL, address, "vma start unaligned"); - return VM_FAULT_FALLBACK; - } - if ((pmd_addr + PMD_SIZE) > vma->vm_end) { - dax_pmd_dbg(NULL, address, "vma end unaligned"); - return VM_FAULT_FALLBACK; - } - - pgoff = linear_page_index(vma, pmd_addr); - size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT; - if (pgoff >= size) - return VM_FAULT_SIGBUS; - /* If the PMD would cover blocks out of the file */ - if ((pgoff | PG_PMD_COLOUR) >= size) { - dax_pmd_dbg(NULL, address, - "offset + huge page size > file size"); - return VM_FAULT_FALLBACK; - } - - memset(&bh, 0, sizeof(bh)); - bh.b_bdev = inode->i_sb->s_bdev; - block = (sector_t)pgoff << (PAGE_SHIFT - blkbits); - - bh.b_size = PMD_SIZE; - - if (get_block(inode, block, &bh, 0) != 0) - return VM_FAULT_SIGBUS; - - if (!buffer_mapped(&bh) && write) { - if (get_block(inode, block, &bh, 1) != 0) - return VM_FAULT_SIGBUS; - alloc = true; - WARN_ON_ONCE(buffer_unwritten(&bh) || buffer_new(&bh)); - } - - bdev = bh.b_bdev; - - if (bh.b_size < PMD_SIZE) { - dax_pmd_dbg(&bh, address, "allocated block 
too small"); - return VM_FAULT_FALLBACK; - } - - /* - * If we allocated new storage, make sure no process has any - * zero pages covering this hole - */ - if (alloc) { - loff_t lstart = pgoff << PAGE_SHIFT; - loff_t lend = lstart + PMD_SIZE - 1; /* inclusive */ - - truncate_pagecache_range(inode, lstart, lend); - } - - if (!write && !buffer_mapped(&bh)) { - spinlock_t *ptl; - pmd_t entry; - struct page *zero_page = mm_get_huge_zero_page(vma->vm_mm); - - if (unlikely(!zero_page)) { - dax_pmd_dbg(&bh, address, "no zero page"); - goto fallback; - } - - ptl = pmd_lock(vma->vm_mm, pmd); - if (!pmd_none(*pmd)) { - spin_unlock(ptl); - dax_pmd_dbg(&bh, address, "pmd already present"); - goto fallback; - } - - dev_dbg(part_to_dev(bdev->bd_part), - "%s: %s addr: %lx pfn: sect: %llx\n", - __func__, current->comm, address, - (unsigned long long) to_sector(&bh, inode)); - - entry = mk_pmd(zero_page, vma->vm_page_prot); - entry = pmd_mkhuge(entry); - set_pmd_at(vma->vm_mm, pmd_addr, pmd, entry); - result = VM_FAULT_NOPAGE; - spin_unlock(ptl); - } else { - struct blk_dax_ctl dax = { - .sector = to_sector(&bh, inode), - .size = PMD_SIZE, - }; - long length = dax_map_atomic(bdev, &dax); - - if (length < 0) { - dax_pmd_dbg(&bh, address, "dax-error fallback"); - goto fallback; - } - if (length < PMD_SIZE) { - dax_pmd_dbg(&bh, address, "dax-length too small"); - dax_unmap_atomic(bdev, &dax); - goto fallback; - } - if (pfn_t_to_pfn(dax.pfn) & PG_PMD_COLOUR) { - dax_pmd_dbg(&bh, address, "pfn unaligned"); - dax_unmap_atomic(bdev, &dax); - goto fallback; - } - - if (!pfn_t_devmap(dax.pfn)) { - dax_unmap_atomic(bdev, &dax); - dax_pmd_dbg(&bh, address, "pfn not in memmap"); - goto fallback; - } - dax_unmap_atomic(bdev, &dax); - - /* - * For PTE faults we insert a radix tree entry for reads, and - * leave it clean. Then on the first write we dirty the radix - * tree entry via the dax_pfn_mkwrite() path. 
This sequence - * allows the dax_pfn_mkwrite() call to be simpler and avoid a - * call into get_block() to translate the pgoff to a sector in - * order to be able to create a new radix tree entry. - * - * The PMD path doesn't have an equivalent to - * dax_pfn_mkwrite(), though, so for a read followed by a - * write we traverse all the way through dax_pmd_fault() - * twice. This means we can just skip inserting a radix tree - * entry completely on the initial read and just wait until - * the write to insert a dirty entry. - */ - if (write) { - /* - * We should insert radix-tree entry and dirty it here. - * For now this is broken... - */ - } - - dev_dbg(part_to_dev(bdev->bd_part), - "%s: %s addr: %lx pfn: %lx sect: %llx\n", - __func__, current->comm, address, - pfn_t_to_pfn(dax.pfn), - (unsigned long long) dax.sector); - result |= vmf_insert_pfn_pmd(vma, address, pmd, - dax.pfn, write); - } - - out: - return result; - - fallback: - count_vm_event(THP_FAULT_FALLBACK); - result = VM_FAULT_FALLBACK; - goto out; -} -EXPORT_SYMBOL_GPL(dax_pmd_fault); -#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ - /** * dax_pfn_mkwrite - handle first write to DAX page * @vma: The virtual memory area where the fault occurred diff --git a/include/linux/dax.h b/include/linux/dax.h index a41a747d6112..0f74866edae6 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -48,16 +48,12 @@ static inline int __dax_zero_page_range(struct block_device *bdev, } #endif -#if defined(CONFIG_TRANSPARENT_HUGEPAGE) -int dax_pmd_fault(struct vm_area_struct *, unsigned long addr, pmd_t *, - unsigned int flags, get_block_t); -#else static inline int dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmd, unsigned int flags, get_block_t gb) { return VM_FAULT_FALLBACK; } -#endif + int dax_pfn_mkwrite(struct vm_area_struct *, struct vm_fault *); #define dax_mkwrite(vma, vmf, gb) dax_fault(vma, vmf, gb) -- cgit v1.2.3 From 11c59c92f44d9272db7655a462608658a6d95013 Mon Sep 17 00:00:00 2001 From: 
Ross Zwisler Date: Tue, 8 Nov 2016 11:32:46 +1100 Subject: dax: correct dax iomap code namespace The recently added DAX functions that use the new struct iomap data structure were named iomap_dax_rw(), iomap_dax_fault() and iomap_dax_actor(). These are actually defined in fs/dax.c, though, so should be part of the "dax" namespace and not the "iomap" namespace. Rename them to dax_iomap_rw(), dax_iomap_fault() and dax_iomap_actor() respectively. Signed-off-by: Ross Zwisler Suggested-by: Dave Chinner Reviewed-by: Christoph Hellwig Reviewed-by: Jan Kara Signed-off-by: Dave Chinner --- fs/dax.c | 16 ++++++++-------- fs/ext2/file.c | 6 +++--- fs/xfs/xfs_file.c | 8 ++++---- include/linux/dax.h | 4 ++-- 4 files changed, 17 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/fs/dax.c b/fs/dax.c index 3d0b1032c555..fdbd7a1ec6cf 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -1031,7 +1031,7 @@ EXPORT_SYMBOL_GPL(dax_truncate_page); #ifdef CONFIG_FS_IOMAP static loff_t -iomap_dax_actor(struct inode *inode, loff_t pos, loff_t length, void *data, +dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data, struct iomap *iomap) { struct iov_iter *iter = data; @@ -1088,7 +1088,7 @@ iomap_dax_actor(struct inode *inode, loff_t pos, loff_t length, void *data, } /** - * iomap_dax_rw - Perform I/O to a DAX file + * dax_iomap_rw - Perform I/O to a DAX file * @iocb: The control block for this I/O * @iter: The addresses to do I/O from or to * @ops: iomap ops passed from the file system @@ -1098,7 +1098,7 @@ iomap_dax_actor(struct inode *inode, loff_t pos, loff_t length, void *data, * and evicting any page cache pages in the region under I/O. 
*/ ssize_t -iomap_dax_rw(struct kiocb *iocb, struct iov_iter *iter, +dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter, struct iomap_ops *ops) { struct address_space *mapping = iocb->ki_filp->f_mapping; @@ -1128,7 +1128,7 @@ iomap_dax_rw(struct kiocb *iocb, struct iov_iter *iter, while (iov_iter_count(iter)) { ret = iomap_apply(inode, pos, iov_iter_count(iter), flags, ops, - iter, iomap_dax_actor); + iter, dax_iomap_actor); if (ret <= 0) break; pos += ret; @@ -1138,10 +1138,10 @@ iomap_dax_rw(struct kiocb *iocb, struct iov_iter *iter, iocb->ki_pos += done; return done ? done : ret; } -EXPORT_SYMBOL_GPL(iomap_dax_rw); +EXPORT_SYMBOL_GPL(dax_iomap_rw); /** - * iomap_dax_fault - handle a page fault on a DAX file + * dax_iomap_fault - handle a page fault on a DAX file * @vma: The virtual memory area where the fault occurred * @vmf: The description of the fault * @ops: iomap ops passed from the file system @@ -1150,7 +1150,7 @@ EXPORT_SYMBOL_GPL(iomap_dax_rw); * or mkwrite handler for DAX files. Assumes the caller has done all the * necessary locking for the page fault to proceed successfully. 
*/ -int iomap_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf, +int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf, struct iomap_ops *ops) { struct address_space *mapping = vma->vm_file->f_mapping; @@ -1252,5 +1252,5 @@ int iomap_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf, return VM_FAULT_SIGBUS | major; return VM_FAULT_NOPAGE | major; } -EXPORT_SYMBOL_GPL(iomap_dax_fault); +EXPORT_SYMBOL_GPL(dax_iomap_fault); #endif /* CONFIG_FS_IOMAP */ diff --git a/fs/ext2/file.c b/fs/ext2/file.c index fb88b51ca947..b0f241528a30 100644 --- a/fs/ext2/file.c +++ b/fs/ext2/file.c @@ -38,7 +38,7 @@ static ssize_t ext2_dax_read_iter(struct kiocb *iocb, struct iov_iter *to) return 0; /* skip atime */ inode_lock_shared(inode); - ret = iomap_dax_rw(iocb, to, &ext2_iomap_ops); + ret = dax_iomap_rw(iocb, to, &ext2_iomap_ops); inode_unlock_shared(inode); file_accessed(iocb->ki_filp); @@ -62,7 +62,7 @@ static ssize_t ext2_dax_write_iter(struct kiocb *iocb, struct iov_iter *from) if (ret) goto out_unlock; - ret = iomap_dax_rw(iocb, from, &ext2_iomap_ops); + ret = dax_iomap_rw(iocb, from, &ext2_iomap_ops); if (ret > 0 && iocb->ki_pos > i_size_read(inode)) { i_size_write(inode, iocb->ki_pos); mark_inode_dirty(inode); @@ -99,7 +99,7 @@ static int ext2_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf) } down_read(&ei->dax_sem); - ret = iomap_dax_fault(vma, vmf, &ext2_iomap_ops); + ret = dax_iomap_fault(vma, vmf, &ext2_iomap_ops); up_read(&ei->dax_sem); if (vmf->flags & FAULT_FLAG_WRITE) diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index a314fc7b56fa..e7f35d548cfc 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -344,7 +344,7 @@ xfs_file_dax_read( return 0; /* skip atime */ xfs_rw_ilock(ip, XFS_IOLOCK_SHARED); - ret = iomap_dax_rw(iocb, to, &xfs_iomap_ops); + ret = dax_iomap_rw(iocb, to, &xfs_iomap_ops); xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED); file_accessed(iocb->ki_filp); @@ -691,7 +691,7 @@ xfs_file_dax_write( 
trace_xfs_file_dax_write(ip, count, pos); - ret = iomap_dax_rw(iocb, from, &xfs_iomap_ops); + ret = dax_iomap_rw(iocb, from, &xfs_iomap_ops); if (ret > 0 && iocb->ki_pos > i_size_read(inode)) { i_size_write(inode, iocb->ki_pos); error = xfs_setfilesize(ip, pos, ret); @@ -1640,7 +1640,7 @@ xfs_filemap_page_mkwrite( xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED); if (IS_DAX(inode)) { - ret = iomap_dax_fault(vma, vmf, &xfs_iomap_ops); + ret = dax_iomap_fault(vma, vmf, &xfs_iomap_ops); } else { ret = iomap_page_mkwrite(vma, vmf, &xfs_iomap_ops); ret = block_page_mkwrite_return(ret); @@ -1674,7 +1674,7 @@ xfs_filemap_fault( * changes to xfs_get_blocks_direct() to map unwritten extent * ioend for conversion on read-only mappings. */ - ret = iomap_dax_fault(vma, vmf, &xfs_iomap_ops); + ret = dax_iomap_fault(vma, vmf, &xfs_iomap_ops); } else ret = filemap_fault(vma, vmf); xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED); diff --git a/include/linux/dax.h b/include/linux/dax.h index 0f74866edae6..a3dfee4cb03f 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -11,13 +11,13 @@ struct iomap_ops; /* We use lowest available exceptional entry bit for locking */ #define RADIX_DAX_ENTRY_LOCK (1 << RADIX_TREE_EXCEPTIONAL_SHIFT) -ssize_t iomap_dax_rw(struct kiocb *iocb, struct iov_iter *iter, +ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter, struct iomap_ops *ops); ssize_t dax_do_io(struct kiocb *, struct inode *, struct iov_iter *, get_block_t, dio_iodone_t, int flags); int dax_zero_page_range(struct inode *, loff_t from, unsigned len, get_block_t); int dax_truncate_page(struct inode *, loff_t from, get_block_t); -int iomap_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf, +int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf, struct iomap_ops *ops); int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t); int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index); -- cgit v1.2.3 From 
fa28f7296a7ce38ed15dc06bd2149e04c8db9d4b Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Tue, 8 Nov 2016 11:33:35 +1100 Subject: dax: move RADIX_DAX_* defines to dax.h The RADIX_DAX_* defines currently mostly live in fs/dax.c, with just RADIX_DAX_ENTRY_LOCK being in include/linux/dax.h so it can be used in mm/filemap.c. When we add PMD support, though, mm/filemap.c will also need access to the RADIX_DAX_PTE type so it can properly construct a 4k sized empty entry. Instead of shifting the defines between dax.c and dax.h as they are individually used in other code, just move them wholesale to dax.h so they'll be available when we need them. Signed-off-by: Ross Zwisler Reviewed-by: Christoph Hellwig Reviewed-by: Jan Kara Signed-off-by: Dave Chinner --- fs/dax.c | 14 -------------- include/linux/dax.h | 15 ++++++++++++++- 2 files changed, 14 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/fs/dax.c b/fs/dax.c index 6edd89b3b69c..c45cc4d8e996 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -34,20 +34,6 @@ #include #include "internal.h" -/* - * We use lowest available bit in exceptional entry for locking, other two - * bits to determine entry type. In total 3 special bits. - */ -#define RADIX_DAX_SHIFT (RADIX_TREE_EXCEPTIONAL_SHIFT + 3) -#define RADIX_DAX_PTE (1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 1)) -#define RADIX_DAX_PMD (1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 2)) -#define RADIX_DAX_TYPE_MASK (RADIX_DAX_PTE | RADIX_DAX_PMD) -#define RADIX_DAX_TYPE(entry) ((unsigned long)entry & RADIX_DAX_TYPE_MASK) -#define RADIX_DAX_SECTOR(entry) (((unsigned long)entry >> RADIX_DAX_SHIFT)) -#define RADIX_DAX_ENTRY(sector, pmd) ((void *)((unsigned long)sector << \ - RADIX_DAX_SHIFT | (pmd ? 
RADIX_DAX_PMD : RADIX_DAX_PTE) | \ - RADIX_TREE_EXCEPTIONAL_ENTRY)) - /* We choose 4096 entries - same as per-zone page wait tables */ #define DAX_WAIT_TABLE_BITS 12 #define DAX_WAIT_TABLE_ENTRIES (1 << DAX_WAIT_TABLE_BITS) diff --git a/include/linux/dax.h b/include/linux/dax.h index a3dfee4cb03f..e9ea78c1cf98 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -8,8 +8,21 @@ struct iomap_ops; -/* We use lowest available exceptional entry bit for locking */ +/* + * We use lowest available bit in exceptional entry for locking, other two + * bits to determine entry type. In total 3 special bits. + */ +#define RADIX_DAX_SHIFT (RADIX_TREE_EXCEPTIONAL_SHIFT + 3) #define RADIX_DAX_ENTRY_LOCK (1 << RADIX_TREE_EXCEPTIONAL_SHIFT) +#define RADIX_DAX_PTE (1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 1)) +#define RADIX_DAX_PMD (1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 2)) +#define RADIX_DAX_TYPE_MASK (RADIX_DAX_PTE | RADIX_DAX_PMD) +#define RADIX_DAX_TYPE(entry) ((unsigned long)entry & RADIX_DAX_TYPE_MASK) +#define RADIX_DAX_SECTOR(entry) (((unsigned long)entry >> RADIX_DAX_SHIFT)) +#define RADIX_DAX_ENTRY(sector, pmd) ((void *)((unsigned long)sector << \ + RADIX_DAX_SHIFT | (pmd ? RADIX_DAX_PMD : RADIX_DAX_PTE) | \ + RADIX_TREE_EXCEPTIONAL_ENTRY)) + ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter, struct iomap_ops *ops); -- cgit v1.2.3 From 642261ac995e01d7837db1f4b90181496f7e6835 Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Tue, 8 Nov 2016 11:34:45 +1100 Subject: dax: add struct iomap based DAX PMD support DAX PMDs have been disabled since Jan Kara introduced DAX radix tree based locking. This patch allows DAX PMDs to participate in the DAX radix tree based locking scheme so that they can be re-enabled using the new struct iomap based fault handlers. There are currently three types of DAX 4k entries: 4k zero pages, 4k DAX mappings that have an associated block allocation, and 4k DAX empty entries. 
The empty entries exist to provide locking for the duration of a given page fault. This patch adds three equivalent 2MiB DAX entries: Huge Zero Page (HZP) entries, PMD DAX entries that have associated block allocations, and 2 MiB DAX empty entries. Unlike the 4k case where we insert a struct page* into the radix tree for 4k zero pages, for HZP we insert a DAX exceptional entry with the new RADIX_DAX_HZP flag set. This is because we use a single 2 MiB zero page in every 2MiB hole mapping, and it doesn't make sense to have that same struct page* with multiple entries in multiple trees. This would cause contention on the single page lock for the one Huge Zero Page, and it would break the page->index and page->mapping associations that are assumed to be valid in many other places in the kernel. One difficult use case is when one thread is trying to use 4k entries in the radix tree for a given offset, and another thread is using 2 MiB entries for that same offset. The current code handles this by making the 2 MiB user fall back to 4k entries for most cases. This was done because it is the simplest solution, and because the use of 2MiB pages is already opportunistic. If we were to try to upgrade from 4k pages to 2MiB pages for a given range, we would run into the problem of how we lock out 4k page faults for the entire 2MiB range while we clean out the radix tree so we can insert the 2MiB entry. We can solve this problem if we need to, but I think that the cases where both 2MiB entries and 4k entries are being used for the same range will be rare enough and the gain small enough that it probably won't be worth the complexity.
Signed-off-by: Ross Zwisler Reviewed-by: Jan Kara Signed-off-by: Dave Chinner --- fs/dax.c | 378 +++++++++++++++++++++++++++++++++++++++++++++++----- include/linux/dax.h | 55 ++++++-- mm/filemap.c | 3 +- 3 files changed, 386 insertions(+), 50 deletions(-) (limited to 'include/linux') diff --git a/fs/dax.c b/fs/dax.c index 0582c7c2ae40..281e91a63367 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -76,6 +76,26 @@ static void dax_unmap_atomic(struct block_device *bdev, blk_queue_exit(bdev->bd_queue); } +static int dax_is_pmd_entry(void *entry) +{ + return (unsigned long)entry & RADIX_DAX_PMD; +} + +static int dax_is_pte_entry(void *entry) +{ + return !((unsigned long)entry & RADIX_DAX_PMD); +} + +static int dax_is_zero_entry(void *entry) +{ + return (unsigned long)entry & RADIX_DAX_HZP; +} + +static int dax_is_empty_entry(void *entry) +{ + return (unsigned long)entry & RADIX_DAX_EMPTY; +} + struct page *read_dax_sector(struct block_device *bdev, sector_t n) { struct page *page = alloc_pages(GFP_KERNEL, 0); @@ -281,7 +301,7 @@ static wait_queue_head_t *dax_entry_waitqueue(struct address_space *mapping, * queue to the start of that PMD. This ensures that all offsets in * the range covered by the PMD map to the same bit lock. */ - if (RADIX_DAX_TYPE(entry) == RADIX_DAX_PMD) + if (dax_is_pmd_entry(entry)) index &= ~((1UL << (PMD_SHIFT - PAGE_SHIFT)) - 1); key->mapping = mapping; @@ -413,36 +433,116 @@ static void put_unlocked_mapping_entry(struct address_space *mapping, * radix tree entry locked. If the radix tree doesn't contain given index, * create empty exceptional entry for the index and return with it locked. * + * When requesting an entry with size RADIX_DAX_PMD, grab_mapping_entry() will + * either return that locked entry or will return an error. This error will + * happen if there are any 4k entries (either zero pages or DAX entries) + * within the 2MiB range that we are requesting. + * + * We always favor 4k entries over 2MiB entries. 
There isn't a flow where we + * evict 4k entries in order to 'upgrade' them to a 2MiB entry. A 2MiB + * insertion will fail if it finds any 4k entries already in the tree, and a + * 4k insertion will cause an existing 2MiB entry to be unmapped and + * downgraded to 4k entries. This happens for both 2MiB huge zero pages as + * well as 2MiB empty entries. + * + * The exception to this downgrade path is for 2MiB DAX PMD entries that have + * real storage backing them. We will leave these real 2MiB DAX entries in + * the tree, and PTE writes will simply dirty the entire 2MiB DAX entry. + * * Note: Unlike filemap_fault() we don't honor FAULT_FLAG_RETRY flags. For * persistent memory the benefit is doubtful. We can add that later if we can * show it helps. */ -static void *grab_mapping_entry(struct address_space *mapping, pgoff_t index) +static void *grab_mapping_entry(struct address_space *mapping, pgoff_t index, + unsigned long size_flag) { + bool pmd_downgrade = false; /* splitting 2MiB entry into 4k entries? */ void *entry, **slot; restart: spin_lock_irq(&mapping->tree_lock); entry = get_unlocked_mapping_entry(mapping, index, &slot); + + if (entry) { + if (size_flag & RADIX_DAX_PMD) { + if (!radix_tree_exceptional_entry(entry) || + dax_is_pte_entry(entry)) { + put_unlocked_mapping_entry(mapping, index, + entry); + entry = ERR_PTR(-EEXIST); + goto out_unlock; + } + } else { /* trying to grab a PTE entry */ + if (radix_tree_exceptional_entry(entry) && + dax_is_pmd_entry(entry) && + (dax_is_zero_entry(entry) || + dax_is_empty_entry(entry))) { + pmd_downgrade = true; + } + } + } + /* No entry for given index? Make sure radix tree is big enough. */ - if (!entry) { + if (!entry || pmd_downgrade) { int err; + if (pmd_downgrade) { + /* + * Make sure 'entry' remains valid while we drop + * mapping->tree_lock. 
+ */ + entry = lock_slot(mapping, slot); + } + spin_unlock_irq(&mapping->tree_lock); err = radix_tree_preload( mapping_gfp_mask(mapping) & ~__GFP_HIGHMEM); - if (err) + if (err) { + if (pmd_downgrade) + put_locked_mapping_entry(mapping, index, entry); return ERR_PTR(err); - entry = (void *)(RADIX_TREE_EXCEPTIONAL_ENTRY | - RADIX_DAX_ENTRY_LOCK); + } + + /* + * Besides huge zero pages the only other thing that gets + * downgraded are empty entries which don't need to be + * unmapped. + */ + if (pmd_downgrade && dax_is_zero_entry(entry)) + unmap_mapping_range(mapping, + (index << PAGE_SHIFT) & PMD_MASK, PMD_SIZE, 0); + spin_lock_irq(&mapping->tree_lock); - err = radix_tree_insert(&mapping->page_tree, index, entry); + + if (pmd_downgrade) { + radix_tree_delete(&mapping->page_tree, index); + mapping->nrexceptional--; + dax_wake_mapping_entry_waiter(mapping, index, entry, + true); + } + + entry = dax_radix_locked_entry(0, size_flag | RADIX_DAX_EMPTY); + + err = __radix_tree_insert(&mapping->page_tree, index, + dax_radix_order(entry), entry); radix_tree_preload_end(); if (err) { spin_unlock_irq(&mapping->tree_lock); - /* Someone already created the entry? */ - if (err == -EEXIST) + /* + * Someone already created the entry? This is a + * normal failure when inserting PMDs in a range + * that already contains PTEs. In that case we want + * to return -EEXIST immediately. + */ + if (err == -EEXIST && !(size_flag & RADIX_DAX_PMD)) goto restart; + /* + * Our insertion of a DAX PMD entry failed, most + * likely because it collided with a PTE sized entry + * at a different index in the PMD range. We haven't + * inserted anything into the radix tree and have no + * waiters to wake. + */ return ERR_PTR(err); } /* Good, we have inserted empty locked entry into the tree. 
*/ @@ -466,6 +566,7 @@ restart: return page; } entry = lock_slot(mapping, slot); + out_unlock: spin_unlock_irq(&mapping->tree_lock); return entry; } @@ -473,9 +574,9 @@ restart: /* * We do not necessarily hold the mapping->tree_lock when we call this * function so it is possible that 'entry' is no longer a valid item in the - * radix tree. This is okay, though, because all we really need to do is to - * find the correct waitqueue where tasks might be sleeping waiting for that - * old 'entry' and wake them. + * radix tree. This is okay because all we really need to do is to find the + * correct waitqueue where tasks might be waiting for that old 'entry' and + * wake them. */ void dax_wake_mapping_entry_waiter(struct address_space *mapping, pgoff_t index, void *entry, bool wake_all) @@ -588,11 +689,17 @@ static int copy_user_dax(struct block_device *bdev, sector_t sector, size_t size return 0; } -#define DAX_PMD_INDEX(page_index) (page_index & (PMD_MASK >> PAGE_SHIFT)) - +/* + * By this point grab_mapping_entry() has ensured that we have a locked entry + * of the appropriate size so we don't have to worry about downgrading PMDs to + * PTEs. If we happen to be trying to insert a PTE and there is a PMD + * already in the tree, we will skip the insertion and just dirty the PMD as + * appropriate. 
+ */ static void *dax_insert_mapping_entry(struct address_space *mapping, struct vm_fault *vmf, - void *entry, sector_t sector) + void *entry, sector_t sector, + unsigned long flags) { struct radix_tree_root *page_tree = &mapping->page_tree; int error = 0; @@ -615,22 +722,35 @@ static void *dax_insert_mapping_entry(struct address_space *mapping, error = radix_tree_preload(vmf->gfp_mask & ~__GFP_HIGHMEM); if (error) return ERR_PTR(error); + } else if (dax_is_zero_entry(entry) && !(flags & RADIX_DAX_HZP)) { + /* replacing huge zero page with PMD block mapping */ + unmap_mapping_range(mapping, + (vmf->pgoff << PAGE_SHIFT) & PMD_MASK, PMD_SIZE, 0); } spin_lock_irq(&mapping->tree_lock); - new_entry = (void *)((unsigned long)RADIX_DAX_ENTRY(sector, false) | - RADIX_DAX_ENTRY_LOCK); + new_entry = dax_radix_locked_entry(sector, flags); + if (hole_fill) { __delete_from_page_cache(entry, NULL); /* Drop pagecache reference */ put_page(entry); - error = radix_tree_insert(page_tree, index, new_entry); + error = __radix_tree_insert(page_tree, index, + dax_radix_order(new_entry), new_entry); if (error) { new_entry = ERR_PTR(error); goto unlock; } mapping->nrexceptional++; - } else { + } else if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) { + /* + * Only swap our new entry into the radix tree if the current + * entry is a zero page or an empty entry. If a normal PTE or + * PMD entry is already in the tree, we leave it alone. This + * means that if we are trying to insert a PTE and the + * existing entry is a PMD, we will just leave the PMD in the + * tree and dirty it if necessary. 
+ */ void **slot; void *ret; @@ -660,7 +780,6 @@ static int dax_writeback_one(struct block_device *bdev, struct address_space *mapping, pgoff_t index, void *entry) { struct radix_tree_root *page_tree = &mapping->page_tree; - int type = RADIX_DAX_TYPE(entry); struct radix_tree_node *node; struct blk_dax_ctl dax; void **slot; @@ -681,13 +800,21 @@ static int dax_writeback_one(struct block_device *bdev, if (!radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_TOWRITE)) goto unlock; - if (WARN_ON_ONCE(type != RADIX_DAX_PTE && type != RADIX_DAX_PMD)) { + if (WARN_ON_ONCE(dax_is_empty_entry(entry) || + dax_is_zero_entry(entry))) { ret = -EIO; goto unlock; } - dax.sector = RADIX_DAX_SECTOR(entry); - dax.size = (type == RADIX_DAX_PMD ? PMD_SIZE : PAGE_SIZE); + /* + * Even if dax_writeback_mapping_range() was given a wbc->range_start + * in the middle of a PMD, the 'index' we are given will be aligned to + * the start index of the PMD, as will the sector we pull from + * 'entry'. This allows us to flush for PMD_SIZE and not have to + * worry about partial PMD writebacks. 
+ */ + dax.sector = dax_radix_sector(entry); + dax.size = PAGE_SIZE << dax_radix_order(entry); spin_unlock_irq(&mapping->tree_lock); /* @@ -726,12 +853,11 @@ int dax_writeback_mapping_range(struct address_space *mapping, struct block_device *bdev, struct writeback_control *wbc) { struct inode *inode = mapping->host; - pgoff_t start_index, end_index, pmd_index; + pgoff_t start_index, end_index; pgoff_t indices[PAGEVEC_SIZE]; struct pagevec pvec; bool done = false; int i, ret = 0; - void *entry; if (WARN_ON_ONCE(inode->i_blkbits != PAGE_SHIFT)) return -EIO; @@ -741,15 +867,6 @@ int dax_writeback_mapping_range(struct address_space *mapping, start_index = wbc->range_start >> PAGE_SHIFT; end_index = wbc->range_end >> PAGE_SHIFT; - pmd_index = DAX_PMD_INDEX(start_index); - - rcu_read_lock(); - entry = radix_tree_lookup(&mapping->page_tree, pmd_index); - rcu_read_unlock(); - - /* see if the start of our range is covered by a PMD entry */ - if (entry && RADIX_DAX_TYPE(entry) == RADIX_DAX_PMD) - start_index = pmd_index; tag_pages_for_writeback(mapping, start_index, end_index); @@ -794,7 +911,7 @@ static int dax_insert_mapping(struct address_space *mapping, return PTR_ERR(dax.addr); dax_unmap_atomic(bdev, &dax); - ret = dax_insert_mapping_entry(mapping, vmf, entry, dax.sector); + ret = dax_insert_mapping_entry(mapping, vmf, entry, dax.sector, 0); if (IS_ERR(ret)) return PTR_ERR(ret); *entryp = ret; @@ -841,7 +958,7 @@ int dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf, bh.b_bdev = inode->i_sb->s_bdev; bh.b_size = PAGE_SIZE; - entry = grab_mapping_entry(mapping, vmf->pgoff); + entry = grab_mapping_entry(mapping, vmf->pgoff, 0); if (IS_ERR(entry)) { error = PTR_ERR(entry); goto out; @@ -1162,7 +1279,7 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf, if (pos >= i_size_read(inode)) return VM_FAULT_SIGBUS; - entry = grab_mapping_entry(mapping, vmf->pgoff); + entry = grab_mapping_entry(mapping, vmf->pgoff, 0); if (IS_ERR(entry)) { error = 
PTR_ERR(entry); goto out; @@ -1264,4 +1381,191 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf, return VM_FAULT_NOPAGE | major; } EXPORT_SYMBOL_GPL(dax_iomap_fault); + +#ifdef CONFIG_FS_DAX_PMD +/* + * The 'colour' (ie low bits) within a PMD of a page offset. This comes up + * more often than one might expect in the below functions. + */ +#define PG_PMD_COLOUR ((PMD_SIZE >> PAGE_SHIFT) - 1) + +static int dax_pmd_insert_mapping(struct vm_area_struct *vma, pmd_t *pmd, + struct vm_fault *vmf, unsigned long address, + struct iomap *iomap, loff_t pos, bool write, void **entryp) +{ + struct address_space *mapping = vma->vm_file->f_mapping; + struct block_device *bdev = iomap->bdev; + struct blk_dax_ctl dax = { + .sector = dax_iomap_sector(iomap, pos), + .size = PMD_SIZE, + }; + long length = dax_map_atomic(bdev, &dax); + void *ret; + + if (length < 0) /* dax_map_atomic() failed */ + return VM_FAULT_FALLBACK; + if (length < PMD_SIZE) + goto unmap_fallback; + if (pfn_t_to_pfn(dax.pfn) & PG_PMD_COLOUR) + goto unmap_fallback; + if (!pfn_t_devmap(dax.pfn)) + goto unmap_fallback; + + dax_unmap_atomic(bdev, &dax); + + ret = dax_insert_mapping_entry(mapping, vmf, *entryp, dax.sector, + RADIX_DAX_PMD); + if (IS_ERR(ret)) + return VM_FAULT_FALLBACK; + *entryp = ret; + + return vmf_insert_pfn_pmd(vma, address, pmd, dax.pfn, write); + + unmap_fallback: + dax_unmap_atomic(bdev, &dax); + return VM_FAULT_FALLBACK; +} + +static int dax_pmd_load_hole(struct vm_area_struct *vma, pmd_t *pmd, + struct vm_fault *vmf, unsigned long address, + struct iomap *iomap, void **entryp) +{ + struct address_space *mapping = vma->vm_file->f_mapping; + unsigned long pmd_addr = address & PMD_MASK; + struct page *zero_page; + spinlock_t *ptl; + pmd_t pmd_entry; + void *ret; + + zero_page = mm_get_huge_zero_page(vma->vm_mm); + + if (unlikely(!zero_page)) + return VM_FAULT_FALLBACK; + + ret = dax_insert_mapping_entry(mapping, vmf, *entryp, 0, + RADIX_DAX_PMD | RADIX_DAX_HZP); + if 
(IS_ERR(ret)) + return VM_FAULT_FALLBACK; + *entryp = ret; + + ptl = pmd_lock(vma->vm_mm, pmd); + if (!pmd_none(*pmd)) { + spin_unlock(ptl); + return VM_FAULT_FALLBACK; + } + + pmd_entry = mk_pmd(zero_page, vma->vm_page_prot); + pmd_entry = pmd_mkhuge(pmd_entry); + set_pmd_at(vma->vm_mm, pmd_addr, pmd, pmd_entry); + spin_unlock(ptl); + return VM_FAULT_NOPAGE; +} + +int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address, + pmd_t *pmd, unsigned int flags, struct iomap_ops *ops) +{ + struct address_space *mapping = vma->vm_file->f_mapping; + unsigned long pmd_addr = address & PMD_MASK; + bool write = flags & FAULT_FLAG_WRITE; + unsigned int iomap_flags = write ? IOMAP_WRITE : 0; + struct inode *inode = mapping->host; + int result = VM_FAULT_FALLBACK; + struct iomap iomap = { 0 }; + pgoff_t max_pgoff, pgoff; + struct vm_fault vmf; + void *entry; + loff_t pos; + int error; + + /* Fall back to PTEs if we're going to COW */ + if (write && !(vma->vm_flags & VM_SHARED)) + goto fallback; + + /* If the PMD would extend outside the VMA */ + if (pmd_addr < vma->vm_start) + goto fallback; + if ((pmd_addr + PMD_SIZE) > vma->vm_end) + goto fallback; + + /* + * Check whether offset isn't beyond end of file now. Caller is + * supposed to hold locks serializing us with truncate / punch hole so + * this is a reliable test. + */ + pgoff = linear_page_index(vma, pmd_addr); + max_pgoff = (i_size_read(inode) - 1) >> PAGE_SHIFT; + + if (pgoff > max_pgoff) + return VM_FAULT_SIGBUS; + + /* If the PMD would extend beyond the file size */ + if ((pgoff | PG_PMD_COLOUR) > max_pgoff) + goto fallback; + + /* + * grab_mapping_entry() will make sure we get a 2M empty entry, a DAX + * PMD or a HZP entry. If it can't (because a 4k page is already in + * the tree, for instance), it will return -EEXIST and we just fall + * back to 4k entries. 
+ */ + entry = grab_mapping_entry(mapping, pgoff, RADIX_DAX_PMD); + if (IS_ERR(entry)) + goto fallback; + + /* + * Note that we don't use iomap_apply here. We aren't doing I/O, only + * setting up a mapping, so really we're using iomap_begin() as a way + * to look up our filesystem block. + */ + pos = (loff_t)pgoff << PAGE_SHIFT; + error = ops->iomap_begin(inode, pos, PMD_SIZE, iomap_flags, &iomap); + if (error) + goto unlock_entry; + if (iomap.offset + iomap.length < pos + PMD_SIZE) + goto finish_iomap; + + vmf.pgoff = pgoff; + vmf.flags = flags; + vmf.gfp_mask = mapping_gfp_mask(mapping) | __GFP_IO; + + switch (iomap.type) { + case IOMAP_MAPPED: + result = dax_pmd_insert_mapping(vma, pmd, &vmf, address, + &iomap, pos, write, &entry); + break; + case IOMAP_UNWRITTEN: + case IOMAP_HOLE: + if (WARN_ON_ONCE(write)) + goto finish_iomap; + result = dax_pmd_load_hole(vma, pmd, &vmf, address, &iomap, + &entry); + break; + default: + WARN_ON_ONCE(1); + break; + } + + finish_iomap: + if (ops->iomap_end) { + if (result == VM_FAULT_FALLBACK) { + ops->iomap_end(inode, pos, PMD_SIZE, 0, iomap_flags, + &iomap); + } else { + error = ops->iomap_end(inode, pos, PMD_SIZE, PMD_SIZE, + iomap_flags, &iomap); + if (error) + result = VM_FAULT_FALLBACK; + } + } + unlock_entry: + put_locked_mapping_entry(mapping, pgoff, entry); + fallback: + if (result == VM_FAULT_FALLBACK) { + split_huge_pmd(vma, pmd, address); + count_vm_event(THP_FAULT_FALLBACK); + } + return result; +} +EXPORT_SYMBOL_GPL(dax_iomap_pmd_fault); +#endif /* CONFIG_FS_DAX_PMD */ #endif /* CONFIG_FS_IOMAP */ diff --git a/include/linux/dax.h b/include/linux/dax.h index e9ea78c1cf98..8d1a5c47945f 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -9,20 +9,32 @@ struct iomap_ops; /* - * We use lowest available bit in exceptional entry for locking, other two - * bits to determine entry type. In total 3 special bits. 
+ * We use lowest available bit in exceptional entry for locking, one bit for + * the entry size (PMD) and two more to tell us if the entry is a huge zero + * page (HZP) or an empty entry that is just used for locking. In total four + * special bits. + * + * If the PMD bit isn't set the entry has size PAGE_SIZE, and if the HZP and + * EMPTY bits aren't set the entry is a normal DAX entry with a filesystem + * block allocation. */ -#define RADIX_DAX_SHIFT (RADIX_TREE_EXCEPTIONAL_SHIFT + 3) +#define RADIX_DAX_SHIFT (RADIX_TREE_EXCEPTIONAL_SHIFT + 4) #define RADIX_DAX_ENTRY_LOCK (1 << RADIX_TREE_EXCEPTIONAL_SHIFT) -#define RADIX_DAX_PTE (1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 1)) -#define RADIX_DAX_PMD (1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 2)) -#define RADIX_DAX_TYPE_MASK (RADIX_DAX_PTE | RADIX_DAX_PMD) -#define RADIX_DAX_TYPE(entry) ((unsigned long)entry & RADIX_DAX_TYPE_MASK) -#define RADIX_DAX_SECTOR(entry) (((unsigned long)entry >> RADIX_DAX_SHIFT)) -#define RADIX_DAX_ENTRY(sector, pmd) ((void *)((unsigned long)sector << \ - RADIX_DAX_SHIFT | (pmd ? 
RADIX_DAX_PMD : RADIX_DAX_PTE) | \ - RADIX_TREE_EXCEPTIONAL_ENTRY)) +#define RADIX_DAX_PMD (1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 1)) +#define RADIX_DAX_HZP (1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 2)) +#define RADIX_DAX_EMPTY (1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 3)) +static inline unsigned long dax_radix_sector(void *entry) +{ + return (unsigned long)entry >> RADIX_DAX_SHIFT; +} + +static inline void *dax_radix_locked_entry(sector_t sector, unsigned long flags) +{ + return (void *)(RADIX_TREE_EXCEPTIONAL_ENTRY | flags | + ((unsigned long)sector << RADIX_DAX_SHIFT) | + RADIX_DAX_ENTRY_LOCK); +} ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter, struct iomap_ops *ops); @@ -67,6 +79,27 @@ static inline int dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr, return VM_FAULT_FALLBACK; } +#ifdef CONFIG_FS_DAX_PMD +static inline unsigned int dax_radix_order(void *entry) +{ + if ((unsigned long)entry & RADIX_DAX_PMD) + return PMD_SHIFT - PAGE_SHIFT; + return 0; +} +int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address, + pmd_t *pmd, unsigned int flags, struct iomap_ops *ops); +#else +static inline unsigned int dax_radix_order(void *entry) +{ + return 0; +} +static inline int dax_iomap_pmd_fault(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmd, unsigned int flags, + struct iomap_ops *ops) +{ + return VM_FAULT_FALLBACK; +} +#endif int dax_pfn_mkwrite(struct vm_area_struct *, struct vm_fault *); #define dax_mkwrite(vma, vmf, gb) dax_fault(vma, vmf, gb) diff --git a/mm/filemap.c b/mm/filemap.c index 1ffb7dcd1b5d..00ab94a882de 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -137,8 +137,7 @@ static int page_cache_tree_insert(struct address_space *mapping, } else { /* DAX can replace empty locked entry with a hole */ WARN_ON_ONCE(p != - (void *)(RADIX_TREE_EXCEPTIONAL_ENTRY | - RADIX_DAX_ENTRY_LOCK)); + dax_radix_locked_entry(0, RADIX_DAX_EMPTY)); /* DAX accounts exceptional entries as normal pages */ if (node) 
workingset_node_pages_dec(node); -- cgit v1.2.3 From b57d74aff9ab92fbfb7c197c384d1adfa2827b2e Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 1 Sep 2016 10:20:33 -0600 Subject: writeback: track if we're sleeping on progress in balance_dirty_pages() Note in the bdi_writeback structure whenever a task ends up sleeping waiting for progress. We can use that information in the lower layers to increase the priority of writes. Signed-off-by: Jens Axboe Reviewed-by: Jan Kara --- include/linux/backing-dev-defs.h | 2 ++ mm/backing-dev.c | 1 + mm/page-writeback.c | 1 + 3 files changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h index c357f27d5483..dc5f76d7f648 100644 --- a/include/linux/backing-dev-defs.h +++ b/include/linux/backing-dev-defs.h @@ -116,6 +116,8 @@ struct bdi_writeback { struct list_head work_list; struct delayed_work dwork; /* work item used for writeback */ + unsigned long dirty_sleep; /* last wait */ + struct list_head bdi_node; /* anchored at bdi->wb_list */ #ifdef CONFIG_CGROUP_WRITEBACK diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 8fde443f36d7..3bfed5ab2475 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -310,6 +310,7 @@ static int wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi, spin_lock_init(&wb->work_lock); INIT_LIST_HEAD(&wb->work_list); INIT_DELAYED_WORK(&wb->dwork, wb_workfn); + wb->dirty_sleep = jiffies; wb->congested = wb_congested_get_create(bdi, blkcg_id, gfp); if (!wb->congested) diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 439cc63ad903..52e2f8e3b472 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -1778,6 +1778,7 @@ pause: pause, start_time); __set_current_state(TASK_KILLABLE); + wb->dirty_sleep = now; io_schedule_timeout(pause); current->dirty_paused_when = now + pause; -- cgit v1.2.3 From 7392b4bb702b05749539ff0936e94976248240c9 Mon Sep 17 00:00:00 2001 From: "monk.liu" Date: Fri, 4 Nov 2016 
16:16:09 -0400 Subject: dma-buf: return index of the first signaled fence (v2) Return the index of the first signaled fence. This information is useful in some APIs like Vulkan. v2: rebase on drm-next (fence -> dma_fence) Signed-off-by: monk.liu Signed-off-by: Alex Deucher Cc: Sumit Semwal Signed-off-by: Sumit Semwal [sumits: fix warnings] Link: http://patchwork.freedesktop.org/patch/msgid/1478290570-30982-1-git-send-email-alexander.deucher@amd.com --- drivers/dma-buf/dma-fence.c | 21 ++++++++++++++++----- include/linux/dma-fence.h | 3 ++- 2 files changed, 18 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c index 3a7bf009c21c..cda40cbd8c43 100644 --- a/drivers/dma-buf/dma-fence.c +++ b/drivers/dma-buf/dma-fence.c @@ -403,14 +403,18 @@ out: EXPORT_SYMBOL(dma_fence_default_wait); static bool -dma_fence_test_signaled_any(struct dma_fence **fences, uint32_t count) +dma_fence_test_signaled_any(struct dma_fence **fences, uint32_t count, + uint32_t *idx) { int i; for (i = 0; i < count; ++i) { struct dma_fence *fence = fences[i]; - if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) { + if (idx) + *idx = i; return true; + } } return false; } @@ -422,6 +426,8 @@ dma_fence_test_signaled_any(struct dma_fence **fences, uint32_t count) * @count: [in] number of fences to wait on * @intr: [in] if true, do an interruptible wait * @timeout: [in] timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT + * @idx: [out] the first signaled fence index, meaningful only on + * positive return * * Returns -EINVAL on custom fence wait implementation, -ERESTARTSYS if * interrupted, 0 if the wait timed out, or the remaining timeout in jiffies @@ -433,7 +439,7 @@ dma_fence_test_signaled_any(struct dma_fence **fences, uint32_t count) */ signed long dma_fence_wait_any_timeout(struct dma_fence **fences, uint32_t count, - bool intr, signed long timeout) + 
bool intr, signed long timeout, uint32_t *idx) { struct default_wait_cb *cb; signed long ret = timeout; @@ -444,8 +450,11 @@ dma_fence_wait_any_timeout(struct dma_fence **fences, uint32_t count, if (timeout == 0) { for (i = 0; i < count; ++i) - if (dma_fence_is_signaled(fences[i])) + if (dma_fence_is_signaled(fences[i])) { + if (idx) + *idx = i; return 1; + } return 0; } @@ -468,6 +477,8 @@ dma_fence_wait_any_timeout(struct dma_fence **fences, uint32_t count, if (dma_fence_add_callback(fence, &cb[i].base, dma_fence_default_wait_cb)) { /* This fence is already signaled */ + if (idx) + *idx = i; goto fence_rm_cb; } } @@ -478,7 +489,7 @@ dma_fence_wait_any_timeout(struct dma_fence **fences, uint32_t count, else set_current_state(TASK_UNINTERRUPTIBLE); - if (dma_fence_test_signaled_any(fences, count)) + if (dma_fence_test_signaled_any(fences, count, idx)) break; ret = schedule_timeout(ret); diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h index ba60c043a5d3..fcf4b1971eba 100644 --- a/include/linux/dma-fence.h +++ b/include/linux/dma-fence.h @@ -382,7 +382,8 @@ signed long dma_fence_wait_timeout(struct dma_fence *, bool intr, signed long timeout); signed long dma_fence_wait_any_timeout(struct dma_fence **fences, uint32_t count, - bool intr, signed long timeout); + bool intr, signed long timeout, + uint32_t *idx); /** * dma_fence_wait - sleep until the fence gets signaled -- cgit v1.2.3 From 9e5a7e22951bc12ee45cb617919d57b5efce56b5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 1 Nov 2016 08:12:47 -0600 Subject: blk-mq: export blk_mq_map_queues This will allow SCSI to have a single blk_mq_ops structure that either lets the LLDD map the queues to PCIe MSIx vectors or use the default. Signed-off-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Reviewed-by: Sagi Grimberg Reviewed-by: Jens Axboe Signed-off-by: Martin K. 
Petersen --- block/blk-mq-cpumap.c | 1 + block/blk-mq.h | 1 - include/linux/blk-mq.h | 1 + 3 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/block/blk-mq-cpumap.c b/block/blk-mq-cpumap.c index 19b1d9c5f07e..8e61e8640e17 100644 --- a/block/blk-mq-cpumap.c +++ b/block/blk-mq-cpumap.c @@ -87,6 +87,7 @@ int blk_mq_map_queues(struct blk_mq_tag_set *set) free_cpumask_var(cpus); return 0; } +EXPORT_SYMBOL_GPL(blk_mq_map_queues); /* * We have no quick way of doing reverse lookups. This is only used at diff --git a/block/blk-mq.h b/block/blk-mq.h index e5d25249028c..5347f011e90d 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -38,7 +38,6 @@ void blk_mq_disable_hotplug(void); /* * CPU -> queue mappings */ -int blk_mq_map_queues(struct blk_mq_tag_set *set); extern int blk_mq_hw_queue_to_node(unsigned int *map, unsigned int); static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 535ab2e13d2e..6c0fb259581f 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -237,6 +237,7 @@ void blk_mq_unfreeze_queue(struct request_queue *q); void blk_mq_freeze_queue_start(struct request_queue *q); int blk_mq_reinit_tagset(struct blk_mq_tag_set *set); +int blk_mq_map_queues(struct blk_mq_tag_set *set); void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues); /* -- cgit v1.2.3 From feb3d79800ece19c18b979c5edd1c28755f59d07 Mon Sep 17 00:00:00 2001 From: Vivek Gautam Date: Tue, 8 Nov 2016 15:37:48 +0530 Subject: scsi: ufs-qcom: phy/hcd: Refactoring phy clock handling Add phy clock enable code to phy_power_on/off callbacks, and remove explicit calls to enable these phy clocks from the ufs-qcom hcd driver. Signed-off-by: Vivek Gautam Reviewed-by: Subhash Jadavani Signed-off-by: Martin K. 
Petersen --- drivers/phy/phy-qcom-ufs.c | 36 ++++++++++++++++++------------------ drivers/scsi/ufs/ufs-qcom.c | 21 ++++++--------------- include/linux/phy/phy-qcom-ufs.h | 18 ------------------ 3 files changed, 24 insertions(+), 51 deletions(-) (limited to 'include/linux') diff --git a/drivers/phy/phy-qcom-ufs.c b/drivers/phy/phy-qcom-ufs.c index c5c29fef4c56..fdd9b901983f 100644 --- a/drivers/phy/phy-qcom-ufs.c +++ b/drivers/phy/phy-qcom-ufs.c @@ -361,10 +361,9 @@ out: return ret; } -int ufs_qcom_phy_enable_ref_clk(struct phy *generic_phy) +static int ufs_qcom_phy_enable_ref_clk(struct ufs_qcom_phy *phy) { int ret = 0; - struct ufs_qcom_phy *phy = get_ufs_qcom_phy(generic_phy); if (phy->is_ref_clk_enabled) goto out; @@ -411,7 +410,6 @@ out_disable_src: out: return ret; } -EXPORT_SYMBOL_GPL(ufs_qcom_phy_enable_ref_clk); static int ufs_qcom_phy_disable_vreg(struct device *dev, struct ufs_qcom_phy_vreg *vreg) @@ -435,10 +433,8 @@ out: return ret; } -void ufs_qcom_phy_disable_ref_clk(struct phy *generic_phy) +static void ufs_qcom_phy_disable_ref_clk(struct ufs_qcom_phy *phy) { - struct ufs_qcom_phy *phy = get_ufs_qcom_phy(generic_phy); - if (phy->is_ref_clk_enabled) { clk_disable_unprepare(phy->ref_clk); /* @@ -451,7 +447,6 @@ void ufs_qcom_phy_disable_ref_clk(struct phy *generic_phy) phy->is_ref_clk_enabled = false; } } -EXPORT_SYMBOL_GPL(ufs_qcom_phy_disable_ref_clk); #define UFS_REF_CLK_EN (1 << 5) @@ -504,9 +499,8 @@ void ufs_qcom_phy_disable_dev_ref_clk(struct phy *generic_phy) EXPORT_SYMBOL_GPL(ufs_qcom_phy_disable_dev_ref_clk); /* Turn ON M-PHY RMMI interface clocks */ -int ufs_qcom_phy_enable_iface_clk(struct phy *generic_phy) +static int ufs_qcom_phy_enable_iface_clk(struct ufs_qcom_phy *phy) { - struct ufs_qcom_phy *phy = get_ufs_qcom_phy(generic_phy); int ret = 0; if (phy->is_iface_clk_enabled) @@ -530,20 +524,16 @@ int ufs_qcom_phy_enable_iface_clk(struct phy *generic_phy) out: return ret; } -EXPORT_SYMBOL_GPL(ufs_qcom_phy_enable_iface_clk); /* Turn OFF 
M-PHY RMMI interface clocks */ -void ufs_qcom_phy_disable_iface_clk(struct phy *generic_phy) +void ufs_qcom_phy_disable_iface_clk(struct ufs_qcom_phy *phy) { - struct ufs_qcom_phy *phy = get_ufs_qcom_phy(generic_phy); - if (phy->is_iface_clk_enabled) { clk_disable_unprepare(phy->tx_iface_clk); clk_disable_unprepare(phy->rx_iface_clk); phy->is_iface_clk_enabled = false; } } -EXPORT_SYMBOL_GPL(ufs_qcom_phy_disable_iface_clk); int ufs_qcom_phy_start_serdes(struct phy *generic_phy) { @@ -661,13 +651,20 @@ int ufs_qcom_phy_power_on(struct phy *generic_phy) goto out_disable_phy; } - err = ufs_qcom_phy_enable_ref_clk(generic_phy); + err = ufs_qcom_phy_enable_iface_clk(phy_common); if (err) { - dev_err(dev, "%s enable phy ref clock failed, err=%d\n", + dev_err(dev, "%s enable phy iface clock failed, err=%d\n", __func__, err); goto out_disable_pll; } + err = ufs_qcom_phy_enable_ref_clk(phy_common); + if (err) { + dev_err(dev, "%s enable phy ref clock failed, err=%d\n", + __func__, err); + goto out_disable_iface_clk; + } + /* enable device PHY ref_clk pad rail */ if (phy_common->vddp_ref_clk.reg) { err = ufs_qcom_phy_enable_vreg(dev, @@ -683,7 +680,9 @@ int ufs_qcom_phy_power_on(struct phy *generic_phy) goto out; out_disable_ref_clk: - ufs_qcom_phy_disable_ref_clk(generic_phy); + ufs_qcom_phy_disable_ref_clk(phy_common); +out_disable_iface_clk: + ufs_qcom_phy_disable_iface_clk(phy_common); out_disable_pll: ufs_qcom_phy_disable_vreg(dev, &phy_common->vdda_pll); out_disable_phy: @@ -702,7 +701,8 @@ int ufs_qcom_phy_power_off(struct phy *generic_phy) if (phy_common->vddp_ref_clk.reg) ufs_qcom_phy_disable_vreg(phy_common->dev, &phy_common->vddp_ref_clk); - ufs_qcom_phy_disable_ref_clk(generic_phy); + ufs_qcom_phy_disable_ref_clk(phy_common); + ufs_qcom_phy_disable_iface_clk(phy_common); ufs_qcom_phy_disable_vreg(phy_common->dev, &phy_common->vdda_pll); ufs_qcom_phy_disable_vreg(phy_common->dev, &phy_common->vdda_phy); diff --git a/drivers/scsi/ufs/ufs-qcom.c 
b/drivers/scsi/ufs/ufs-qcom.c index 3c4f602eecd2..5f70a35c053f 100644 --- a/drivers/scsi/ufs/ufs-qcom.c +++ b/drivers/scsi/ufs/ufs-qcom.c @@ -1114,17 +1114,8 @@ static int ufs_qcom_setup_clocks(struct ufs_hba *hba, bool on, return 0; if (on && (status == POST_CHANGE)) { - err = ufs_qcom_phy_enable_iface_clk(host->generic_phy); - if (err) - goto out; + phy_power_on(host->generic_phy); - err = ufs_qcom_phy_enable_ref_clk(host->generic_phy); - if (err) { - dev_err(hba->dev, "%s enable phy ref clock failed, err=%d\n", - __func__, err); - ufs_qcom_phy_disable_iface_clk(host->generic_phy); - goto out; - } /* enable the device ref clock for HS mode*/ if (ufshcd_is_hs_mode(&hba->pwr_info)) ufs_qcom_dev_ref_clk_ctrl(host, true); @@ -1133,13 +1124,14 @@ static int ufs_qcom_setup_clocks(struct ufs_hba *hba, bool on, ufs_qcom_update_bus_bw_vote(host); } else if (!on && (status == PRE_CHANGE)) { - - /* M-PHY RMMI interface clocks can be turned off */ - ufs_qcom_phy_disable_iface_clk(host->generic_phy); - if (!ufs_qcom_is_link_active(hba)) + if (!ufs_qcom_is_link_active(hba)) { /* disable device ref_clk */ ufs_qcom_dev_ref_clk_ctrl(host, false); + /* powering off PHY during aggressive clk gating */ + phy_power_off(host->generic_phy); + } + vote = host->bus_vote.min_bw_vote; } @@ -1148,7 +1140,6 @@ static int ufs_qcom_setup_clocks(struct ufs_hba *hba, bool on, dev_err(hba->dev, "%s: set bus vote failed %d\n", __func__, err); -out: return err; } diff --git a/include/linux/phy/phy-qcom-ufs.h b/include/linux/phy/phy-qcom-ufs.h index 9d18e9f948e9..35c070ea6ea3 100644 --- a/include/linux/phy/phy-qcom-ufs.h +++ b/include/linux/phy/phy-qcom-ufs.h @@ -17,22 +17,6 @@ #include "phy.h" -/** - * ufs_qcom_phy_enable_ref_clk() - Enable the phy - * ref clock. - * @phy: reference to a generic phy - * - * returns 0 for success, and non-zero for error. - */ -int ufs_qcom_phy_enable_ref_clk(struct phy *phy); - -/** - * ufs_qcom_phy_disable_ref_clk() - Disable the phy - * ref clock. 
- * @phy: reference to a generic phy. - */ -void ufs_qcom_phy_disable_ref_clk(struct phy *phy); - /** * ufs_qcom_phy_enable_dev_ref_clk() - Enable the device * ref clock. @@ -47,8 +31,6 @@ void ufs_qcom_phy_enable_dev_ref_clk(struct phy *phy); */ void ufs_qcom_phy_disable_dev_ref_clk(struct phy *phy); -int ufs_qcom_phy_enable_iface_clk(struct phy *phy); -void ufs_qcom_phy_disable_iface_clk(struct phy *phy); int ufs_qcom_phy_start_serdes(struct phy *phy); int ufs_qcom_phy_set_tx_lane_enable(struct phy *phy, u32 tx_lanes); int ufs_qcom_phy_calibrate_phy(struct phy *phy, bool is_rate_B); -- cgit v1.2.3 From 3aeed5b573f97b4525841cc07c1e948227af389f Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Tue, 8 Nov 2016 16:34:57 -0800 Subject: Input: synaptics-rmi4 - move IRQ handling to rmi_driver The attn IRQ is related to the chip, rather than the transport, so move all handling of interrupts to the core driver. This also makes sure that there are no races between interrupts and availability of the resources used by the core driver. 
Signed-off-by: Bjorn Andersson Signed-off-by: Benjamin Tissoires Signed-off-by: Dmitry Torokhov --- drivers/input/rmi4/rmi_driver.c | 73 +++++++++++++++++++++++++++++++++++++--- drivers/input/rmi4/rmi_i2c.c | 74 +++-------------------------------------- drivers/input/rmi4/rmi_spi.c | 72 +++------------------------------------ include/linux/rmi.h | 7 ++-- 4 files changed, 83 insertions(+), 143 deletions(-) (limited to 'include/linux') diff --git a/drivers/input/rmi4/rmi_driver.c b/drivers/input/rmi4/rmi_driver.c index 06feede3ce17..4f8d19794b01 100644 --- a/drivers/input/rmi4/rmi_driver.c +++ b/drivers/input/rmi4/rmi_driver.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -136,7 +137,7 @@ static void process_one_interrupt(struct rmi_driver_data *data, } } -int rmi_process_interrupt_requests(struct rmi_device *rmi_dev) +static int rmi_process_interrupt_requests(struct rmi_device *rmi_dev) { struct rmi_driver_data *data = dev_get_drvdata(&rmi_dev->dev); struct device *dev = &rmi_dev->dev; @@ -181,7 +182,42 @@ int rmi_process_interrupt_requests(struct rmi_device *rmi_dev) return 0; } -EXPORT_SYMBOL_GPL(rmi_process_interrupt_requests); + +static irqreturn_t rmi_irq_fn(int irq, void *dev_id) +{ + struct rmi_device *rmi_dev = dev_id; + int ret; + + ret = rmi_process_interrupt_requests(rmi_dev); + if (ret) + rmi_dbg(RMI_DEBUG_CORE, &rmi_dev->dev, + "Failed to process interrupt request: %d\n", ret); + + return IRQ_HANDLED; +} + +static int rmi_irq_init(struct rmi_device *rmi_dev) +{ + struct rmi_device_platform_data *pdata = rmi_get_platform_data(rmi_dev); + int irq_flags = irq_get_trigger_type(pdata->irq); + int ret; + + if (!irq_flags) + irq_flags = IRQF_TRIGGER_LOW; + + ret = devm_request_threaded_irq(&rmi_dev->dev, pdata->irq, NULL, + rmi_irq_fn, irq_flags | IRQF_ONESHOT, + dev_name(rmi_dev->xport->dev), + rmi_dev); + if (ret < 0) { + dev_err(&rmi_dev->dev, "Failed to register interrupt %d\n", + pdata->irq); + + return ret; + } + + 
return 0; +} static int suspend_one_function(struct rmi_function *fn) { @@ -802,8 +838,10 @@ err_put_fn: return error; } -int rmi_driver_suspend(struct rmi_device *rmi_dev) +int rmi_driver_suspend(struct rmi_device *rmi_dev, bool enable_wake) { + struct rmi_device_platform_data *pdata = rmi_get_platform_data(rmi_dev); + int irq = pdata->irq; int retval = 0; retval = rmi_suspend_functions(rmi_dev); @@ -811,14 +849,33 @@ int rmi_driver_suspend(struct rmi_device *rmi_dev) dev_warn(&rmi_dev->dev, "Failed to suspend functions: %d\n", retval); + disable_irq(irq); + if (enable_wake && device_may_wakeup(rmi_dev->xport->dev)) { + retval = enable_irq_wake(irq); + if (!retval) + dev_warn(&rmi_dev->dev, + "Failed to enable irq for wake: %d\n", + retval); + } return retval; } EXPORT_SYMBOL_GPL(rmi_driver_suspend); -int rmi_driver_resume(struct rmi_device *rmi_dev) +int rmi_driver_resume(struct rmi_device *rmi_dev, bool clear_wake) { + struct rmi_device_platform_data *pdata = rmi_get_platform_data(rmi_dev); + int irq = pdata->irq; int retval; + enable_irq(irq); + if (clear_wake && device_may_wakeup(rmi_dev->xport->dev)) { + retval = disable_irq_wake(irq); + if (!retval) + dev_warn(&rmi_dev->dev, + "Failed to disable irq for wake: %d\n", + retval); + } + retval = rmi_resume_functions(rmi_dev); if (retval) dev_warn(&rmi_dev->dev, "Failed to suspend functions: %d\n", @@ -831,6 +888,10 @@ EXPORT_SYMBOL_GPL(rmi_driver_resume); static int rmi_driver_remove(struct device *dev) { struct rmi_device *rmi_dev = to_rmi_device(dev); + struct rmi_device_platform_data *pdata = rmi_get_platform_data(rmi_dev); + int irq = pdata->irq; + + disable_irq(irq); rmi_free_function_list(rmi_dev); @@ -1050,6 +1111,10 @@ static int rmi_driver_probe(struct device *dev) } } + retval = rmi_irq_init(rmi_dev); + if (retval < 0) + goto err_destroy_functions; + if (data->f01_container->dev.driver) /* Driver already bound, so enable ATTN now. 
*/ return enable_sensor(rmi_dev); diff --git a/drivers/input/rmi4/rmi_i2c.c b/drivers/input/rmi4/rmi_i2c.c index 6f2e0e4f0296..64a548822da4 100644 --- a/drivers/input/rmi4/rmi_i2c.c +++ b/drivers/input/rmi4/rmi_i2c.c @@ -9,7 +9,6 @@ #include #include -#include #include #include #include @@ -35,8 +34,6 @@ struct rmi_i2c_xport { struct mutex page_mutex; int page; - int irq; - u8 *tx_buf; size_t tx_buf_size; @@ -177,42 +174,6 @@ static const struct rmi_transport_ops rmi_i2c_ops = { .read_block = rmi_i2c_read_block, }; -static irqreturn_t rmi_i2c_irq(int irq, void *dev_id) -{ - struct rmi_i2c_xport *rmi_i2c = dev_id; - struct rmi_device *rmi_dev = rmi_i2c->xport.rmi_dev; - int ret; - - ret = rmi_process_interrupt_requests(rmi_dev); - if (ret) - rmi_dbg(RMI_DEBUG_XPORT, &rmi_dev->dev, - "Failed to process interrupt request: %d\n", ret); - - return IRQ_HANDLED; -} - -static int rmi_i2c_init_irq(struct i2c_client *client) -{ - struct rmi_i2c_xport *rmi_i2c = i2c_get_clientdata(client); - int irq_flags = irqd_get_trigger_type(irq_get_irq_data(rmi_i2c->irq)); - int ret; - - if (!irq_flags) - irq_flags = IRQF_TRIGGER_LOW; - - ret = devm_request_threaded_irq(&client->dev, rmi_i2c->irq, NULL, - rmi_i2c_irq, irq_flags | IRQF_ONESHOT, client->name, - rmi_i2c); - if (ret < 0) { - dev_warn(&client->dev, "Failed to register interrupt %d\n", - rmi_i2c->irq); - - return ret; - } - - return 0; -} - #ifdef CONFIG_OF static const struct of_device_id rmi_i2c_of_match[] = { { .compatible = "syna,rmi4-i2c" }, @@ -240,8 +201,7 @@ static int rmi_i2c_probe(struct i2c_client *client, if (!client->dev.of_node && client_pdata) *pdata = *client_pdata; - if (client->irq > 0) - rmi_i2c->irq = client->irq; + pdata->irq = client->irq; rmi_dbg(RMI_DEBUG_XPORT, &client->dev, "Probing %s.\n", dev_name(&client->dev)); @@ -295,10 +255,6 @@ static int rmi_i2c_probe(struct i2c_client *client, return retval; } - retval = rmi_i2c_init_irq(client); - if (retval < 0) - return retval; - dev_info(&client->dev, 
"registered rmi i2c driver at %#04x.\n", client->addr); return 0; @@ -322,18 +278,10 @@ static int rmi_i2c_suspend(struct device *dev) struct rmi_i2c_xport *rmi_i2c = i2c_get_clientdata(client); int ret; - ret = rmi_driver_suspend(rmi_i2c->xport.rmi_dev); + ret = rmi_driver_suspend(rmi_i2c->xport.rmi_dev, true); if (ret) dev_warn(dev, "Failed to resume device: %d\n", ret); - disable_irq(rmi_i2c->irq); - if (device_may_wakeup(&client->dev)) { - ret = enable_irq_wake(rmi_i2c->irq); - if (!ret) - dev_warn(dev, "Failed to enable irq for wake: %d\n", - ret); - } - regulator_bulk_disable(ARRAY_SIZE(rmi_i2c->supplies), rmi_i2c->supplies); @@ -353,15 +301,7 @@ static int rmi_i2c_resume(struct device *dev) msleep(rmi_i2c->startup_delay); - enable_irq(rmi_i2c->irq); - if (device_may_wakeup(&client->dev)) { - ret = disable_irq_wake(rmi_i2c->irq); - if (!ret) - dev_warn(dev, "Failed to disable irq for wake: %d\n", - ret); - } - - ret = rmi_driver_resume(rmi_i2c->xport.rmi_dev); + ret = rmi_driver_resume(rmi_i2c->xport.rmi_dev, true); if (ret) dev_warn(dev, "Failed to resume device: %d\n", ret); @@ -376,12 +316,10 @@ static int rmi_i2c_runtime_suspend(struct device *dev) struct rmi_i2c_xport *rmi_i2c = i2c_get_clientdata(client); int ret; - ret = rmi_driver_suspend(rmi_i2c->xport.rmi_dev); + ret = rmi_driver_suspend(rmi_i2c->xport.rmi_dev, false); if (ret) dev_warn(dev, "Failed to resume device: %d\n", ret); - disable_irq(rmi_i2c->irq); - regulator_bulk_disable(ARRAY_SIZE(rmi_i2c->supplies), rmi_i2c->supplies); @@ -401,9 +339,7 @@ static int rmi_i2c_runtime_resume(struct device *dev) msleep(rmi_i2c->startup_delay); - enable_irq(rmi_i2c->irq); - - ret = rmi_driver_resume(rmi_i2c->xport.rmi_dev); + ret = rmi_driver_resume(rmi_i2c->xport.rmi_dev, false); if (ret) dev_warn(dev, "Failed to resume device: %d\n", ret); diff --git a/drivers/input/rmi4/rmi_spi.c b/drivers/input/rmi4/rmi_spi.c index 55bd1b34970c..f3e9e488635c 100644 --- a/drivers/input/rmi4/rmi_spi.c +++ 
b/drivers/input/rmi4/rmi_spi.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include "rmi_driver.h" @@ -44,8 +43,6 @@ struct rmi_spi_xport { struct mutex page_mutex; int page; - int irq; - u8 *rx_buf; u8 *tx_buf; int xfer_buf_size; @@ -326,41 +323,6 @@ static const struct rmi_transport_ops rmi_spi_ops = { .read_block = rmi_spi_read_block, }; -static irqreturn_t rmi_spi_irq(int irq, void *dev_id) -{ - struct rmi_spi_xport *rmi_spi = dev_id; - struct rmi_device *rmi_dev = rmi_spi->xport.rmi_dev; - int ret; - - ret = rmi_process_interrupt_requests(rmi_dev); - if (ret) - rmi_dbg(RMI_DEBUG_XPORT, &rmi_dev->dev, - "Failed to process interrupt request: %d\n", ret); - - return IRQ_HANDLED; -} - -static int rmi_spi_init_irq(struct spi_device *spi) -{ - struct rmi_spi_xport *rmi_spi = spi_get_drvdata(spi); - int irq_flags = irqd_get_trigger_type(irq_get_irq_data(rmi_spi->irq)); - int ret; - - if (!irq_flags) - irq_flags = IRQF_TRIGGER_LOW; - - ret = devm_request_threaded_irq(&spi->dev, rmi_spi->irq, NULL, - rmi_spi_irq, irq_flags | IRQF_ONESHOT, - dev_name(&spi->dev), rmi_spi); - if (ret < 0) { - dev_warn(&spi->dev, "Failed to register interrupt %d\n", - rmi_spi->irq); - return ret; - } - - return 0; -} - #ifdef CONFIG_OF static int rmi_spi_of_probe(struct spi_device *spi, struct rmi_device_platform_data *pdata) @@ -433,8 +395,7 @@ static int rmi_spi_probe(struct spi_device *spi) return retval; } - if (spi->irq > 0) - rmi_spi->irq = spi->irq; + pdata->irq = spi->irq; rmi_spi->spi = spi; mutex_init(&rmi_spi->page_mutex); @@ -465,10 +426,6 @@ static int rmi_spi_probe(struct spi_device *spi) return retval; } - retval = rmi_spi_init_irq(spi); - if (retval < 0) - return retval; - dev_info(&spi->dev, "registered RMI SPI driver\n"); return 0; } @@ -489,17 +446,10 @@ static int rmi_spi_suspend(struct device *dev) struct rmi_spi_xport *rmi_spi = spi_get_drvdata(spi); int ret; - ret = rmi_driver_suspend(rmi_spi->xport.rmi_dev); + ret = 
rmi_driver_suspend(rmi_spi->xport.rmi_dev, true); if (ret) dev_warn(dev, "Failed to resume device: %d\n", ret); - disable_irq(rmi_spi->irq); - if (device_may_wakeup(&spi->dev)) { - ret = enable_irq_wake(rmi_spi->irq); - if (!ret) - dev_warn(dev, "Failed to enable irq for wake: %d\n", - ret); - } return ret; } @@ -509,15 +459,7 @@ static int rmi_spi_resume(struct device *dev) struct rmi_spi_xport *rmi_spi = spi_get_drvdata(spi); int ret; - enable_irq(rmi_spi->irq); - if (device_may_wakeup(&spi->dev)) { - ret = disable_irq_wake(rmi_spi->irq); - if (!ret) - dev_warn(dev, "Failed to disable irq for wake: %d\n", - ret); - } - - ret = rmi_driver_resume(rmi_spi->xport.rmi_dev); + ret = rmi_driver_resume(rmi_spi->xport.rmi_dev, true); if (ret) dev_warn(dev, "Failed to resume device: %d\n", ret); @@ -532,12 +474,10 @@ static int rmi_spi_runtime_suspend(struct device *dev) struct rmi_spi_xport *rmi_spi = spi_get_drvdata(spi); int ret; - ret = rmi_driver_suspend(rmi_spi->xport.rmi_dev); + ret = rmi_driver_suspend(rmi_spi->xport.rmi_dev, false); if (ret) dev_warn(dev, "Failed to resume device: %d\n", ret); - disable_irq(rmi_spi->irq); - return 0; } @@ -547,9 +487,7 @@ static int rmi_spi_runtime_resume(struct device *dev) struct rmi_spi_xport *rmi_spi = spi_get_drvdata(spi); int ret; - enable_irq(rmi_spi->irq); - - ret = rmi_driver_resume(rmi_spi->xport.rmi_dev); + ret = rmi_driver_resume(rmi_spi->xport.rmi_dev, false); if (ret) dev_warn(dev, "Failed to resume device: %d\n", ret); diff --git a/include/linux/rmi.h b/include/linux/rmi.h index e0aca1476001..5944e6c2470d 100644 --- a/include/linux/rmi.h +++ b/include/linux/rmi.h @@ -204,9 +204,11 @@ struct rmi_device_platform_data_spi { * @reset_delay_ms - after issuing a reset command to the touch sensor, the * driver waits a few milliseconds to give the firmware a chance to * to re-initialize. You can override the default wait period here. 
+ * @irq: irq associated with the attn gpio line, or negative */ struct rmi_device_platform_data { int reset_delay_ms; + int irq; struct rmi_device_platform_data_spi spi_data; @@ -352,8 +354,7 @@ struct rmi_driver_data { int rmi_register_transport_device(struct rmi_transport_dev *xport); void rmi_unregister_transport_device(struct rmi_transport_dev *xport); -int rmi_process_interrupt_requests(struct rmi_device *rmi_dev); -int rmi_driver_suspend(struct rmi_device *rmi_dev); -int rmi_driver_resume(struct rmi_device *rmi_dev); +int rmi_driver_suspend(struct rmi_device *rmi_dev, bool enable_wake); +int rmi_driver_resume(struct rmi_device *rmi_dev, bool clear_wake); #endif -- cgit v1.2.3 From 2775e523246e11c5ce90b69226c5e67aa43e64a5 Mon Sep 17 00:00:00 2001 From: Andrew Duggan Date: Tue, 8 Nov 2016 16:48:48 -0800 Subject: Input: synaptics-rmi4 - add parameters for dribble packets and palm detect gesture The rmi_f11 driver currently disables dribble packets and the palm detect gesture for all devices. This patch creates a parameter in the 2d sensor platform data for controlling this functionality on a per device basis. 
For more information on dribble packets: Commit 05ba999fcabb ("HID: rmi: disable dribble packets on Synaptics touchpads") For more information on the palm detect gesture: Commit f097deef59a6 ("HID: rmi: disable palm detect gesture when present") Signed-off-by: Andrew Duggan Reviewed-by: Benjamin Tissoires Signed-off-by: Benjamin Tissoires Signed-off-by: Dmitry Torokhov --- drivers/input/rmi4/rmi_2d_sensor.h | 2 ++ drivers/input/rmi4/rmi_f01.c | 6 +++--- drivers/input/rmi4/rmi_f11.c | 32 ++++++++++++++++++++++++++++---- include/linux/rmi.h | 21 +++++++++++++-------- 4 files changed, 46 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/drivers/input/rmi4/rmi_2d_sensor.h b/drivers/input/rmi4/rmi_2d_sensor.h index 77fcdfef003c..c871bef4dac0 100644 --- a/drivers/input/rmi4/rmi_2d_sensor.h +++ b/drivers/input/rmi4/rmi_2d_sensor.h @@ -67,6 +67,8 @@ struct rmi_2d_sensor { u8 report_rel; u8 x_mm; u8 y_mm; + enum rmi_reg_state dribble; + enum rmi_reg_state palm_detect; }; int rmi_2d_sensor_of_probe(struct device *dev, diff --git a/drivers/input/rmi4/rmi_f01.c b/drivers/input/rmi4/rmi_f01.c index fac81fc9bcf6..2cfa9f64acfb 100644 --- a/drivers/input/rmi4/rmi_f01.c +++ b/drivers/input/rmi4/rmi_f01.c @@ -327,12 +327,12 @@ static int rmi_f01_probe(struct rmi_function *fn) } switch (pdata->power_management.nosleep) { - case RMI_F01_NOSLEEP_DEFAULT: + case RMI_REG_STATE_DEFAULT: break; - case RMI_F01_NOSLEEP_OFF: + case RMI_REG_STATE_OFF: f01->device_control.ctrl0 &= ~RMI_F01_CTRL0_NOSLEEP_BIT; break; - case RMI_F01_NOSLEEP_ON: + case RMI_REG_STATE_ON: f01->device_control.ctrl0 |= RMI_F01_CTRL0_NOSLEEP_BIT; break; } diff --git a/drivers/input/rmi4/rmi_f11.c b/drivers/input/rmi4/rmi_f11.c index 3218742d2d8c..c252d405df4c 100644 --- a/drivers/input/rmi4/rmi_f11.c +++ b/drivers/input/rmi4/rmi_f11.c @@ -1142,6 +1142,8 @@ static int rmi_f11_initialize(struct rmi_function *fn) sensor->topbuttonpad = f11->sensor_pdata.topbuttonpad; sensor->kernel_tracking = 
f11->sensor_pdata.kernel_tracking; sensor->dmax = f11->sensor_pdata.dmax; + sensor->dribble = f11->sensor_pdata.dribble; + sensor->palm_detect = f11->sensor_pdata.palm_detect; if (f11->sens_query.has_physical_props) { sensor->x_mm = f11->sens_query.x_sensor_size_mm; @@ -1209,11 +1211,33 @@ static int rmi_f11_initialize(struct rmi_function *fn) ctrl->ctrl0_11[RMI_F11_DELTA_Y_THRESHOLD] = sensor->axis_align.delta_y_threshold; - if (f11->sens_query.has_dribble) - ctrl->ctrl0_11[0] = ctrl->ctrl0_11[0] & ~BIT(6); + if (f11->sens_query.has_dribble) { + switch (sensor->dribble) { + case RMI_REG_STATE_OFF: + ctrl->ctrl0_11[0] &= ~BIT(6); + break; + case RMI_REG_STATE_ON: + ctrl->ctrl0_11[0] |= BIT(6); + break; + case RMI_REG_STATE_DEFAULT: + default: + break; + } + } - if (f11->sens_query.has_palm_det) - ctrl->ctrl0_11[11] = ctrl->ctrl0_11[11] & ~BIT(0); + if (f11->sens_query.has_palm_det) { + switch (sensor->palm_detect) { + case RMI_REG_STATE_OFF: + ctrl->ctrl0_11[11] &= ~BIT(0); + break; + case RMI_REG_STATE_ON: + ctrl->ctrl0_11[11] |= BIT(0); + break; + case RMI_REG_STATE_DEFAULT: + default: + break; + } + } rc = f11_write_control_regs(fn, &f11->sens_query, &f11->dev_controls, fn->fd.query_base_addr); diff --git a/include/linux/rmi.h b/include/linux/rmi.h index 5944e6c2470d..ac904bb439a5 100644 --- a/include/linux/rmi.h +++ b/include/linux/rmi.h @@ -99,6 +99,8 @@ struct rmi_2d_sensor_platform_data { bool topbuttonpad; bool kernel_tracking; int dmax; + int dribble; + int palm_detect; }; /** @@ -116,14 +118,17 @@ struct rmi_f30_data { bool disable; }; -/** - * struct rmi_f01_power - override default power management settings. 
- * + +/* + * Set the state of a register + * DEFAULT - use the default value set by the firmware config + * OFF - explicitly disable the register + * ON - explicitly enable the register */ -enum rmi_f01_nosleep { - RMI_F01_NOSLEEP_DEFAULT = 0, - RMI_F01_NOSLEEP_OFF = 1, - RMI_F01_NOSLEEP_ON = 2 +enum rmi_reg_state { + RMI_REG_STATE_DEFAULT = 0, + RMI_REG_STATE_OFF = 1, + RMI_REG_STATE_ON = 2 }; /** @@ -143,7 +148,7 @@ enum rmi_f01_nosleep { * when the touch sensor is in doze mode, in units of 10ms. */ struct rmi_f01_power_management { - enum rmi_f01_nosleep nosleep; + enum rmi_reg_state nosleep; u8 wakeup_threshold; u8 doze_holdoff; u8 doze_interval; -- cgit v1.2.3 From 20e407e195b29a4f5a18d713a61f54a75f992bd5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 8 Nov 2016 17:15:01 -0800 Subject: genirq/affinity: Introduce struct irq_affinity Some drivers (various network and RDMA adapters, for example) have an MSI-X vector layout where most of the vectors are used for I/O queues and should have CPU affinity assigned to them, but some (usually 1 but sometimes more) at the beginning or end are used for low-performance admin or configuration work and should not have any explicit affinity assigned to them. Add a new irq_affinity structure, which will be passed through a variant of pci_alloc_irq_vectors that allows specifying these requirements (and is extensible to any future quirks in that area) so that the core IRQ affinity algorithm can take these quirks into account.
Signed-off-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Reviewed-by: Hannes Reinecke Acked-by: Jens Axboe Cc: linux-block@vger.kernel.org Cc: linux-pci@vger.kernel.org Link: http://lkml.kernel.org/r/1478654107-7384-2-git-send-email-hch@lst.de Signed-off-by: Thomas Gleixner --- include/linux/interrupt.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 72f0721f75e7..6b5268688a81 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -232,6 +232,18 @@ struct irq_affinity_notify { void (*release)(struct kref *ref); }; +/** + * struct irq_affinity - Description for automatic irq affinity assignements + * @pre_vectors: Don't apply affinity to @pre_vectors at beginning of + * the MSI(-X) vector space + * @post_vectors: Don't apply affinity to @post_vectors at end of + * the MSI(-X) vector space + */ +struct irq_affinity { + int pre_vectors; + int post_vectors; +}; + #if defined(CONFIG_SMP) extern cpumask_var_t irq_default_affinity; -- cgit v1.2.3 From 212bd846223c718b6577d4df16fd8d05a55ad914 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 8 Nov 2016 17:15:02 -0800 Subject: genirq/affinity: Handle pre/post vectors in irq_calc_affinity_vectors() Only calculate the affinity for the main I/O vectors, and skip the pre or post vectors specified by struct irq_affinity. Also remove the irq_affinity cpumask argument that has never been used. If we ever need it in the future we can pass it through struct irq_affinity. 
Signed-off-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Acked-by: Jens Axboe Cc: linux-block@vger.kernel.org Cc: linux-pci@vger.kernel.org Link: http://lkml.kernel.org/r/1478654107-7384-3-git-send-email-hch@lst.de Signed-off-by: Thomas Gleixner --- drivers/pci/msi.c | 8 ++++---- include/linux/interrupt.h | 4 ++-- kernel/irq/affinity.c | 24 ++++++++++-------------- 3 files changed, 16 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index ad70507cfb56..dad2da7cf80e 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -1061,6 +1061,7 @@ EXPORT_SYMBOL(pci_msi_enabled); static int __pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec, unsigned int flags) { + static const struct irq_affinity default_affd; bool affinity = flags & PCI_IRQ_AFFINITY; int nvec; int rc; @@ -1091,8 +1092,7 @@ static int __pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec, for (;;) { if (affinity) { - nvec = irq_calc_affinity_vectors(dev->irq_affinity, - nvec); + nvec = irq_calc_affinity_vectors(nvec, &default_affd); if (nvec < minvec) return -ENOSPC; } @@ -1132,6 +1132,7 @@ static int __pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries, int minvec, int maxvec, unsigned int flags) { + static const struct irq_affinity default_affd; bool affinity = flags & PCI_IRQ_AFFINITY; int rc, nvec = maxvec; @@ -1140,8 +1141,7 @@ static int __pci_enable_msix_range(struct pci_dev *dev, for (;;) { if (affinity) { - nvec = irq_calc_affinity_vectors(dev->irq_affinity, - nvec); + nvec = irq_calc_affinity_vectors(nvec, &default_affd); if (nvec < minvec) return -ENOSPC; } diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 6b5268688a81..9081f23bc0ff 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -291,7 +291,7 @@ extern int irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify); struct cpumask *irq_create_affinity_masks(const 
struct cpumask *affinity, int nvec); -int irq_calc_affinity_vectors(const struct cpumask *affinity, int maxvec); +int irq_calc_affinity_vectors(int maxvec, const struct irq_affinity *affd); #else /* CONFIG_SMP */ @@ -331,7 +331,7 @@ irq_create_affinity_masks(const struct cpumask *affinity, int nvec) } static inline int -irq_calc_affinity_vectors(const struct cpumask *affinity, int maxvec) +irq_calc_affinity_vectors(int maxvec, const struct irq_affinity *affd) { return maxvec; } diff --git a/kernel/irq/affinity.c b/kernel/irq/affinity.c index 17f51d63da56..8d9259727cb4 100644 --- a/kernel/irq/affinity.c +++ b/kernel/irq/affinity.c @@ -131,24 +131,20 @@ out: } /** - * irq_calc_affinity_vectors - Calculate to optimal number of vectors for a given affinity mask - * @affinity: The affinity mask to spread. If NULL cpu_online_mask - * is used - * @maxvec: The maximum number of vectors available + * irq_calc_affinity_vectors - Calculate the optimal number of vectors + * @maxvec: The maximum number of vectors available + * @affd: Description of the affinity requirements */ -int irq_calc_affinity_vectors(const struct cpumask *affinity, int maxvec) +int irq_calc_affinity_vectors(int maxvec, const struct irq_affinity *affd) { - int cpus, ret; + int resv = affd->pre_vectors + affd->post_vectors; + int vecs = maxvec - resv; + int cpus; /* Stabilize the cpumasks */ get_online_cpus(); - /* If the supplied affinity mask is NULL, use cpu online mask */ - if (!affinity) - affinity = cpu_online_mask; - - cpus = cpumask_weight(affinity); - ret = (cpus < maxvec) ? 
cpus : maxvec; - + cpus = cpumask_weight(cpu_online_mask); put_online_cpus(); - return ret; + + return min(cpus, vecs) + resv; } -- cgit v1.2.3 From 67c93c218dc5d1b45d547771f1fdb44a381e1faf Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 8 Nov 2016 17:15:03 -0800 Subject: genirq/affinity: Handle pre/post vectors in irq_create_affinity_masks() Only calculate the affinity for the main I/O vectors, and skip the pre or post vectors specified by struct irq_affinity. Also remove the irq_affinity cpumask argument that has never been used. If we ever need it in the future we can pass it through struct irq_affinity. Signed-off-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Acked-by: Bjorn Helgaas Acked-by: Jens Axboe Cc: linux-block@vger.kernel.org Cc: linux-pci@vger.kernel.org Link: http://lkml.kernel.org/r/1478654107-7384-4-git-send-email-hch@lst.de Signed-off-by: Thomas Gleixner --- drivers/pci/msi.c | 6 ++++-- include/linux/interrupt.h | 4 ++-- kernel/irq/affinity.c | 46 +++++++++++++++++++++++++--------------------- 3 files changed, 31 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index dad2da7cf80e..f4a108b59336 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -553,12 +553,13 @@ error_attrs: static struct msi_desc * msi_setup_entry(struct pci_dev *dev, int nvec, bool affinity) { + static const struct irq_affinity default_affd; struct cpumask *masks = NULL; struct msi_desc *entry; u16 control; if (affinity) { - masks = irq_create_affinity_masks(dev->irq_affinity, nvec); + masks = irq_create_affinity_masks(nvec, &default_affd); if (!masks) pr_err("Unable to allocate affinity masks, ignoring\n"); } @@ -692,12 +693,13 @@ static int msix_setup_entries(struct pci_dev *dev, void __iomem *base, struct msix_entry *entries, int nvec, bool affinity) { + static const struct irq_affinity default_affd; struct cpumask *curmsk, *masks = NULL; struct msi_desc *entry; int ret, i; if (affinity) { - 
masks = irq_create_affinity_masks(dev->irq_affinity, nvec); + masks = irq_create_affinity_masks(nvec, &default_affd); if (!masks) pr_err("Unable to allocate affinity masks, ignoring\n"); } diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 9081f23bc0ff..53144e78a369 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -290,7 +290,7 @@ extern int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m); extern int irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify); -struct cpumask *irq_create_affinity_masks(const struct cpumask *affinity, int nvec); +struct cpumask *irq_create_affinity_masks(int nvec, const struct irq_affinity *affd); int irq_calc_affinity_vectors(int maxvec, const struct irq_affinity *affd); #else /* CONFIG_SMP */ @@ -325,7 +325,7 @@ irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify) } static inline struct cpumask * -irq_create_affinity_masks(const struct cpumask *affinity, int nvec) +irq_create_affinity_masks(int nvec, const struct irq_affinity *affd) { return NULL; } diff --git a/kernel/irq/affinity.c b/kernel/irq/affinity.c index 8d9259727cb4..17360bd9619b 100644 --- a/kernel/irq/affinity.c +++ b/kernel/irq/affinity.c @@ -51,16 +51,16 @@ static int get_nodes_in_cpumask(const struct cpumask *mask, nodemask_t *nodemsk) /** * irq_create_affinity_masks - Create affinity masks for multiqueue spreading - * @affinity: The affinity mask to spread. If NULL cpu_online_mask - * is used - * @nvecs: The number of vectors + * @nvecs: The total number of vectors + * @affd: Description of the affinity requirements * * Returns the masks pointer or NULL if allocation failed. 
*/ -struct cpumask *irq_create_affinity_masks(const struct cpumask *affinity, - int nvec) +struct cpumask * +irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd) { - int n, nodes, vecs_per_node, cpus_per_vec, extra_vecs, curvec = 0; + int n, nodes, vecs_per_node, cpus_per_vec, extra_vecs, curvec; + int affv = nvecs - affd->pre_vectors - affd->post_vectors; nodemask_t nodemsk = NODE_MASK_NONE; struct cpumask *masks; cpumask_var_t nmsk; @@ -68,46 +68,46 @@ struct cpumask *irq_create_affinity_masks(const struct cpumask *affinity, if (!zalloc_cpumask_var(&nmsk, GFP_KERNEL)) return NULL; - masks = kzalloc(nvec * sizeof(*masks), GFP_KERNEL); + masks = kcalloc(nvecs, sizeof(*masks), GFP_KERNEL); if (!masks) goto out; + /* Fill out vectors at the beginning that don't need affinity */ + for (curvec = 0; curvec < affd->pre_vectors; curvec++) + cpumask_copy(masks + curvec, cpu_possible_mask); + /* Stabilize the cpumasks */ get_online_cpus(); - /* If the supplied affinity mask is NULL, use cpu online mask */ - if (!affinity) - affinity = cpu_online_mask; - - nodes = get_nodes_in_cpumask(affinity, &nodemsk); + nodes = get_nodes_in_cpumask(cpu_online_mask, &nodemsk); /* * If the number of nodes in the mask is less than or equal the * number of vectors we just spread the vectors across the nodes. 
*/ - if (nvec <= nodes) { + if (affv <= nodes) { for_each_node_mask(n, nodemsk) { cpumask_copy(masks + curvec, cpumask_of_node(n)); - if (++curvec == nvec) + if (++curvec == affv) break; } - goto outonl; + goto done; } /* Spread the vectors per node */ - vecs_per_node = nvec / nodes; + vecs_per_node = affv / nodes; /* Account for rounding errors */ - extra_vecs = nvec - (nodes * vecs_per_node); + extra_vecs = affv - (nodes * vecs_per_node); for_each_node_mask(n, nodemsk) { int ncpus, v, vecs_to_assign = vecs_per_node; /* Get the cpus on this node which are in the mask */ - cpumask_and(nmsk, affinity, cpumask_of_node(n)); + cpumask_and(nmsk, cpu_online_mask, cpumask_of_node(n)); /* Calculate the number of cpus per vector */ ncpus = cpumask_weight(nmsk); - for (v = 0; curvec < nvec && v < vecs_to_assign; curvec++, v++) { + for (v = 0; curvec < affv && v < vecs_to_assign; curvec++, v++) { cpus_per_vec = ncpus / vecs_to_assign; /* Account for extra vectors to compensate rounding errors */ @@ -119,12 +119,16 @@ struct cpumask *irq_create_affinity_masks(const struct cpumask *affinity, irq_spread_init_one(masks + curvec, nmsk, cpus_per_vec); } - if (curvec >= nvec) + if (curvec >= affv) break; } -outonl: +done: put_online_cpus(); + + /* Fill out vectors at the end that don't need affinity */ + for (; curvec < nvecs; curvec++) + cpumask_copy(masks + curvec, cpu_possible_mask); out: free_cpumask_var(nmsk); return masks; -- cgit v1.2.3 From 402723ad5c625ee052432698ae5e56b02d38d4ec Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 8 Nov 2016 17:15:05 -0800 Subject: PCI/MSI: Provide pci_alloc_irq_vectors_affinity() This is a variant of pci_alloc_irq_vectors() that allows passing a struct irq_affinity to provide fine-grained IRQ affinity control. For now this means being able to exclude vectors at the beginning or end of the MSI vector space, but it could also be used for any other quirks needed in the future (e.g. 
more vectors than CPUs, or excluding CPUs from the spreading). Signed-off-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Acked-by: Bjorn Helgaas Acked-by: Jens Axboe Cc: linux-block@vger.kernel.org Cc: linux-pci@vger.kernel.org Link: http://lkml.kernel.org/r/1478654107-7384-6-git-send-email-hch@lst.de Signed-off-by: Thomas Gleixner --- drivers/pci/msi.c | 20 +++++++++++++------- include/linux/pci.h | 24 +++++++++++++++++++----- 2 files changed, 32 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 512f388a74f2..dd27f73a45fc 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -1179,11 +1179,12 @@ int pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries, EXPORT_SYMBOL(pci_enable_msix_range); /** - * pci_alloc_irq_vectors - allocate multiple IRQs for a device + * pci_alloc_irq_vectors_affinity - allocate multiple IRQs for a device * @dev: PCI device to operate on * @min_vecs: minimum number of vectors required (must be >= 1) * @max_vecs: maximum (desired) number of vectors * @flags: flags or quirks for the allocation + * @affd: optional description of the affinity requirements * * Allocate up to @max_vecs interrupt vectors for @dev, using MSI-X or MSI * vectors if available, and fall back to a single legacy vector @@ -1195,15 +1196,20 @@ EXPORT_SYMBOL(pci_enable_msix_range); * To get the Linux IRQ number used for a vector that can be passed to * request_irq() use the pci_irq_vector() helper. 
*/ -int pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int min_vecs, - unsigned int max_vecs, unsigned int flags) +int pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs, + unsigned int max_vecs, unsigned int flags, + const struct irq_affinity *affd) { static const struct irq_affinity msi_default_affd; - const struct irq_affinity *affd = NULL; int vecs = -ENOSPC; - if (flags & PCI_IRQ_AFFINITY) - affd = &msi_default_affd; + if (flags & PCI_IRQ_AFFINITY) { + if (!affd) + affd = &msi_default_affd; + } else { + if (WARN_ON(affd)) + affd = NULL; + } if (flags & PCI_IRQ_MSIX) { vecs = __pci_enable_msix_range(dev, NULL, min_vecs, max_vecs, @@ -1226,7 +1232,7 @@ int pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int min_vecs, return vecs; } -EXPORT_SYMBOL(pci_alloc_irq_vectors); +EXPORT_SYMBOL(pci_alloc_irq_vectors_affinity); /** * pci_free_irq_vectors - free previously allocated IRQs for a device diff --git a/include/linux/pci.h b/include/linux/pci.h index 0e49f70dbd9b..7090f5ff7252 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -244,6 +244,7 @@ struct pci_cap_saved_state { struct pci_cap_saved_data cap; }; +struct irq_affinity; struct pcie_link_state; struct pci_vpd; struct pci_sriov; @@ -1310,8 +1311,10 @@ static inline int pci_enable_msix_exact(struct pci_dev *dev, return rc; return 0; } -int pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int min_vecs, - unsigned int max_vecs, unsigned int flags); +int pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs, + unsigned int max_vecs, unsigned int flags, + const struct irq_affinity *affd); + void pci_free_irq_vectors(struct pci_dev *dev); int pci_irq_vector(struct pci_dev *dev, unsigned int nr); const struct cpumask *pci_irq_get_affinity(struct pci_dev *pdev, int vec); @@ -1339,14 +1342,17 @@ static inline int pci_enable_msix_range(struct pci_dev *dev, static inline int pci_enable_msix_exact(struct pci_dev *dev, struct msix_entry *entries, int nvec) 
{ return -ENOSYS; } -static inline int pci_alloc_irq_vectors(struct pci_dev *dev, - unsigned int min_vecs, unsigned int max_vecs, - unsigned int flags) + +static inline int +pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs, + unsigned int max_vecs, unsigned int flags, + const struct irq_affinity *aff_desc) { if (min_vecs > 1) return -EINVAL; return 1; } + static inline void pci_free_irq_vectors(struct pci_dev *dev) { } @@ -1364,6 +1370,14 @@ static inline const struct cpumask *pci_irq_get_affinity(struct pci_dev *pdev, } #endif +static inline int +pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int min_vecs, + unsigned int max_vecs, unsigned int flags) +{ + return pci_alloc_irq_vectors_affinity(dev, min_vecs, max_vecs, flags, + NULL); +} + #ifdef CONFIG_PCIEPORTBUS extern bool pcie_ports_disabled; extern bool pcie_ports_auto; -- cgit v1.2.3 From 0cf71b04467bc34063cecae577f12481da6cc565 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 8 Nov 2016 17:15:06 -0800 Subject: PCI: Remove the irq_affinity mask from struct pci_dev This has never been used, and now is totally unreferenced. Nuke it. Signed-off-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Acked-by: Bjorn Helgaas Acked-by: Jens Axboe Cc: linux-block@vger.kernel.org Cc: linux-pci@vger.kernel.org Link: http://lkml.kernel.org/r/1478654107-7384-7-git-send-email-hch@lst.de Signed-off-by: Thomas Gleixner --- include/linux/pci.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 7090f5ff7252..f2ba6ac21c75 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -333,7 +333,6 @@ struct pci_dev { * directly, use the values stored here. They might be different! 
*/ unsigned int irq; - struct cpumask *irq_affinity; struct resource resource[DEVICE_COUNT_RESOURCE]; /* I/O and memory regions + expansion ROMs */ bool match_driver; /* Skip attaching driver */ -- cgit v1.2.3 From 67db3e4bfbc90657c7be840aad5585be46240d6f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 4 Nov 2016 11:54:32 -0700 Subject: tcp: no longer hold ehash lock while calling tcp_get_info() We had various problems in the past in tcp_get_info() and used specific synchronization to avoid deadlocks. We would like to add more instrumentation points for TCP, and avoiding grabbing socket lock in tcp_get_info() was too costly. Being able to lock the socket allows us to provide a consistent set of fields. inet_diag_dump_icsk() can make sure ehash locks are not held any more when tcp_get_info() is called. We can remove syncp added in commit d654976cbf85 ("tcp: fix a potential deadlock in tcp_get_info()"), but we need to use lock_sock_fast() instead of spin_lock_bh() since TCP input path can now be run from process context. Signed-off-by: Eric Dumazet Signed-off-by: Yuchung Cheng Acked-by: Soheil Hassas Yeganeh Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- include/linux/tcp.h | 2 -- net/ipv4/inet_diag.c | 48 +++++++++++++++++++++++++++++++++--------------- net/ipv4/tcp.c | 20 +++++++++----------- net/ipv4/tcp_input.c | 4 ---- 4 files changed, 42 insertions(+), 32 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index a17ae7b85218..32a7c7e35b71 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -176,8 +176,6 @@ struct tcp_sock { * sum(delta(snd_una)), or how many bytes * were acked.
*/ - struct u64_stats_sync syncp; /* protects 64bit vars (cf tcp_get_info()) */ - u32 snd_una; /* First byte we want an ack for */ u32 snd_sml; /* Last byte of the most recently transmitted small packet */ u32 rcv_tstamp; /* timestamp of last received ACK (for keepalives) */ diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 3b34024202d8..4dea33e5f295 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -861,10 +861,11 @@ void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb, struct netlink_callback *cb, const struct inet_diag_req_v2 *r, struct nlattr *bc) { + bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN); struct net *net = sock_net(skb->sk); - int i, num, s_i, s_num; u32 idiag_states = r->idiag_states; - bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN); + int i, num, s_i, s_num; + struct sock *sk; if (idiag_states & TCPF_SYN_RECV) idiag_states |= TCPF_NEW_SYN_RECV; @@ -877,7 +878,6 @@ void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb, for (i = s_i; i < INET_LHTABLE_SIZE; i++) { struct inet_listen_hashbucket *ilb; - struct sock *sk; num = 0; ilb = &hashinfo->listening_hash[i]; @@ -922,13 +922,14 @@ skip_listen_ht: if (!(idiag_states & ~TCPF_LISTEN)) goto out; +#define SKARR_SZ 16 for (i = s_i; i <= hashinfo->ehash_mask; i++) { struct inet_ehash_bucket *head = &hashinfo->ehash[i]; spinlock_t *lock = inet_ehash_lockp(hashinfo, i); struct hlist_nulls_node *node; - struct sock *sk; - - num = 0; + struct sock *sk_arr[SKARR_SZ]; + int num_arr[SKARR_SZ]; + int idx, accum, res; if (hlist_nulls_empty(&head->chain)) continue; @@ -936,9 +937,12 @@ skip_listen_ht: if (i > s_i) s_num = 0; +next_chunk: + num = 0; + accum = 0; spin_lock_bh(lock); sk_nulls_for_each(sk, node, &head->chain) { - int state, res; + int state; if (!net_eq(sock_net(sk), net)) continue; @@ -962,21 +966,35 @@ skip_listen_ht: if (!inet_diag_bc_sk(bc, sk)) goto next_normal; - res = sk_diag_fill(sk, skb, r, + 
sock_hold(sk); + num_arr[accum] = num; + sk_arr[accum] = sk; + if (++accum == SKARR_SZ) + break; +next_normal: + ++num; + } + spin_unlock_bh(lock); + res = 0; + for (idx = 0; idx < accum; idx++) { + if (res >= 0) { + res = sk_diag_fill(sk_arr[idx], skb, r, sk_user_ns(NETLINK_CB(cb->skb).sk), NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh, net_admin); - if (res < 0) { - spin_unlock_bh(lock); - goto done; + if (res < 0) + num = num_arr[idx]; } -next_normal: - ++num; + sock_gen_put(sk_arr[idx]); } - - spin_unlock_bh(lock); + if (res < 0) + break; cond_resched(); + if (accum == SKARR_SZ) { + s_num = num + 1; + goto next_chunk; + } } done: diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 117982be0cab..a7d54cbcdabb 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -405,7 +405,6 @@ void tcp_init_sock(struct sock *sk) tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; tp->snd_cwnd_clamp = ~0; tp->mss_cache = TCP_MSS_DEFAULT; - u64_stats_init(&tp->syncp); tp->reordering = sock_net(sk)->ipv4.sysctl_tcp_reordering; tcp_enable_early_retrans(tp); @@ -2710,9 +2709,8 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) const struct tcp_sock *tp = tcp_sk(sk); /* iff sk_type == SOCK_STREAM */ const struct inet_connection_sock *icsk = inet_csk(sk); u32 now = tcp_time_stamp, intv; - unsigned int start; - int notsent_bytes; u64 rate64; + bool slow; u32 rate; memset(info, 0, sizeof(*info)); @@ -2792,17 +2790,17 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_total_retrans = tp->total_retrans; - do { - start = u64_stats_fetch_begin_irq(&tp->syncp); - put_unaligned(tp->bytes_acked, &info->tcpi_bytes_acked); - put_unaligned(tp->bytes_received, &info->tcpi_bytes_received); - } while (u64_stats_fetch_retry_irq(&tp->syncp, start)); + slow = lock_sock_fast(sk); + + put_unaligned(tp->bytes_acked, &info->tcpi_bytes_acked); + put_unaligned(tp->bytes_received, &info->tcpi_bytes_received); + info->tcpi_notsent_bytes = max_t(int, 0, tp->write_seq - 
tp->snd_nxt); + + unlock_sock_fast(sk, slow); + info->tcpi_segs_out = tp->segs_out; info->tcpi_segs_in = tp->segs_in; - notsent_bytes = READ_ONCE(tp->write_seq) - READ_ONCE(tp->snd_nxt); - info->tcpi_notsent_bytes = max(0, notsent_bytes); - info->tcpi_min_rtt = tcp_min_rtt(tp); info->tcpi_data_segs_in = tp->data_segs_in; info->tcpi_data_segs_out = tp->data_segs_out; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index f2c59c8e57ff..a70046fea0e8 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3351,9 +3351,7 @@ static void tcp_snd_una_update(struct tcp_sock *tp, u32 ack) u32 delta = ack - tp->snd_una; sock_owned_by_me((struct sock *)tp); - u64_stats_update_begin_raw(&tp->syncp); tp->bytes_acked += delta; - u64_stats_update_end_raw(&tp->syncp); tp->snd_una = ack; } @@ -3363,9 +3361,7 @@ static void tcp_rcv_nxt_update(struct tcp_sock *tp, u32 seq) u32 delta = seq - tp->rcv_nxt; sock_owned_by_me((struct sock *)tp); - u64_stats_update_begin_raw(&tp->syncp); tp->bytes_received += delta; - u64_stats_update_end_raw(&tp->syncp); tp->rcv_nxt = seq; } -- cgit v1.2.3 From 5a3c7805c444d9d55f302a4b3930e8758be13fab Mon Sep 17 00:00:00 2001 From: Joachim Eastwood Date: Sat, 5 Nov 2016 14:04:52 +0100 Subject: Revert "net: stmmac: allow to split suspend/resume from init/exit callbacks" Instead of adding hooks inside stmmac_platform it is better to just use the standard PM callbacks within the specific dwmac-driver. This is only used by the dwmac-rk driver. This reverts commit cecbc5563a02 ("stmmac: allow to split suspend/resume from init/exit callbacks"). Signed-off-by: Joachim Eastwood Signed-off-by: David S. 
Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c | 8 ++------ include/linux/stmmac.h | 2 -- 2 files changed, 2 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index 0a0d6a86f397..4d544c34c1f2 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -417,9 +417,7 @@ static int stmmac_pltfr_suspend(struct device *dev) struct platform_device *pdev = to_platform_device(dev); ret = stmmac_suspend(dev); - if (priv->plat->suspend) - priv->plat->suspend(pdev, priv->plat->bsp_priv); - else if (priv->plat->exit) + if (priv->plat->exit) priv->plat->exit(pdev, priv->plat->bsp_priv); return ret; @@ -438,9 +436,7 @@ static int stmmac_pltfr_resume(struct device *dev) struct stmmac_priv *priv = netdev_priv(ndev); struct platform_device *pdev = to_platform_device(dev); - if (priv->plat->resume) - priv->plat->resume(pdev, priv->plat->bsp_priv); - else if (priv->plat->init) + if (priv->plat->init) priv->plat->init(pdev, priv->plat->bsp_priv); return stmmac_resume(dev); diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 705840e0438f..3537fb33cc90 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -135,8 +135,6 @@ struct plat_stmmacenet_data { void (*bus_setup)(void __iomem *ioaddr); int (*init)(struct platform_device *pdev, void *priv); void (*exit)(struct platform_device *pdev, void *priv); - void (*suspend)(struct platform_device *pdev, void *priv); - void (*resume)(struct platform_device *pdev, void *priv); void *bsp_priv; struct stmmac_axi *axi; int has_gmac4; -- cgit v1.2.3 From c9f1b073d0d750ccf8b30b272d1d76479f4cccbc Mon Sep 17 00:00:00 2001 From: Hadar Hen Zion Date: Mon, 7 Nov 2016 15:14:44 +0200 Subject: net/mlx5: Add creation flags when adding new flow table When creating flow tables, allow the caller to specify creation 
flags. Currently no flags are used and as such this patch doesn't add any new functionality. Signed-off-by: Hadar Hen Zion Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/infiniband/hw/mlx5/main.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_fs.c | 6 ++--- .../ethernet/mellanox/mlx5/core/en_fs_ethtool.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 2 +- .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 7 +++--- drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c | 7 +++++- drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h | 2 +- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 28 +++++++++++++--------- drivers/net/ethernet/mellanox/mlx5/core/fs_core.h | 1 + include/linux/mlx5/fs.h | 10 ++++++-- 12 files changed, 45 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 292ae8bbeae2..9b16431e1de8 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1857,7 +1857,7 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, ft = mlx5_create_auto_grouped_flow_table(ns, priority, num_entries, num_groups, - 0); + 0, 0); if (!IS_ERR(ft)) { prio->refcount = 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c index 8ff22e83e1dd..677b23810953 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c @@ -324,7 +324,7 @@ static int arfs_create_table(struct mlx5e_priv *priv, int err; ft->t = mlx5_create_flow_table(priv->fs.ns, MLX5E_NIC_PRIO, - MLX5E_ARFS_TABLE_SIZE, MLX5E_ARFS_FT_LEVEL); + MLX5E_ARFS_TABLE_SIZE, MLX5E_ARFS_FT_LEVEL, 0); if (IS_ERR(ft->t)) { err = PTR_ERR(ft->t); ft->t = NULL; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index bed544d47ba1..9617892e0f15 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -777,7 +777,7 @@ static int mlx5e_create_ttc_table(struct mlx5e_priv *priv) int err; ft->t = mlx5_create_flow_table(priv->fs.ns, MLX5E_NIC_PRIO, - MLX5E_TTC_TABLE_SIZE, MLX5E_TTC_FT_LEVEL); + MLX5E_TTC_TABLE_SIZE, MLX5E_TTC_FT_LEVEL, 0); if (IS_ERR(ft->t)) { err = PTR_ERR(ft->t); ft->t = NULL; @@ -948,7 +948,7 @@ static int mlx5e_create_l2_table(struct mlx5e_priv *priv) ft->num_groups = 0; ft->t = mlx5_create_flow_table(priv->fs.ns, MLX5E_NIC_PRIO, - MLX5E_L2_TABLE_SIZE, MLX5E_L2_FT_LEVEL); + MLX5E_L2_TABLE_SIZE, MLX5E_L2_FT_LEVEL, 0); if (IS_ERR(ft->t)) { err = PTR_ERR(ft->t); @@ -1038,7 +1038,7 @@ static int mlx5e_create_vlan_table(struct mlx5e_priv *priv) ft->num_groups = 0; ft->t = mlx5_create_flow_table(priv->fs.ns, MLX5E_NIC_PRIO, - MLX5E_VLAN_TABLE_SIZE, MLX5E_VLAN_FT_LEVEL); + MLX5E_VLAN_TABLE_SIZE, MLX5E_VLAN_FT_LEVEL, 0); if (IS_ERR(ft->t)) { err = PTR_ERR(ft->t); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c index cf52c06377f2..87bb3db7b501 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c @@ -99,7 +99,7 @@ static struct mlx5e_ethtool_table *get_flow_table(struct mlx5e_priv *priv, MLX5E_ETHTOOL_NUM_ENTRIES); ft = mlx5_create_auto_grouped_flow_table(ns, prio, table_size, - MLX5E_ETHTOOL_NUM_GROUPS, 0); + MLX5E_ETHTOOL_NUM_GROUPS, 0, 0); if (IS_ERR(ft)) return (void *)ft; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 165682e2d2be..cdd430330e8e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -83,7 +83,7 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, 
MLX5E_TC_PRIO, MLX5E_TC_TABLE_NUM_ENTRIES, MLX5E_TC_TABLE_NUM_GROUPS, - 0); + 0, 0); if (IS_ERR(priv->fs.tc.t)) { netdev_err(priv->netdev, "Failed to create tc offload table\n"); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 9ee002ecb4bb..27f21ac66639 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -361,7 +361,7 @@ static int esw_create_legacy_fdb_table(struct mlx5_eswitch *esw, int nvports) memset(flow_group_in, 0, inlen); table_size = BIT(MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size)); - fdb = mlx5_create_flow_table(root_ns, 0, table_size, 0); + fdb = mlx5_create_flow_table(root_ns, 0, table_size, 0, 0); if (IS_ERR(fdb)) { err = PTR_ERR(fdb); esw_warn(dev, "Failed to create FDB Table err %d\n", err); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 53d9d6ce008b..b18f9513e71e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -423,7 +423,8 @@ static int esw_create_offloads_fdb_table(struct mlx5_eswitch *esw, int nvports) fdb = mlx5_create_auto_grouped_flow_table(root_ns, FDB_FAST_PATH, ESW_OFFLOADS_NUM_ENTRIES, - ESW_OFFLOADS_NUM_GROUPS, 0); + ESW_OFFLOADS_NUM_GROUPS, 0, + 0); if (IS_ERR(fdb)) { err = PTR_ERR(fdb); esw_warn(dev, "Failed to create Fast path FDB Table err %d\n", err); @@ -432,7 +433,7 @@ static int esw_create_offloads_fdb_table(struct mlx5_eswitch *esw, int nvports) esw->fdb_table.fdb = fdb; table_size = nvports + MAX_PF_SQ + 1; - fdb = mlx5_create_flow_table(root_ns, FDB_SLOW_PATH, table_size, 0); + fdb = mlx5_create_flow_table(root_ns, FDB_SLOW_PATH, table_size, 0, 0); if (IS_ERR(fdb)) { err = PTR_ERR(fdb); esw_warn(dev, "Failed to create slow path FDB Table err %d\n", err); @@ -524,7 +525,7 @@ static int 
esw_create_offloads_table(struct mlx5_eswitch *esw) return -ENOMEM; } - ft_offloads = mlx5_create_flow_table(ns, 0, dev->priv.sriov.num_vfs + 2, 0); + ft_offloads = mlx5_create_flow_table(ns, 0, dev->priv.sriov.num_vfs + 2, 0, 0); if (IS_ERR(ft_offloads)) { err = PTR_ERR(ft_offloads); esw_warn(esw->dev, "Failed to create offloads table, err %d\n", err); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index 301cec896eb6..cc97bb218e74 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -37,6 +37,7 @@ #include "fs_core.h" #include "fs_cmd.h" #include "mlx5_core.h" +#include "eswitch.h" int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft) @@ -61,8 +62,9 @@ int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev, enum fs_flow_table_op_mod op_mod, enum fs_flow_table_type type, unsigned int level, unsigned int log_size, struct mlx5_flow_table - *next_ft, unsigned int *table_id) + *next_ft, unsigned int *table_id, u32 flags) { + int en_encap_decap = !!(flags & MLX5_FLOW_TABLE_TUNNEL_EN); u32 out[MLX5_ST_SZ_DW(create_flow_table_out)] = {0}; u32 in[MLX5_ST_SZ_DW(create_flow_table_in)] = {0}; int err; @@ -78,6 +80,9 @@ int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev, MLX5_SET(create_flow_table_in, in, other_vport, 1); } + MLX5_SET(create_flow_table_in, in, decap_en, en_encap_decap); + MLX5_SET(create_flow_table_in, in, encap_en, en_encap_decap); + switch (op_mod) { case FS_FT_OP_MOD_NORMAL: if (next_ft) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h index 86bead1748a7..8fad80688536 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h @@ -38,7 +38,7 @@ int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev, enum fs_flow_table_op_mod op_mod, enum fs_flow_table_type type, 
unsigned int level, unsigned int log_size, struct mlx5_flow_table - *next_ft, unsigned int *table_id); + *next_ft, unsigned int *table_id, u32 flags); int mlx5_cmd_destroy_flow_table(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index e65eabf9c850..4d28c8d70482 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -505,7 +505,8 @@ static struct mlx5_flow_group *alloc_flow_group(u32 *create_fg_in) static struct mlx5_flow_table *alloc_flow_table(int level, u16 vport, int max_fte, enum fs_flow_table_type table_type, - enum fs_flow_table_op_mod op_mod) + enum fs_flow_table_op_mod op_mod, + u32 flags) { struct mlx5_flow_table *ft; @@ -519,6 +520,7 @@ static struct mlx5_flow_table *alloc_flow_table(int level, u16 vport, int max_ft ft->type = table_type; ft->vport = vport; ft->max_fte = max_fte; + ft->flags = flags; INIT_LIST_HEAD(&ft->fwd_rules); mutex_init(&ft->lock); @@ -777,7 +779,8 @@ static void list_add_flow_table(struct mlx5_flow_table *ft, static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespace *ns, enum fs_flow_table_op_mod op_mod, u16 vport, int prio, - int max_fte, u32 level) + int max_fte, u32 level, + u32 flags) { struct mlx5_flow_table *next_ft = NULL; struct mlx5_flow_table *ft; @@ -810,7 +813,7 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa vport, max_fte ? roundup_pow_of_two(max_fte) : 0, root->table_type, - op_mod); + op_mod, flags); if (!ft) { err = -ENOMEM; goto unlock_root; @@ -820,7 +823,8 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa log_table_sz = ft->max_fte ? 
ilog2(ft->max_fte) : 0; next_ft = find_next_chained_ft(fs_prio); err = mlx5_cmd_create_flow_table(root->dev, ft->vport, ft->op_mod, ft->type, - ft->level, log_table_sz, next_ft, &ft->id); + ft->level, log_table_sz, next_ft, &ft->id, + ft->flags); if (err) goto free_ft; @@ -845,10 +849,11 @@ unlock_root: struct mlx5_flow_table *mlx5_create_flow_table(struct mlx5_flow_namespace *ns, int prio, int max_fte, - u32 level) + u32 level, + u32 flags) { return __mlx5_create_flow_table(ns, FS_FT_OP_MOD_NORMAL, 0, prio, - max_fte, level); + max_fte, level, flags); } struct mlx5_flow_table *mlx5_create_vport_flow_table(struct mlx5_flow_namespace *ns, @@ -856,7 +861,7 @@ struct mlx5_flow_table *mlx5_create_vport_flow_table(struct mlx5_flow_namespace u32 level, u16 vport) { return __mlx5_create_flow_table(ns, FS_FT_OP_MOD_NORMAL, vport, prio, - max_fte, level); + max_fte, level, 0); } struct mlx5_flow_table *mlx5_create_lag_demux_flow_table( @@ -864,7 +869,7 @@ struct mlx5_flow_table *mlx5_create_lag_demux_flow_table( int prio, u32 level) { return __mlx5_create_flow_table(ns, FS_FT_OP_MOD_LAG_DEMUX, 0, prio, 0, - level); + level, 0); } EXPORT_SYMBOL(mlx5_create_lag_demux_flow_table); @@ -872,14 +877,15 @@ struct mlx5_flow_table *mlx5_create_auto_grouped_flow_table(struct mlx5_flow_nam int prio, int num_flow_table_entries, int max_num_groups, - u32 level) + u32 level, + u32 flags) { struct mlx5_flow_table *ft; if (max_num_groups > num_flow_table_entries) return ERR_PTR(-EINVAL); - ft = mlx5_create_flow_table(ns, prio, num_flow_table_entries, level); + ft = mlx5_create_flow_table(ns, prio, num_flow_table_entries, level, flags); if (IS_ERR(ft)) return ft; @@ -1822,7 +1828,7 @@ static int create_anchor_flow_table(struct mlx5_flow_steering *steering) ns = mlx5_get_flow_namespace(steering->dev, MLX5_FLOW_NAMESPACE_ANCHOR); if (!ns) return -EINVAL; - ft = mlx5_create_flow_table(ns, ANCHOR_PRIO, ANCHOR_SIZE, ANCHOR_LEVEL); + ft = mlx5_create_flow_table(ns, ANCHOR_PRIO, ANCHOR_SIZE, 
ANCHOR_LEVEL, 0); if (IS_ERR(ft)) { mlx5_core_err(steering->dev, "Failed to create last anchor flow table"); return PTR_ERR(ft); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index d5150888645c..9f616ed25a89 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -117,6 +117,7 @@ struct mlx5_flow_table { struct mutex lock; /* FWD rules that point on this flow table */ struct list_head fwd_rules; + u32 flags; }; struct mlx5_fc_cache { diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 0dcd287f4bd0..ab1a5fd2e995 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -42,6 +42,10 @@ enum { MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO = 1 << 16, }; +enum { + MLX5_FLOW_TABLE_TUNNEL_EN = BIT(0), +}; + #define LEFTOVERS_RULE_NUM 2 static inline void build_leftovers_ft_param(int *priority, int *n_ent, @@ -97,13 +101,15 @@ mlx5_create_auto_grouped_flow_table(struct mlx5_flow_namespace *ns, int prio, int num_flow_table_entries, int max_num_groups, - u32 level); + u32 level, + u32 flags); struct mlx5_flow_table * mlx5_create_flow_table(struct mlx5_flow_namespace *ns, int prio, int num_flow_table_entries, - u32 level); + u32 level, + u32 flags); struct mlx5_flow_table * mlx5_create_vport_flow_table(struct mlx5_flow_namespace *ns, int prio, -- cgit v1.2.3 From 66958ed906b87816314c0517f05fe0b5766ec7fe Mon Sep 17 00:00:00 2001 From: Hadar Hen Zion Date: Mon, 7 Nov 2016 15:14:45 +0200 Subject: net/mlx5: Support encap id when setting new steering entry In order to support steering rules which add encapsulation headers, encap_id parameter is needed. Add new mlx5_flow_act struct which holds action related parameter: action, flow_tag and encap_id. Use mlx5_flow_act struct when adding a new steering rule. This patch doesn't change any functionality. 
Signed-off-by: Hadar Hen Zion Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/infiniband/hw/mlx5/main.c | 10 ++--- drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c | 17 +++++--- drivers/net/ethernet/mellanox/mlx5/core/en_fs.c | 29 ++++++++----- .../ethernet/mellanox/mlx5/core/en_fs_ethtool.c | 10 ++--- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 9 ++-- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 23 ++++++----- .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 25 ++++++----- drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c | 1 + drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 48 ++++++++++------------ drivers/net/ethernet/mellanox/mlx5/core/fs_core.h | 1 + include/linux/mlx5/fs.h | 9 +++- 11 files changed, 104 insertions(+), 78 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 9b16431e1de8..76ed57f1b678 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1877,10 +1877,10 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev, { struct mlx5_flow_table *ft = ft_prio->flow_table; struct mlx5_ib_flow_handler *handler; + struct mlx5_flow_act flow_act = {0}; struct mlx5_flow_spec *spec; const void *ib_flow = (const void *)flow_attr + sizeof(*flow_attr); unsigned int spec_index; - u32 action; int err = 0; if (!is_valid_attr(flow_attr)) @@ -1905,12 +1905,12 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev, } spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria); - action = dst ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST : + flow_act.action = dst ? 
MLX5_FLOW_CONTEXT_ACTION_FWD_DEST : MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO; + flow_act.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG; handler->rule = mlx5_add_flow_rules(ft, spec, - action, - MLX5_FS_DEFAULT_FLOW_TAG, - dst, 1); + &flow_act, + dst, 1); if (IS_ERR(handler->rule)) { err = PTR_ERR(handler->rule); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c index 677b23810953..68419a01db36 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c @@ -174,6 +174,11 @@ static int arfs_add_default_rule(struct mlx5e_priv *priv, enum arfs_type type) { struct arfs_table *arfs_t = &priv->fs.arfs.arfs_tables[type]; + struct mlx5_flow_act flow_act = { + .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + .flow_tag = MLX5_FS_DEFAULT_FLOW_TAG, + .encap_id = 0, + }; struct mlx5_flow_destination dest; struct mlx5e_tir *tir = priv->indir_tir; struct mlx5_flow_spec *spec; @@ -206,8 +211,7 @@ static int arfs_add_default_rule(struct mlx5e_priv *priv, } arfs_t->default_rule = mlx5_add_flow_rules(arfs_t->ft.t, spec, - MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, - MLX5_FS_DEFAULT_FLOW_TAG, + &flow_act, &dest, 1); if (IS_ERR(arfs_t->default_rule)) { err = PTR_ERR(arfs_t->default_rule); @@ -465,6 +469,11 @@ static struct arfs_table *arfs_get_table(struct mlx5e_arfs_tables *arfs, static struct mlx5_flow_handle *arfs_add_rule(struct mlx5e_priv *priv, struct arfs_rule *arfs_rule) { + struct mlx5_flow_act flow_act = { + .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + .flow_tag = MLX5_FS_DEFAULT_FLOW_TAG, + .encap_id = 0, + }; struct mlx5e_arfs_tables *arfs = &priv->fs.arfs; struct arfs_tuple *tuple = &arfs_rule->tuple; struct mlx5_flow_handle *rule = NULL; @@ -544,9 +553,7 @@ static struct mlx5_flow_handle *arfs_add_rule(struct mlx5e_priv *priv, } dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; dest.tir_num = priv->direct_tir[arfs_rule->rxq].tirn; - rule = mlx5_add_flow_rules(ft, spec, 
MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, - MLX5_FS_DEFAULT_FLOW_TAG, - &dest, 1); + rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); if (IS_ERR(rule)) { err = PTR_ERR(rule); netdev_err(priv->netdev, "%s: add rule(filter id=%d, rq idx=%d) failed, err=%d\n", diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index 9617892e0f15..1fe80de5d68f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -158,6 +158,11 @@ static int __mlx5e_add_vlan_rule(struct mlx5e_priv *priv, enum mlx5e_vlan_rule_type rule_type, u16 vid, struct mlx5_flow_spec *spec) { + struct mlx5_flow_act flow_act = { + .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + .flow_tag = MLX5_FS_DEFAULT_FLOW_TAG, + .encap_id = 0, + }; struct mlx5_flow_table *ft = priv->fs.vlan.ft.t; struct mlx5_flow_destination dest; struct mlx5_flow_handle **rule_p; @@ -187,10 +192,7 @@ static int __mlx5e_add_vlan_rule(struct mlx5e_priv *priv, break; } - *rule_p = mlx5_add_flow_rules(ft, spec, - MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, - MLX5_FS_DEFAULT_FLOW_TAG, - &dest, 1); + *rule_p = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); if (IS_ERR(*rule_p)) { err = PTR_ERR(*rule_p); @@ -623,6 +625,11 @@ mlx5e_generate_ttc_rule(struct mlx5e_priv *priv, u16 etype, u8 proto) { + struct mlx5_flow_act flow_act = { + .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + .flow_tag = MLX5_FS_DEFAULT_FLOW_TAG, + .encap_id = 0, + }; struct mlx5_flow_handle *rule; struct mlx5_flow_spec *spec; int err = 0; @@ -644,10 +651,7 @@ mlx5e_generate_ttc_rule(struct mlx5e_priv *priv, MLX5_SET(fte_match_param, spec->match_value, outer_headers.ethertype, etype); } - rule = mlx5_add_flow_rules(ft, spec, - MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, - MLX5_FS_DEFAULT_FLOW_TAG, - dest, 1); + rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, 1); if (IS_ERR(rule)) { err = PTR_ERR(rule); netdev_err(priv->netdev, "%s: add rule failed\n", __func__); @@ 
-810,6 +814,11 @@ static void mlx5e_del_l2_flow_rule(struct mlx5e_priv *priv, static int mlx5e_add_l2_flow_rule(struct mlx5e_priv *priv, struct mlx5e_l2_rule *ai, int type) { + struct mlx5_flow_act flow_act = { + .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + .flow_tag = MLX5_FS_DEFAULT_FLOW_TAG, + .encap_id = 0, + }; struct mlx5_flow_table *ft = priv->fs.l2.ft.t; struct mlx5_flow_destination dest; struct mlx5_flow_spec *spec; @@ -848,9 +857,7 @@ static int mlx5e_add_l2_flow_rule(struct mlx5e_priv *priv, break; } - ai->rule = mlx5_add_flow_rules(ft, spec, - MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, - MLX5_FS_DEFAULT_FLOW_TAG, &dest, 1); + ai->rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); if (IS_ERR(ai->rule)) { netdev_err(priv->netdev, "%s: add l2 rule(mac:%pM) failed\n", __func__, mv_dmac); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c index 87bb3db7b501..3691451c728c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c @@ -290,10 +290,10 @@ add_ethtool_flow_rule(struct mlx5e_priv *priv, struct ethtool_rx_flow_spec *fs) { struct mlx5_flow_destination *dst = NULL; + struct mlx5_flow_act flow_act = {0}; struct mlx5_flow_spec *spec; struct mlx5_flow_handle *rule; int err = 0; - u32 action; spec = mlx5_vzalloc(sizeof(*spec)); if (!spec) @@ -304,7 +304,7 @@ add_ethtool_flow_rule(struct mlx5e_priv *priv, goto free; if (fs->ring_cookie == RX_CLS_FLOW_DISC) { - action = MLX5_FLOW_CONTEXT_ACTION_DROP; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP; } else { dst = kzalloc(sizeof(*dst), GFP_KERNEL); if (!dst) { @@ -314,12 +314,12 @@ add_ethtool_flow_rule(struct mlx5e_priv *priv, dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR; dst->tir_num = priv->direct_tir[fs->ring_cookie].tirn; - action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; } spec->match_criteria_enable = 
(!outer_header_zero(spec->match_criteria)); - rule = mlx5_add_flow_rules(ft, spec, action, - MLX5_FS_DEFAULT_FLOW_TAG, dst, 1); + flow_act.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG; + rule = mlx5_add_flow_rules(ft, spec, &flow_act, dst, 1); if (IS_ERR(rule)) { err = PTR_ERR(rule); netdev_err(priv->netdev, "%s: failed to add ethtool steering rule: %d\n", diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index cdd430330e8e..35e38d12ba68 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -61,6 +61,11 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, { struct mlx5_core_dev *dev = priv->mdev; struct mlx5_flow_destination dest = { 0 }; + struct mlx5_flow_act flow_act = { + .action = action, + .flow_tag = flow_tag, + .encap_id = 0, + }; struct mlx5_fc *counter = NULL; struct mlx5_flow_handle *rule; bool table_created = false; @@ -95,9 +100,7 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, } spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - rule = mlx5_add_flow_rules(priv->fs.tc.t, spec, - action, flow_tag, - &dest, 1); + rule = mlx5_add_flow_rules(priv->fs.tc.t, spec, &flow_act, &dest, 1); if (IS_ERR(rule)) goto err_add_rule; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 27f21ac66639..ae05d27832e4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -244,6 +244,7 @@ __esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule, int match_header = (is_zero_ether_addr(mac_c) ? 
0 : MLX5_MATCH_OUTER_HEADERS); struct mlx5_flow_handle *flow_rule = NULL; + struct mlx5_flow_act flow_act = {0}; struct mlx5_flow_destination dest; struct mlx5_flow_spec *spec; void *mv_misc = NULL; @@ -285,10 +286,10 @@ __esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule, "\tFDB add rule dmac_v(%pM) dmac_c(%pM) -> vport(%d)\n", dmac_v, dmac_c, vport); spec->match_criteria_enable = match_header; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; flow_rule = mlx5_add_flow_rules(esw->fdb_table.fdb, spec, - MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, - 0, &dest, 1); + &flow_act, &dest, 1); if (IS_ERR(flow_rule)) { esw_warn(esw->dev, "FDB: Failed to add flow rule: dmac_v(%pM) dmac_c(%pM) -> vport(%d), err(%ld)\n", @@ -1212,6 +1213,7 @@ static void esw_vport_disable_ingress_acl(struct mlx5_eswitch *esw, static int esw_vport_ingress_config(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { + struct mlx5_flow_act flow_act = {0}; struct mlx5_flow_spec *spec; int err = 0; u8 *smac_v; @@ -1264,10 +1266,10 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw, } spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW; vport->ingress.allow_rule = mlx5_add_flow_rules(vport->ingress.acl, spec, - MLX5_FLOW_CONTEXT_ACTION_ALLOW, - 0, NULL, 0); + &flow_act, NULL, 0); if (IS_ERR(vport->ingress.allow_rule)) { err = PTR_ERR(vport->ingress.allow_rule); esw_warn(esw->dev, @@ -1278,10 +1280,10 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw, } memset(spec, 0, sizeof(*spec)); + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP; vport->ingress.drop_rule = mlx5_add_flow_rules(vport->ingress.acl, spec, - MLX5_FLOW_CONTEXT_ACTION_DROP, - 0, NULL, 0); + &flow_act, NULL, 0); if (IS_ERR(vport->ingress.drop_rule)) { err = PTR_ERR(vport->ingress.drop_rule); esw_warn(esw->dev, @@ -1301,6 +1303,7 @@ out: static int esw_vport_egress_config(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { + struct 
mlx5_flow_act flow_act = {0}; struct mlx5_flow_spec *spec; int err = 0; @@ -1338,10 +1341,10 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw, MLX5_SET(fte_match_param, spec->match_value, outer_headers.first_vid, vport->info.vlan); spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW; vport->egress.allowed_vlan = mlx5_add_flow_rules(vport->egress.acl, spec, - MLX5_FLOW_CONTEXT_ACTION_ALLOW, - 0, NULL, 0); + &flow_act, NULL, 0); if (IS_ERR(vport->egress.allowed_vlan)) { err = PTR_ERR(vport->egress.allowed_vlan); esw_warn(esw->dev, @@ -1353,10 +1356,10 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw, /* Drop others rule (star rule) */ memset(spec, 0, sizeof(*spec)); + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP; vport->egress.drop_rule = mlx5_add_flow_rules(vport->egress.acl, spec, - MLX5_FLOW_CONTEXT_ACTION_DROP, - 0, NULL, 0); + &flow_act, NULL, 0); if (IS_ERR(vport->egress.drop_rule)) { err = PTR_ERR(vport->egress.drop_rule); esw_warn(esw->dev, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index b18f9513e71e..a390117ed34c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -49,23 +49,23 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, struct mlx5_esw_flow_attr *attr) { struct mlx5_flow_destination dest[2] = {}; + struct mlx5_flow_act flow_act = {0}; struct mlx5_fc *counter = NULL; struct mlx5_flow_handle *rule; void *misc; - int action; int i = 0; if (esw->mode != SRIOV_OFFLOADS) return ERR_PTR(-EOPNOTSUPP); - action = attr->action; + flow_act.action = attr->action; - if (action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { + if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { dest[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT; dest[i].vport_num = attr->out_rep->vport; i++; } - if (action & 
MLX5_FLOW_CONTEXT_ACTION_COUNT) { + if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { counter = mlx5_fc_create(esw->dev, true); if (IS_ERR(counter)) return ERR_CAST(counter); @@ -84,7 +84,7 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, MLX5_MATCH_MISC_PARAMETERS; rule = mlx5_add_flow_rules((struct mlx5_flow_table *)esw->fdb_table.fdb, - spec, action, 0, dest, i); + spec, &flow_act, dest, i); if (IS_ERR(rule)) mlx5_fc_destroy(esw->dev, counter); @@ -274,6 +274,7 @@ out: static struct mlx5_flow_handle * mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, u32 sqn) { + struct mlx5_flow_act flow_act = {0}; struct mlx5_flow_destination dest; struct mlx5_flow_handle *flow_rule; struct mlx5_flow_spec *spec; @@ -297,10 +298,10 @@ mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, u32 sqn spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; dest.vport_num = vport; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.fdb, spec, - MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, - 0, &dest, 1); + &flow_act, &dest, 1); if (IS_ERR(flow_rule)) esw_warn(esw->dev, "FDB: Failed to add send to vport rule err %ld\n", PTR_ERR(flow_rule)); out: @@ -363,6 +364,7 @@ out_err: static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw) { + struct mlx5_flow_act flow_act = {0}; struct mlx5_flow_destination dest; struct mlx5_flow_handle *flow_rule = NULL; struct mlx5_flow_spec *spec; @@ -377,10 +379,10 @@ static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw) dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; dest.vport_num = 0; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.fdb, spec, - MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, - 0, &dest, 1); + &flow_act, &dest, 1); if (IS_ERR(flow_rule)) { err = PTR_ERR(flow_rule); esw_warn(esw->dev, "FDB: Failed to add miss flow rule err %d\n", 
err); @@ -591,6 +593,7 @@ static void esw_destroy_vport_rx_group(struct mlx5_eswitch *esw) struct mlx5_flow_handle * mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 tirn) { + struct mlx5_flow_act flow_act = {0}; struct mlx5_flow_destination dest; struct mlx5_flow_handle *flow_rule; struct mlx5_flow_spec *spec; @@ -613,9 +616,9 @@ mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 tirn) dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; dest.tir_num = tirn; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; flow_rule = mlx5_add_flow_rules(esw->offloads.ft_offloads, spec, - MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, - 0, &dest, 1); + &flow_act, &dest, 1); if (IS_ERR(flow_rule)) { esw_warn(esw->dev, "fs offloads: Failed to add vport rx rule err %ld\n", PTR_ERR(flow_rule)); goto out; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index cc97bb218e74..c4478ecd8056 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -248,6 +248,7 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, MLX5_SET(flow_context, in_flow_context, group_id, group_id); MLX5_SET(flow_context, in_flow_context, flow_tag, fte->flow_tag); MLX5_SET(flow_context, in_flow_context, action, fte->action); + MLX5_SET(flow_context, in_flow_context, encap_id, fte->encap_id); in_match_value = MLX5_ADDR_OF(flow_context, in_flow_context, match_value); memcpy(in_match_value, &fte->val, MLX5_ST_SZ_BYTES(fte_match_param)); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 4d28c8d70482..9adc766c7a3f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -460,8 +460,7 @@ static void del_flow_group(struct fs_node *node) fg->id, ft->id); } -static struct fs_fte *alloc_fte(u8 action, - u32 flow_tag, +static 
struct fs_fte *alloc_fte(struct mlx5_flow_act *flow_act, u32 *match_value, unsigned int index) { @@ -473,9 +472,10 @@ static struct fs_fte *alloc_fte(u8 action, memcpy(fte->val, match_value, sizeof(fte->val)); fte->node.type = FS_TYPE_FLOW_ENTRY; - fte->flow_tag = flow_tag; + fte->flow_tag = flow_act->flow_tag; fte->index = index; - fte->action = action; + fte->action = flow_act->action; + fte->encap_id = flow_act->encap_id; return fte; } @@ -1117,15 +1117,14 @@ static unsigned int get_free_fte_index(struct mlx5_flow_group *fg, /* prev is output, prev->next = new_fte */ static struct fs_fte *create_fte(struct mlx5_flow_group *fg, u32 *match_value, - u8 action, - u32 flow_tag, + struct mlx5_flow_act *flow_act, struct list_head **prev) { struct fs_fte *fte; int index; index = get_free_fte_index(fg, prev); - fte = alloc_fte(action, flow_tag, match_value, index); + fte = alloc_fte(flow_act, match_value, index); if (IS_ERR(fte)) return fte; @@ -1219,8 +1218,7 @@ static struct mlx5_flow_rule *find_flow_rule(struct fs_fte *fte, static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg, u32 *match_value, - u8 action, - u32 flow_tag, + struct mlx5_flow_act *flow_act, struct mlx5_flow_destination *dest, int dest_num) { @@ -1234,12 +1232,13 @@ static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg, fs_for_each_fte(fte, fg) { nested_lock_ref_node(&fte->node, FS_MUTEX_CHILD); if (compare_match_value(&fg->mask, match_value, &fte->val) && - (action & fte->action) && flow_tag == fte->flow_tag) { + (flow_act->action & fte->action) && + flow_act->flow_tag == fte->flow_tag) { int old_action = fte->action; - fte->action |= action; + fte->action |= flow_act->action; handle = add_rule_fte(fte, fg, dest, dest_num, - old_action != action); + old_action != flow_act->action); if (IS_ERR(handle)) { fte->action = old_action; goto unlock_fte; @@ -1255,7 +1254,7 @@ static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg, goto unlock_fg; } - fte = 
create_fte(fg, match_value, action, flow_tag, &prev); + fte = create_fte(fg, match_value, flow_act, &prev); if (IS_ERR(fte)) { handle = (void *)fte; goto unlock_fg; @@ -1332,17 +1331,17 @@ static bool dest_is_valid(struct mlx5_flow_destination *dest, static struct mlx5_flow_handle * _mlx5_add_flow_rules(struct mlx5_flow_table *ft, struct mlx5_flow_spec *spec, - u32 action, - u32 flow_tag, + struct mlx5_flow_act *flow_act, struct mlx5_flow_destination *dest, int dest_num) + { struct mlx5_flow_group *g; struct mlx5_flow_handle *rule; int i; for (i = 0; i < dest_num; i++) { - if (!dest_is_valid(&dest[i], action, ft)) + if (!dest_is_valid(&dest[i], flow_act->action, ft)) return ERR_PTR(-EINVAL); } @@ -1353,7 +1352,7 @@ _mlx5_add_flow_rules(struct mlx5_flow_table *ft, g->mask.match_criteria, spec->match_criteria)) { rule = add_rule_fg(g, spec->match_value, - action, flow_tag, dest, dest_num); + flow_act, dest, dest_num); if (!IS_ERR(rule) || PTR_ERR(rule) != -ENOSPC) goto unlock; } @@ -1365,8 +1364,7 @@ _mlx5_add_flow_rules(struct mlx5_flow_table *ft, goto unlock; } - rule = add_rule_fg(g, spec->match_value, - action, flow_tag, dest, dest_num); + rule = add_rule_fg(g, spec->match_value, flow_act, dest, dest_num); if (IS_ERR(rule)) { /* Remove assumes refcount > 0 and autogroup creates a group * with a refcount = 0. 
@@ -1390,8 +1388,7 @@ static bool fwd_next_prio_supported(struct mlx5_flow_table *ft) struct mlx5_flow_handle * mlx5_add_flow_rules(struct mlx5_flow_table *ft, struct mlx5_flow_spec *spec, - u32 action, - u32 flow_tag, + struct mlx5_flow_act *flow_act, struct mlx5_flow_destination *dest, int dest_num) { @@ -1399,11 +1396,11 @@ mlx5_add_flow_rules(struct mlx5_flow_table *ft, struct mlx5_flow_destination gen_dest; struct mlx5_flow_table *next_ft = NULL; struct mlx5_flow_handle *handle = NULL; - u32 sw_action = action; + u32 sw_action = flow_act->action; struct fs_prio *prio; fs_get_obj(prio, ft->node.parent); - if (action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) { + if (flow_act->action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) { if (!fwd_next_prio_supported(ft)) return ERR_PTR(-EOPNOTSUPP); if (dest) @@ -1415,15 +1412,14 @@ mlx5_add_flow_rules(struct mlx5_flow_table *ft, gen_dest.ft = next_ft; dest = &gen_dest; dest_num = 1; - action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + flow_act->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; } else { mutex_unlock(&root->chain_lock); return ERR_PTR(-EOPNOTSUPP); } } - handle = _mlx5_add_flow_rules(ft, spec, action, flow_tag, dest, - dest_num); + handle = _mlx5_add_flow_rules(ft, spec, flow_act, dest, dest_num); if (sw_action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) { if (!IS_ERR_OR_NULL(handle) && diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index 9f616ed25a89..8e668c63f69e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -151,6 +151,7 @@ struct fs_fte { u32 flow_tag; u32 index; u32 action; + u32 encap_id; enum fs_fte_status status; struct mlx5_fc *counter; }; diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index ab1a5fd2e995..949b24b6c479 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -130,14 +130,19 @@ struct mlx5_flow_group * 
mlx5_create_flow_group(struct mlx5_flow_table *ft, u32 *in); void mlx5_destroy_flow_group(struct mlx5_flow_group *fg); +struct mlx5_flow_act { + u32 action; + u32 flow_tag; + u32 encap_id; +}; + /* Single destination per rule. * Group ID is implied by the match criteria. */ struct mlx5_flow_handle * mlx5_add_flow_rules(struct mlx5_flow_table *ft, struct mlx5_flow_spec *spec, - u32 action, - u32 flow_tag, + struct mlx5_flow_act *flow_act, struct mlx5_flow_destination *dest, int dest_num); void mlx5_del_flow_rules(struct mlx5_flow_handle *fr); -- cgit v1.2.3 From 72478a0cc4025e16f68672844ebebf60524e1668 Mon Sep 17 00:00:00 2001 From: Milo Kim Date: Fri, 28 Oct 2016 21:37:02 +0900 Subject: mfd: tps65217: Fix mismatched interrupt number The enum values of 'tps65217_irq_type' do not match the hwirq numbers parsed from the DT[*]. The MFD driver looks up the IRQ data by hwirq, but the values differ, so irq_to_tps65217_irq() returns mismatched IRQ data. As a result, the power button driver enables the USB interrupt instead of the PB interrupt when it is probed. According to the TPS65217 register map[**], the USB interrupt is the LSB. This patch defines the IRQ values so that they are synchronized with the DT bindings.
[*] include/dt-bindings/mfd/tps65217.h [**] http://www.ti.com/lit/ds/symlink/tps65217.pdf Signed-off-by: Milo Kim Signed-off-by: Tony Lindgren --- include/linux/mfd/tps65217.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/tps65217.h b/include/linux/mfd/tps65217.h index 4ccda8969639..3cbec4b2496a 100644 --- a/include/linux/mfd/tps65217.h +++ b/include/linux/mfd/tps65217.h @@ -234,12 +234,11 @@ struct tps65217_bl_pdata { int dft_brightness; }; -enum tps65217_irq_type { - TPS65217_IRQ_PB, - TPS65217_IRQ_AC, - TPS65217_IRQ_USB, - TPS65217_NUM_IRQ -}; +/* Interrupt numbers */ +#define TPS65217_IRQ_USB 0 +#define TPS65217_IRQ_AC 1 +#define TPS65217_IRQ_PB 2 +#define TPS65217_NUM_IRQ 3 /** * struct tps65217_board - packages regulator init data -- cgit v1.2.3 From fc4d24c9b47150245b3eb5bebc2ad4764c754ef4 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 3 Nov 2016 15:49:57 +0100 Subject: fs/buffer: Convert to hotplug state machine Install the callbacks via the state machine. 
Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Cc: linux-fsdevel@vger.kernel.org Cc: Alexander Viro Cc: rt@linutronix.de Link: http://lkml.kernel.org/r/20161103145021.28528-2-bigeasy@linutronix.de Signed-off-by: Thomas Gleixner --- fs/buffer.c | 16 ++++++---------- include/linux/cpuhotplug.h | 1 + 2 files changed, 7 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/fs/buffer.c b/fs/buffer.c index b205a629001d..1613656028d6 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -3403,7 +3403,7 @@ void free_buffer_head(struct buffer_head *bh) } EXPORT_SYMBOL(free_buffer_head); -static void buffer_exit_cpu(int cpu) +static int buffer_exit_cpu_dead(unsigned int cpu) { int i; struct bh_lru *b = &per_cpu(bh_lrus, cpu); @@ -3414,14 +3414,7 @@ static void buffer_exit_cpu(int cpu) } this_cpu_add(bh_accounting.nr, per_cpu(bh_accounting, cpu).nr); per_cpu(bh_accounting, cpu).nr = 0; -} - -static int buffer_cpu_notify(struct notifier_block *self, - unsigned long action, void *hcpu) -{ - if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) - buffer_exit_cpu((unsigned long)hcpu); - return NOTIFY_OK; + return 0; } /** @@ -3471,6 +3464,7 @@ EXPORT_SYMBOL(bh_submit_read); void __init buffer_init(void) { unsigned long nrpages; + int ret; bh_cachep = kmem_cache_create("buffer_head", sizeof(struct buffer_head), 0, @@ -3483,5 +3477,7 @@ void __init buffer_init(void) */ nrpages = (nr_free_buffer_pages() * 10) / 100; max_buffer_heads = nrpages * (PAGE_SIZE / sizeof(struct buffer_head)); - hotcpu_notifier(buffer_cpu_notify, 0); + ret = cpuhp_setup_state_nocalls(CPUHP_FS_BUFF_DEAD, "fs/buffer:dead", + NULL, buffer_exit_cpu_dead); + WARN_ON(ret < 0); } diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index afe641c02dca..69b74fa0da60 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -30,6 +30,7 @@ enum cpuhp_state { CPUHP_ACPI_CPUDRV_DEAD, CPUHP_S390_PFAULT_DEAD, CPUHP_BLK_MQ_DEAD, + CPUHP_FS_BUFF_DEAD, 
CPUHP_WORKQUEUE_PREP, CPUHP_POWER_NUMA_PREPARE, CPUHP_HRTIMERS_PREPARE, -- cgit v1.2.3 From 90b14889d2f9b29d7e5b4b2d36251c13ce3dd13f Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 3 Nov 2016 15:49:58 +0100 Subject: kernel/printk: Convert to hotplug state machine Install the callbacks via the state machine. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Cc: Andrew Morton Cc: rt@linutronix.de Link: http://lkml.kernel.org/r/20161103145021.28528-3-bigeasy@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/cpuhotplug.h | 1 + kernel/printk/printk.c | 29 +++++++++++++---------------- 2 files changed, 14 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 69b74fa0da60..4174083280d7 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -31,6 +31,7 @@ enum cpuhp_state { CPUHP_S390_PFAULT_DEAD, CPUHP_BLK_MQ_DEAD, CPUHP_FS_BUFF_DEAD, + CPUHP_PRINTK_DEAD, CPUHP_WORKQUEUE_PREP, CPUHP_POWER_NUMA_PREPARE, CPUHP_HRTIMERS_PREPARE, diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index de08fc90baaf..4487ffcd42d5 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -2185,27 +2185,18 @@ void resume_console(void) /** * console_cpu_notify - print deferred console messages after CPU hotplug - * @self: notifier struct - * @action: CPU hotplug event - * @hcpu: unused + * @cpu: unused * * If printk() is called from a CPU that is not online yet, the messages * will be spooled but will not show up on the console. This function is * called when a new CPU comes online (or fails to come up), and ensures * that any such output gets printed. 
*/ -static int console_cpu_notify(struct notifier_block *self, - unsigned long action, void *hcpu) -{ - switch (action) { - case CPU_ONLINE: - case CPU_DEAD: - case CPU_DOWN_FAILED: - case CPU_UP_CANCELED: - console_lock(); - console_unlock(); - } - return NOTIFY_OK; +static int console_cpu_notify(unsigned int cpu) +{ + console_lock(); + console_unlock(); + return 0; } /** @@ -2843,6 +2834,7 @@ EXPORT_SYMBOL(unregister_console); static int __init printk_late_init(void) { struct console *con; + int ret; for_each_console(con) { if (!keep_bootcon && con->flags & CON_BOOT) { @@ -2857,7 +2849,12 @@ static int __init printk_late_init(void) unregister_console(con); } } - hotcpu_notifier(console_cpu_notify, 0); + ret = cpuhp_setup_state_nocalls(CPUHP_PRINTK_DEAD, "printk:dead", NULL, + console_cpu_notify); + WARN_ON(ret < 0); + ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "printk:online", + console_cpu_notify, NULL); + WARN_ON(ret < 0); return 0; } late_initcall(printk_late_init); -- cgit v1.2.3 From 308167fcb330296fc80505a6b11ba0661f38a4cc Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 3 Nov 2016 15:49:59 +0100 Subject: mm/memcg: Convert to hotplug state machine Install the callbacks via the state machine. 
Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Cc: Michal Hocko Cc: linux-mm@kvack.org Cc: rt@linutronix.de Cc: Johannes Weiner Cc: cgroups@vger.kernel.org Link: http://lkml.kernel.org/r/20161103145021.28528-4-bigeasy@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/cpuhotplug.h | 1 + mm/memcontrol.c | 24 ++++++++---------------- 2 files changed, 9 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 4174083280d7..c622ab349af3 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -32,6 +32,7 @@ enum cpuhp_state { CPUHP_BLK_MQ_DEAD, CPUHP_FS_BUFF_DEAD, CPUHP_PRINTK_DEAD, + CPUHP_MM_MEMCQ_DEAD, CPUHP_WORKQUEUE_PREP, CPUHP_POWER_NUMA_PREPARE, CPUHP_HRTIMERS_PREPARE, diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 0f870ba43942..6c2043509fb5 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1816,22 +1816,13 @@ static void drain_all_stock(struct mem_cgroup *root_memcg) mutex_unlock(&percpu_charge_mutex); } -static int memcg_cpu_hotplug_callback(struct notifier_block *nb, - unsigned long action, - void *hcpu) +static int memcg_hotplug_cpu_dead(unsigned int cpu) { - int cpu = (unsigned long)hcpu; struct memcg_stock_pcp *stock; - if (action == CPU_ONLINE) - return NOTIFY_OK; - - if (action != CPU_DEAD && action != CPU_DEAD_FROZEN) - return NOTIFY_OK; - stock = &per_cpu(memcg_stock, cpu); drain_stock(stock); - return NOTIFY_OK; + return 0; } static void reclaim_high(struct mem_cgroup *memcg, @@ -5774,16 +5765,17 @@ __setup("cgroup.memory=", cgroup_memory); /* * subsys_initcall() for memory controller. * - * Some parts like hotcpu_notifier() have to be initialized from this context - * because of lock dependencies (cgroup_lock -> cpu hotplug) but basically - * everything that doesn't depend on a specific mem_cgroup structure should - * be initialized from here. 
+ * Some parts like memcg_hotplug_cpu_dead() have to be initialized from this + * context because of lock dependencies (cgroup_lock -> cpu hotplug) but + * basically everything that doesn't depend on a specific mem_cgroup structure + * should be initialized from here. */ static int __init mem_cgroup_init(void) { int cpu, node; - hotcpu_notifier(memcg_cpu_hotplug_callback, 0); + cpuhp_setup_state_nocalls(CPUHP_MM_MEMCQ_DEAD, "mm/memctrl:dead", NULL, + memcg_hotplug_cpu_dead); for_each_possible_cpu(cpu) INIT_WORK(&per_cpu_ptr(&memcg_stock, cpu)->work, -- cgit v1.2.3 From 5588f5afb4cfc33eb377b751ba4b97184373e8d6 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 3 Nov 2016 15:50:00 +0100 Subject: lib/percpu_counter: Convert to hotplug state machine Install the callbacks via the state machine and let the core invoke the callbacks on the already online CPUs. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Cc: rt@linutronix.de Link: http://lkml.kernel.org/r/20161103145021.28528-5-bigeasy@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/cpuhotplug.h | 1 + lib/percpu_counter.c | 25 ++++++++++++++----------- 2 files changed, 15 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index c622ab349af3..04e5f99ffc70 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -33,6 +33,7 @@ enum cpuhp_state { CPUHP_FS_BUFF_DEAD, CPUHP_PRINTK_DEAD, CPUHP_MM_MEMCQ_DEAD, + CPUHP_PERCPU_CNT_DEAD, CPUHP_WORKQUEUE_PREP, CPUHP_POWER_NUMA_PREPARE, CPUHP_HRTIMERS_PREPARE, diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c index 72d36113ccaa..c8cebb137076 100644 --- a/lib/percpu_counter.c +++ b/lib/percpu_counter.c @@ -158,25 +158,21 @@ EXPORT_SYMBOL(percpu_counter_destroy); int percpu_counter_batch __read_mostly = 32; EXPORT_SYMBOL(percpu_counter_batch); -static void compute_batch_value(void) +static int compute_batch_value(unsigned int 
cpu) { int nr = num_online_cpus(); percpu_counter_batch = max(32, nr*2); + return 0; } -static int percpu_counter_hotcpu_callback(struct notifier_block *nb, - unsigned long action, void *hcpu) +static int percpu_counter_cpu_dead(unsigned int cpu) { #ifdef CONFIG_HOTPLUG_CPU - unsigned int cpu; struct percpu_counter *fbc; - compute_batch_value(); - if (action != CPU_DEAD && action != CPU_DEAD_FROZEN) - return NOTIFY_OK; + compute_batch_value(cpu); - cpu = (unsigned long)hcpu; spin_lock_irq(&percpu_counters_lock); list_for_each_entry(fbc, &percpu_counters, list) { s32 *pcount; @@ -190,7 +186,7 @@ static int percpu_counter_hotcpu_callback(struct notifier_block *nb, } spin_unlock_irq(&percpu_counters_lock); #endif - return NOTIFY_OK; + return 0; } /* @@ -222,8 +218,15 @@ EXPORT_SYMBOL(__percpu_counter_compare); static int __init percpu_counter_startup(void) { - compute_batch_value(); - hotcpu_notifier(percpu_counter_hotcpu_callback, 0); + int ret; + + ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "lib/percpu_cnt:online", + compute_batch_value, NULL); + WARN_ON(ret < 0); + ret = cpuhp_setup_state_nocalls(CPUHP_PERCPU_CNT_DEAD, + "lib/percpu_cnt:dead", NULL, + percpu_counter_cpu_dead); + WARN_ON(ret < 0); return 0; } module_init(percpu_counter_startup); -- cgit v1.2.3 From d544abd5ff7d8b07c0c67682a63e4939c3c82914 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 3 Nov 2016 15:50:01 +0100 Subject: lib/radix-tree: Convert to hotplug state machine Install the callbacks via the state machine. 
Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Cc: Andrew Morton Cc: rt@linutronix.de Link: http://lkml.kernel.org/r/20161103145021.28528-6-bigeasy@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/cpuhotplug.h | 1 + lib/radix-tree.c | 25 ++++++++++++------------- 2 files changed, 13 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 04e5f99ffc70..89310fb1031d 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -34,6 +34,7 @@ enum cpuhp_state { CPUHP_PRINTK_DEAD, CPUHP_MM_MEMCQ_DEAD, CPUHP_PERCPU_CNT_DEAD, + CPUHP_RADIX_DEAD, CPUHP_WORKQUEUE_PREP, CPUHP_POWER_NUMA_PREPARE, CPUHP_HRTIMERS_PREPARE, diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 8e6d552c40dd..4b8bb3618b83 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -1642,32 +1642,31 @@ static __init void radix_tree_init_maxnodes(void) } } -static int radix_tree_callback(struct notifier_block *nfb, - unsigned long action, void *hcpu) +static int radix_tree_cpu_dead(unsigned int cpu) { - int cpu = (long)hcpu; struct radix_tree_preload *rtp; struct radix_tree_node *node; /* Free per-cpu pool of preloaded nodes */ - if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) { - rtp = &per_cpu(radix_tree_preloads, cpu); - while (rtp->nr) { - node = rtp->nodes; - rtp->nodes = node->private_data; - kmem_cache_free(radix_tree_node_cachep, node); - rtp->nr--; - } + rtp = &per_cpu(radix_tree_preloads, cpu); + while (rtp->nr) { + node = rtp->nodes; + rtp->nodes = node->private_data; + kmem_cache_free(radix_tree_node_cachep, node); + rtp->nr--; } - return NOTIFY_OK; + return 0; } void __init radix_tree_init(void) { + int ret; radix_tree_node_cachep = kmem_cache_create("radix_tree_node", sizeof(struct radix_tree_node), 0, SLAB_PANIC | SLAB_RECLAIM_ACCOUNT, radix_tree_node_ctor); radix_tree_init_maxnodes(); - hotcpu_notifier(radix_tree_callback, 0); + ret = 
cpuhp_setup_state_nocalls(CPUHP_RADIX_DEAD, "lib/radix:dead", + NULL, radix_tree_cpu_dead); + WARN_ON(ret < 0); } -- cgit v1.2.3 From 005fd4bbef168e9dea896085b001d64369e9834a Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 3 Nov 2016 15:50:02 +0100 Subject: mm/page_alloc: Convert to hotplug state machine Install the callbacks via the state machine. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Cc: linux-mm@kvack.org Cc: rt@linutronix.de Link: http://lkml.kernel.org/r/20161103145021.28528-7-bigeasy@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/cpuhotplug.h | 1 + mm/page_alloc.c | 49 +++++++++++++++++++++++----------------------- 2 files changed, 26 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 89310fb1031d..31c58f6ec3c6 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -35,6 +35,7 @@ enum cpuhp_state { CPUHP_MM_MEMCQ_DEAD, CPUHP_PERCPU_CNT_DEAD, CPUHP_RADIX_DEAD, + CPUHP_PAGE_ALLOC_DEAD, CPUHP_WORKQUEUE_PREP, CPUHP_POWER_NUMA_PREPARE, CPUHP_HRTIMERS_PREPARE, diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 072d791dce2d..fc98c2bae905 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -6491,38 +6491,39 @@ void __init free_area_init(unsigned long *zones_size) __pa(PAGE_OFFSET) >> PAGE_SHIFT, NULL); } -static int page_alloc_cpu_notify(struct notifier_block *self, - unsigned long action, void *hcpu) +static int page_alloc_cpu_dead(unsigned int cpu) { - int cpu = (unsigned long)hcpu; - if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) { - lru_add_drain_cpu(cpu); - drain_pages(cpu); + lru_add_drain_cpu(cpu); + drain_pages(cpu); - /* - * Spill the event counters of the dead processor - * into the current processors event counters. - * This artificially elevates the count of the current - * processor. 
- */ - vm_events_fold_cpu(cpu); + /* + * Spill the event counters of the dead processor + * into the current processors event counters. + * This artificially elevates the count of the current + * processor. + */ + vm_events_fold_cpu(cpu); - /* - * Zero the differential counters of the dead processor - * so that the vm statistics are consistent. - * - * This is only okay since the processor is dead and cannot - * race with what we are doing. - */ - cpu_vm_stats_fold(cpu); - } - return NOTIFY_OK; + /* + * Zero the differential counters of the dead processor + * so that the vm statistics are consistent. + * + * This is only okay since the processor is dead and cannot + * race with what we are doing. + */ + cpu_vm_stats_fold(cpu); + return 0; } void __init page_alloc_init(void) { - hotcpu_notifier(page_alloc_cpu_notify, 0); + int ret; + + ret = cpuhp_setup_state_nocalls(CPUHP_PAGE_ALLOC_DEAD, + "mm/page_alloc:dead", NULL, + page_alloc_cpu_dead); + WARN_ON(ret < 0); } /* -- cgit v1.2.3 From f0bf90def3528cebed45ebd81d9b5d0fa17d7422 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 3 Nov 2016 15:50:04 +0100 Subject: net/dev: Convert to hotplug state machine Install the callbacks via the state machine. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Cc: netdev@vger.kernel.org Cc: "David S. 
Miller" Cc: rt@linutronix.de Link: http://lkml.kernel.org/r/20161103145021.28528-9-bigeasy@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/cpuhotplug.h | 1 + net/core/dev.c | 16 ++++++---------- 2 files changed, 7 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 31c58f6ec3c6..394eb7ed53be 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -36,6 +36,7 @@ enum cpuhp_state { CPUHP_PERCPU_CNT_DEAD, CPUHP_RADIX_DEAD, CPUHP_PAGE_ALLOC_DEAD, + CPUHP_NET_DEV_DEAD, CPUHP_WORKQUEUE_PREP, CPUHP_POWER_NUMA_PREPARE, CPUHP_HRTIMERS_PREPARE, diff --git a/net/core/dev.c b/net/core/dev.c index 820bac239738..8e909b2a5f2f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -7953,18 +7953,13 @@ out: } EXPORT_SYMBOL_GPL(dev_change_net_namespace); -static int dev_cpu_callback(struct notifier_block *nfb, - unsigned long action, - void *ocpu) +static int dev_cpu_dead(unsigned int oldcpu) { struct sk_buff **list_skb; struct sk_buff *skb; - unsigned int cpu, oldcpu = (unsigned long)ocpu; + unsigned int cpu; struct softnet_data *sd, *oldsd; - if (action != CPU_DEAD && action != CPU_DEAD_FROZEN) - return NOTIFY_OK; - local_irq_disable(); cpu = smp_processor_id(); sd = &per_cpu(softnet_data, cpu); @@ -8014,10 +8009,9 @@ static int dev_cpu_callback(struct notifier_block *nfb, input_queue_head_incr(oldsd); } - return NOTIFY_OK; + return 0; } - /** * netdev_increment_features - increment feature set by one * @all: current feature set @@ -8351,7 +8345,9 @@ static int __init net_dev_init(void) open_softirq(NET_TX_SOFTIRQ, net_tx_action); open_softirq(NET_RX_SOFTIRQ, net_rx_action); - hotcpu_notifier(dev_cpu_callback, 0); + rc = cpuhp_setup_state_nocalls(CPUHP_NET_DEV_DEAD, "net/dev:dead", + NULL, dev_cpu_dead); + WARN_ON(rc < 0); dst_subsys_init(); rc = 0; out: -- cgit v1.2.3 From a4fc1bfc42062e8bc7b2271a90d17403b096ce5d Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej 
Siewior Date: Thu, 3 Nov 2016 15:50:05 +0100 Subject: net/flowcache: Convert to hotplug state machine Install the callbacks via the state machine. Use multi state support to avoid custom list handling for the multiple instances. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Cc: Steffen Klassert Cc: Herbert Xu Cc: netdev@vger.kernel.org Cc: rt@linutronix.de Cc: "David S. Miller" Link: http://lkml.kernel.org/r/20161103145021.28528-10-bigeasy@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/cpuhotplug.h | 1 + include/net/flow.h | 1 + include/net/flowcache.h | 2 +- net/core/flow.c | 60 ++++++++++++++++++++-------------------------- net/xfrm/xfrm_policy.c | 1 + 5 files changed, 30 insertions(+), 35 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 394eb7ed53be..86b940f19df8 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -56,6 +56,7 @@ enum cpuhp_state { CPUHP_ARM_SHMOBILE_SCU_PREPARE, CPUHP_SH_SH3X_PREPARE, CPUHP_BLK_MQ_PREPARE, + CPUHP_NET_FLOW_PREPARE, CPUHP_TIMERS_DEAD, CPUHP_NOTF_ERR_INJ_PREPARE, CPUHP_MIPS_SOC_PREPARE, diff --git a/include/net/flow.h b/include/net/flow.h index 035aa7716967..2e386bd6ee63 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -239,6 +239,7 @@ struct flow_cache_object *flow_cache_lookup(struct net *net, void *ctx); int flow_cache_init(struct net *net); void flow_cache_fini(struct net *net); +void flow_cache_hp_init(void); void flow_cache_flush(struct net *net); void flow_cache_flush_deferred(struct net *net); diff --git a/include/net/flowcache.h b/include/net/flowcache.h index c8f665ec6e0d..9caf3bfc8d2d 100644 --- a/include/net/flowcache.h +++ b/include/net/flowcache.h @@ -17,7 +17,7 @@ struct flow_cache_percpu { struct flow_cache { u32 hash_shift; struct flow_cache_percpu __percpu *percpu; - struct notifier_block hotcpu_notifier; + struct hlist_node node; int low_watermark; int high_watermark; 
struct timer_list rnd_timer; diff --git a/net/core/flow.c b/net/core/flow.c index 3937b1b68d5b..841fd7f87b30 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -419,28 +419,20 @@ static int flow_cache_cpu_prepare(struct flow_cache *fc, int cpu) return 0; } -static int flow_cache_cpu(struct notifier_block *nfb, - unsigned long action, - void *hcpu) +static int flow_cache_cpu_up_prep(unsigned int cpu, struct hlist_node *node) { - struct flow_cache *fc = container_of(nfb, struct flow_cache, - hotcpu_notifier); - int res, cpu = (unsigned long) hcpu; + struct flow_cache *fc = hlist_entry_safe(node, struct flow_cache, node); + + return flow_cache_cpu_prepare(fc, cpu); +} + +static int flow_cache_cpu_dead(unsigned int cpu, struct hlist_node *node) +{ + struct flow_cache *fc = hlist_entry_safe(node, struct flow_cache, node); struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu); - switch (action) { - case CPU_UP_PREPARE: - case CPU_UP_PREPARE_FROZEN: - res = flow_cache_cpu_prepare(fc, cpu); - if (res) - return notifier_from_errno(res); - break; - case CPU_DEAD: - case CPU_DEAD_FROZEN: - __flow_cache_shrink(fc, fcp, 0); - break; - } - return NOTIFY_OK; + __flow_cache_shrink(fc, fcp, 0); + return 0; } int flow_cache_init(struct net *net) @@ -467,18 +459,8 @@ int flow_cache_init(struct net *net) if (!fc->percpu) return -ENOMEM; - cpu_notifier_register_begin(); - - for_each_online_cpu(i) { - if (flow_cache_cpu_prepare(fc, i)) - goto err; - } - fc->hotcpu_notifier = (struct notifier_block){ - .notifier_call = flow_cache_cpu, - }; - __register_hotcpu_notifier(&fc->hotcpu_notifier); - - cpu_notifier_register_done(); + if (cpuhp_state_add_instance(CPUHP_NET_FLOW_PREPARE, &fc->node)) + goto err; setup_timer(&fc->rnd_timer, flow_cache_new_hashrnd, (unsigned long) fc); @@ -494,8 +476,6 @@ err: fcp->hash_table = NULL; } - cpu_notifier_register_done(); - free_percpu(fc->percpu); fc->percpu = NULL; @@ -509,7 +489,8 @@ void flow_cache_fini(struct net *net) struct flow_cache *fc 
= &net->xfrm.flow_cache_global; del_timer_sync(&fc->rnd_timer); - unregister_hotcpu_notifier(&fc->hotcpu_notifier); + + cpuhp_state_remove_instance_nocalls(CPUHP_NET_FLOW_PREPARE, &fc->node); for_each_possible_cpu(i) { struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, i); @@ -521,3 +502,14 @@ void flow_cache_fini(struct net *net) fc->percpu = NULL; } EXPORT_SYMBOL(flow_cache_fini); + +void __init flow_cache_hp_init(void) +{ + int ret; + + ret = cpuhp_setup_state_multi(CPUHP_NET_FLOW_PREPARE, + "net/flow:prepare", + flow_cache_cpu_up_prep, + flow_cache_cpu_dead); + WARN_ON(ret < 0); +} diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index fd6986634e6f..4a8eff11bdad 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -3111,6 +3111,7 @@ static struct pernet_operations __net_initdata xfrm_net_ops = { void __init xfrm_init(void) { + flow_cache_hp_init(); register_pernet_subsys(&xfrm_net_ops); seqcount_init(&xfrm_policy_hash_generation); xfrm_input_init(); -- cgit v1.2.3 From 38643a0e691ec947d311eb2db011b289cf95014e Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 3 Nov 2016 15:50:09 +0100 Subject: drivers base/topology: Convert to hotplug state machine Install the callbacks via the state machine and let the core invoke the callbacks on the already online CPUs. 
No functional change Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Cc: Greg Kroah-Hartman Cc: rt@linutronix.de Link: http://lkml.kernel.org/r/20161103145021.28528-14-bigeasy@linutronix.de Signed-off-by: Thomas Gleixner --- drivers/base/topology.c | 42 +++++------------------------------------- include/linux/cpuhotplug.h | 1 + 2 files changed, 6 insertions(+), 37 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/topology.c b/drivers/base/topology.c index df3c97cb4c99..d6ec1c546f5b 100644 --- a/drivers/base/topology.c +++ b/drivers/base/topology.c @@ -118,51 +118,19 @@ static int topology_add_dev(unsigned int cpu) return sysfs_create_group(&dev->kobj, &topology_attr_group); } -static void topology_remove_dev(unsigned int cpu) +static int topology_remove_dev(unsigned int cpu) { struct device *dev = get_cpu_device(cpu); sysfs_remove_group(&dev->kobj, &topology_attr_group); -} - -static int topology_cpu_callback(struct notifier_block *nfb, - unsigned long action, void *hcpu) -{ - unsigned int cpu = (unsigned long)hcpu; - int rc = 0; - - switch (action) { - case CPU_UP_PREPARE: - case CPU_UP_PREPARE_FROZEN: - rc = topology_add_dev(cpu); - break; - case CPU_UP_CANCELED: - case CPU_UP_CANCELED_FROZEN: - case CPU_DEAD: - case CPU_DEAD_FROZEN: - topology_remove_dev(cpu); - break; - } - return notifier_from_errno(rc); + return 0; } static int topology_sysfs_init(void) { - int cpu; - int rc = 0; - - cpu_notifier_register_begin(); - - for_each_online_cpu(cpu) { - rc = topology_add_dev(cpu); - if (rc) - goto out; - } - __hotcpu_notifier(topology_cpu_callback, 0); - -out: - cpu_notifier_register_done(); - return rc; + return cpuhp_setup_state(CPUHP_TOPOLOGY_PREPARE, + "base/topology:prepare", topology_add_dev, + topology_remove_dev); } device_initcall(topology_sysfs_init); diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 86b940f19df8..3410d83cc2e2 100644 --- a/include/linux/cpuhotplug.h +++ 
b/include/linux/cpuhotplug.h @@ -57,6 +57,7 @@ enum cpuhp_state { CPUHP_SH_SH3X_PREPARE, CPUHP_BLK_MQ_PREPARE, CPUHP_NET_FLOW_PREPARE, + CPUHP_TOPOLOGY_PREPARE, CPUHP_TIMERS_DEAD, CPUHP_NOTF_ERR_INJ_PREPARE, CPUHP_MIPS_SOC_PREPARE, -- cgit v1.2.3 From 9484ab1bf4464faae695321dd4fa66365beda74e Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 10 Nov 2016 10:26:50 +1100 Subject: dax: Introduce IOMAP_FAULT flag Introduce a flag telling iomap operations whether they are handling a fault or other IO. That may influence behavior wrt inode size and similar things. Signed-off-by: Jan Kara Reviewed-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Dave Chinner --- fs/dax.c | 4 ++-- fs/iomap.c | 5 +++-- include/linux/iomap.h | 1 + 3 files changed, 6 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/fs/dax.c b/fs/dax.c index 281e91a63367..28af41b9da3a 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -1266,7 +1266,7 @@ int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf, loff_t pos = (loff_t)vmf->pgoff << PAGE_SHIFT; sector_t sector; struct iomap iomap = { 0 }; - unsigned flags = 0; + unsigned flags = IOMAP_FAULT; int error, major = 0; int locked_status = 0; void *entry; @@ -1467,7 +1467,7 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address, struct address_space *mapping = vma->vm_file->f_mapping; unsigned long pmd_addr = address & PMD_MASK; bool write = flags & FAULT_FLAG_WRITE; - unsigned int iomap_flags = write ? IOMAP_WRITE : 0; + unsigned int iomap_flags = (write ? 
IOMAP_WRITE : 0) | IOMAP_FAULT; struct inode *inode = mapping->host; int result = VM_FAULT_FALLBACK; struct iomap iomap = { 0 }; diff --git a/fs/iomap.c b/fs/iomap.c index 013d1d36fbbf..51a02573405e 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -468,8 +468,9 @@ int iomap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, offset = page_offset(page); while (length > 0) { - ret = iomap_apply(inode, offset, length, IOMAP_WRITE, - ops, page, iomap_page_mkwrite_actor); + ret = iomap_apply(inode, offset, length, + IOMAP_WRITE | IOMAP_FAULT, ops, page, + iomap_page_mkwrite_actor); if (unlikely(ret <= 0)) goto out_unlock; offset += ret; diff --git a/include/linux/iomap.h b/include/linux/iomap.h index e63e288dee83..b9e7b8ec8c1d 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -44,6 +44,7 @@ struct iomap { */ #define IOMAP_WRITE (1 << 0) #define IOMAP_ZERO (1 << 1) +#define IOMAP_FAULT (1 << 3) /* mapping for page fault */ struct iomap_ops { /* -- cgit v1.2.3 From 1ababeba4a21f3dba3da3523c670b207fb2feb62 Mon Sep 17 00:00:00 2001 From: David Lebrun Date: Tue, 8 Nov 2016 14:57:39 +0100 Subject: ipv6: implement dataplane support for rthdr type 4 (Segment Routing Header) Implement minimal support for processing of SR-enabled packets as described in https://tools.ietf.org/html/draft-ietf-6man-segment-routing-header-02. This patch implements the following operations: - Intermediate segment endpoint: incrementation of active segment and rerouting. - Egress for SR-encapsulated packets: decapsulation of outer IPv6 header + SRH and routing of inner packet. - Cleanup flag support for SR-inlined packets: removal of SRH if we are the penultimate segment endpoint. A per-interface sysctl seg6_enabled is provided, to accept/deny SR-enabled packets. Default is deny. This patch does not provide support for HMAC-signed packets. Signed-off-by: David Lebrun Signed-off-by: David S. 
Miller --- include/linux/ipv6.h | 1 + include/linux/seg6.h | 6 ++ include/net/seg6.h | 36 ++++++++++ include/uapi/linux/ipv6.h | 2 + include/uapi/linux/seg6.h | 54 ++++++++++++++ net/ipv6/addrconf.c | 10 +++ net/ipv6/exthdrs.c | 175 ++++++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 284 insertions(+) create mode 100644 include/linux/seg6.h create mode 100644 include/net/seg6.h create mode 100644 include/uapi/linux/seg6.h (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 1afb6e8d35c3..68d3f71f0abf 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -64,6 +64,7 @@ struct ipv6_devconf { } stable_secret; __s32 use_oif_addrs_only; __s32 keep_addr_on_down; + __s32 seg6_enabled; struct ctl_table_header *sysctl_header; }; diff --git a/include/linux/seg6.h b/include/linux/seg6.h new file mode 100644 index 000000000000..7a66d2b4c5a6 --- /dev/null +++ b/include/linux/seg6.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SEG6_H +#define _LINUX_SEG6_H + +#include + +#endif diff --git a/include/net/seg6.h b/include/net/seg6.h new file mode 100644 index 000000000000..4dd52a7e95f1 --- /dev/null +++ b/include/net/seg6.h @@ -0,0 +1,36 @@ +/* + * SR-IPv6 implementation + * + * Author: + * David Lebrun + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#ifndef _NET_SEG6_H +#define _NET_SEG6_H + +static inline void update_csum_diff4(struct sk_buff *skb, __be32 from, + __be32 to) +{ + __be32 diff[] = { ~from, to }; + + skb->csum = ~csum_partial((char *)diff, sizeof(diff), ~skb->csum); +} + +static inline void update_csum_diff16(struct sk_buff *skb, __be32 *from, + __be32 *to) +{ + __be32 diff[] = { + ~from[0], ~from[1], ~from[2], ~from[3], + to[0], to[1], to[2], to[3], + }; + + skb->csum = ~csum_partial((char *)diff, sizeof(diff), ~skb->csum); +} + +#endif diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index 8c2772340c3f..7ff1d654e333 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -39,6 +39,7 @@ struct in6_ifreq { #define IPV6_SRCRT_STRICT 0x01 /* Deprecated; will be removed */ #define IPV6_SRCRT_TYPE_0 0 /* Deprecated; will be removed */ #define IPV6_SRCRT_TYPE_2 2 /* IPv6 type 2 Routing Header */ +#define IPV6_SRCRT_TYPE_4 4 /* Segment Routing with IPv6 */ /* * routing header @@ -178,6 +179,7 @@ enum { DEVCONF_DROP_UNSOLICITED_NA, DEVCONF_KEEP_ADDR_ON_DOWN, DEVCONF_RTR_SOLICIT_MAX_INTERVAL, + DEVCONF_SEG6_ENABLED, DEVCONF_MAX }; diff --git a/include/uapi/linux/seg6.h b/include/uapi/linux/seg6.h new file mode 100644 index 000000000000..c396a8052f73 --- /dev/null +++ b/include/uapi/linux/seg6.h @@ -0,0 +1,54 @@ +/* + * SR-IPv6 implementation + * + * Author: + * David Lebrun + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#ifndef _UAPI_LINUX_SEG6_H +#define _UAPI_LINUX_SEG6_H + +/* + * SRH + */ +struct ipv6_sr_hdr { + __u8 nexthdr; + __u8 hdrlen; + __u8 type; + __u8 segments_left; + __u8 first_segment; + __u8 flag_1; + __u8 flag_2; + __u8 reserved; + + struct in6_addr segments[0]; +}; + +#define SR6_FLAG1_CLEANUP (1 << 7) +#define SR6_FLAG1_PROTECTED (1 << 6) +#define SR6_FLAG1_OAM (1 << 5) +#define SR6_FLAG1_ALERT (1 << 4) +#define SR6_FLAG1_HMAC (1 << 3) + +#define SR6_TLV_INGRESS 1 +#define SR6_TLV_EGRESS 2 +#define SR6_TLV_OPAQUE 3 +#define SR6_TLV_PADDING 4 +#define SR6_TLV_HMAC 5 + +#define sr_has_cleanup(srh) ((srh)->flag_1 & SR6_FLAG1_CLEANUP) +#define sr_has_hmac(srh) ((srh)->flag_1 & SR6_FLAG1_HMAC) + +struct sr6_tlv { + __u8 type; + __u8 len; + __u8 data[0]; +}; + +#endif diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 060dd9922018..2ac6cb460af0 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -238,6 +238,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { .use_oif_addrs_only = 0, .ignore_routes_with_linkdown = 0, .keep_addr_on_down = 0, + .seg6_enabled = 0, }; static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { @@ -284,6 +285,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { .use_oif_addrs_only = 0, .ignore_routes_with_linkdown = 0, .keep_addr_on_down = 0, + .seg6_enabled = 0, }; /* Check if a valid qdisc is available */ @@ -4944,6 +4946,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_DROP_UNICAST_IN_L2_MULTICAST] = cnf->drop_unicast_in_l2_multicast; array[DEVCONF_DROP_UNSOLICITED_NA] = cnf->drop_unsolicited_na; array[DEVCONF_KEEP_ADDR_ON_DOWN] = cnf->keep_addr_on_down; + array[DEVCONF_SEG6_ENABLED] = cnf->seg6_enabled; } static inline size_t inet6_ifla6_size(void) @@ -6035,6 +6038,13 @@ static const struct ctl_table addrconf_sysctl[] = { .proc_handler = proc_dointvec, }, + { + .procname = "seg6_enabled", + .data = &ipv6_devconf.seg6_enabled, + .maxlen = 
sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, { /* sentinel */ } diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 139ceb68bd37..b8ba3961ff8a 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -47,6 +47,8 @@ #if IS_ENABLED(CONFIG_IPV6_MIP6) #include #endif +#include +#include #include @@ -286,6 +288,175 @@ static int ipv6_destopt_rcv(struct sk_buff *skb) return -1; } +static void seg6_update_csum(struct sk_buff *skb) +{ + struct ipv6_sr_hdr *hdr; + struct in6_addr *addr; + __be32 from, to; + + /* srh is at transport offset and seg_left is already decremented + * but daddr is not yet updated with next segment + */ + + hdr = (struct ipv6_sr_hdr *)skb_transport_header(skb); + addr = hdr->segments + hdr->segments_left; + + hdr->segments_left++; + from = *(__be32 *)hdr; + + hdr->segments_left--; + to = *(__be32 *)hdr; + + /* update skb csum with diff resulting from seg_left decrement */ + + update_csum_diff4(skb, from, to); + + /* compute csum diff between current and next segment and update */ + + update_csum_diff16(skb, (__be32 *)(&ipv6_hdr(skb)->daddr), + (__be32 *)addr); +} + +static int ipv6_srh_rcv(struct sk_buff *skb) +{ + struct inet6_skb_parm *opt = IP6CB(skb); + struct net *net = dev_net(skb->dev); + struct ipv6_sr_hdr *hdr; + struct inet6_dev *idev; + struct in6_addr *addr; + bool cleanup = false; + int accept_seg6; + + hdr = (struct ipv6_sr_hdr *)skb_transport_header(skb); + + idev = __in6_dev_get(skb->dev); + + accept_seg6 = net->ipv6.devconf_all->seg6_enabled; + if (accept_seg6 > idev->cnf.seg6_enabled) + accept_seg6 = idev->cnf.seg6_enabled; + + if (!accept_seg6) { + kfree_skb(skb); + return -1; + } + +looped_back: + if (hdr->segments_left > 0) { + if (hdr->nexthdr != NEXTHDR_IPV6 && hdr->segments_left == 1 && + sr_has_cleanup(hdr)) + cleanup = true; + } else { + if (hdr->nexthdr == NEXTHDR_IPV6) { + int offset = (hdr->hdrlen + 1) << 3; + + skb_postpull_rcsum(skb, skb_network_header(skb), + 
skb_network_header_len(skb)); + + if (!pskb_pull(skb, offset)) { + kfree_skb(skb); + return -1; + } + skb_postpull_rcsum(skb, skb_transport_header(skb), + offset); + + skb_reset_network_header(skb); + skb_reset_transport_header(skb); + skb->encapsulation = 0; + + __skb_tunnel_rx(skb, skb->dev, net); + + netif_rx(skb); + return -1; + } + + opt->srcrt = skb_network_header_len(skb); + opt->lastopt = opt->srcrt; + skb->transport_header += (hdr->hdrlen + 1) << 3; + opt->nhoff = (&hdr->nexthdr) - skb_network_header(skb); + + return 1; + } + + if (hdr->segments_left >= (hdr->hdrlen >> 1)) { + __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_INHDRERRORS); + icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, + ((&hdr->segments_left) - + skb_network_header(skb))); + kfree_skb(skb); + return -1; + } + + if (skb_cloned(skb)) { + if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) { + __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_OUTDISCARDS); + kfree_skb(skb); + return -1; + } + } + + hdr = (struct ipv6_sr_hdr *)skb_transport_header(skb); + + hdr->segments_left--; + addr = hdr->segments + hdr->segments_left; + + skb_push(skb, sizeof(struct ipv6hdr)); + + if (skb->ip_summed == CHECKSUM_COMPLETE) + seg6_update_csum(skb); + + ipv6_hdr(skb)->daddr = *addr; + + if (cleanup) { + int srhlen = (hdr->hdrlen + 1) << 3; + int nh = hdr->nexthdr; + + skb_pull_rcsum(skb, sizeof(struct ipv6hdr) + srhlen); + memmove(skb_network_header(skb) + srhlen, + skb_network_header(skb), + (unsigned char *)hdr - skb_network_header(skb)); + skb->network_header += srhlen; + ipv6_hdr(skb)->nexthdr = nh; + ipv6_hdr(skb)->payload_len = htons(skb->len - + sizeof(struct ipv6hdr)); + skb_push_rcsum(skb, sizeof(struct ipv6hdr)); + } + + skb_dst_drop(skb); + + ip6_route_input(skb); + + if (skb_dst(skb)->error) { + dst_input(skb); + return -1; + } + + if (skb_dst(skb)->dev->flags & IFF_LOOPBACK) { + if (ipv6_hdr(skb)->hop_limit <= 1) { + __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), + 
IPSTATS_MIB_INHDRERRORS); + icmpv6_send(skb, ICMPV6_TIME_EXCEED, + ICMPV6_EXC_HOPLIMIT, 0); + kfree_skb(skb); + return -1; + } + ipv6_hdr(skb)->hop_limit--; + + /* be sure that srh is still present before reinjecting */ + if (!cleanup) { + skb_pull(skb, sizeof(struct ipv6hdr)); + goto looped_back; + } + skb_set_transport_header(skb, sizeof(struct ipv6hdr)); + IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr); + } + + dst_input(skb); + + return -1; +} + /******************************** Routing header. ********************************/ @@ -326,6 +497,10 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb) return -1; } + /* segment routing */ + if (hdr->type == IPV6_SRCRT_TYPE_4) + return ipv6_srh_rcv(skb); + looped_back: if (hdr->segments_left == 0) { switch (hdr->type) { -- cgit v1.2.3 From 915d7e5e5930b4f01d0971d93b9b25ed17d221aa Mon Sep 17 00:00:00 2001 From: David Lebrun Date: Tue, 8 Nov 2016 14:57:40 +0100 Subject: ipv6: sr: add code base for control plane support of SR-IPv6 This patch adds the necessary hooks and structures to provide support for SR-IPv6 control plane, essentially the Generic Netlink commands that will be used for userspace control over the Segment Routing kernel structures. The genetlink commands provide control over two different structures: tunnel source and HMAC data. The tunnel source is the source address that will be used by default when encapsulating packets into an outer IPv6 header + SRH. If the tunnel source is set to :: then an address of the outgoing interface will be selected as the source. The HMAC commands currently just return ENOTSUPP and will be implemented in a future patch. Signed-off-by: David Lebrun Signed-off-by: David S. 
Miller --- include/linux/seg6_genl.h | 6 ++ include/net/netns/ipv6.h | 1 + include/net/seg6.h | 16 +++ include/uapi/linux/seg6_genl.h | 32 ++++++ net/ipv6/Makefile | 2 +- net/ipv6/af_inet6.c | 9 +- net/ipv6/seg6.c | 214 +++++++++++++++++++++++++++++++++++++++++ 7 files changed, 278 insertions(+), 2 deletions(-) create mode 100644 include/linux/seg6_genl.h create mode 100644 include/uapi/linux/seg6_genl.h create mode 100644 net/ipv6/seg6.c (limited to 'include/linux') diff --git a/include/linux/seg6_genl.h b/include/linux/seg6_genl.h new file mode 100644 index 000000000000..d6c3fb4f3734 --- /dev/null +++ b/include/linux/seg6_genl.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SEG6_GENL_H +#define _LINUX_SEG6_GENL_H + +#include + +#endif diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 10d0848f5b8a..de7745e2edcc 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -85,6 +85,7 @@ struct netns_ipv6 { #endif atomic_t dev_addr_genid; atomic_t fib6_sernum; + struct seg6_pernet_data *seg6_data; }; #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) diff --git a/include/net/seg6.h b/include/net/seg6.h index 4dd52a7e95f1..7c7b8ed39661 100644 --- a/include/net/seg6.h +++ b/include/net/seg6.h @@ -14,6 +14,9 @@ #ifndef _NET_SEG6_H #define _NET_SEG6_H +#include +#include + static inline void update_csum_diff4(struct sk_buff *skb, __be32 from, __be32 to) { @@ -33,4 +36,17 @@ static inline void update_csum_diff16(struct sk_buff *skb, __be32 *from, skb->csum = ~csum_partial((char *)diff, sizeof(diff), ~skb->csum); } +struct seg6_pernet_data { + struct mutex lock; + struct in6_addr __rcu *tun_src; +}; + +static inline struct seg6_pernet_data *seg6_pernet(struct net *net) +{ + return net->ipv6.seg6_data; +} + +extern int seg6_init(void); +extern void seg6_exit(void); + #endif diff --git a/include/uapi/linux/seg6_genl.h b/include/uapi/linux/seg6_genl.h new file mode 100644 index 000000000000..fcf1c60d7df3 --- /dev/null +++ b/include/uapi/linux/seg6_genl.h @@ -0,0 +1,32 
@@ +#ifndef _UAPI_LINUX_SEG6_GENL_H +#define _UAPI_LINUX_SEG6_GENL_H + +#define SEG6_GENL_NAME "SEG6" +#define SEG6_GENL_VERSION 0x1 + +enum { + SEG6_ATTR_UNSPEC, + SEG6_ATTR_DST, + SEG6_ATTR_DSTLEN, + SEG6_ATTR_HMACKEYID, + SEG6_ATTR_SECRET, + SEG6_ATTR_SECRETLEN, + SEG6_ATTR_ALGID, + SEG6_ATTR_HMACINFO, + __SEG6_ATTR_MAX, +}; + +#define SEG6_ATTR_MAX (__SEG6_ATTR_MAX - 1) + +enum { + SEG6_CMD_UNSPEC, + SEG6_CMD_SETHMAC, + SEG6_CMD_DUMPHMAC, + SEG6_CMD_SET_TUNSRC, + SEG6_CMD_GET_TUNSRC, + __SEG6_CMD_MAX, +}; + +#define SEG6_CMD_MAX (__SEG6_CMD_MAX - 1) + +#endif diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index c174ccb340a1..c92010d62afc 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -9,7 +9,7 @@ ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \ route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \ raw.o icmp.o mcast.o reassembly.o tcp_ipv6.o ping.o \ exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o \ - udp_offload.o + udp_offload.o seg6.o ipv6-offload := ip6_offload.o tcpv6_offload.o exthdrs_offload.o diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index c86911b63f8a..d424f3a3737a 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -61,6 +61,7 @@ #include #endif #include +#include #include #include @@ -991,6 +992,10 @@ static int __init inet6_init(void) if (err) goto calipso_fail; + err = seg6_init(); + if (err) + goto seg6_fail; + #ifdef CONFIG_SYSCTL err = ipv6_sysctl_register(); if (err) @@ -1001,8 +1006,10 @@ out: #ifdef CONFIG_SYSCTL sysctl_fail: - calipso_exit(); + seg6_exit(); #endif +seg6_fail: + calipso_exit(); calipso_fail: pingv6_exit(); pingv6_fail: diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c new file mode 100644 index 000000000000..e246b0ba12ac --- /dev/null +++ b/net/ipv6/seg6.c @@ -0,0 +1,214 @@ +/* + * SR-IPv6 implementation + * + * Author: + * David Lebrun + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU 
General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include + +static struct genl_family seg6_genl_family; + +static const struct nla_policy seg6_genl_policy[SEG6_ATTR_MAX + 1] = { + [SEG6_ATTR_DST] = { .type = NLA_BINARY, + .len = sizeof(struct in6_addr) }, + [SEG6_ATTR_DSTLEN] = { .type = NLA_S32, }, + [SEG6_ATTR_HMACKEYID] = { .type = NLA_U32, }, + [SEG6_ATTR_SECRET] = { .type = NLA_BINARY, }, + [SEG6_ATTR_SECRETLEN] = { .type = NLA_U8, }, + [SEG6_ATTR_ALGID] = { .type = NLA_U8, }, + [SEG6_ATTR_HMACINFO] = { .type = NLA_NESTED, }, +}; + +static int seg6_genl_sethmac(struct sk_buff *skb, struct genl_info *info) +{ + return -ENOTSUPP; +} + +static int seg6_genl_set_tunsrc(struct sk_buff *skb, struct genl_info *info) +{ + struct net *net = genl_info_net(info); + struct in6_addr *val, *t_old, *t_new; + struct seg6_pernet_data *sdata; + + sdata = seg6_pernet(net); + + if (!info->attrs[SEG6_ATTR_DST]) + return -EINVAL; + + val = nla_data(info->attrs[SEG6_ATTR_DST]); + t_new = kmemdup(val, sizeof(*val), GFP_KERNEL); + + mutex_lock(&sdata->lock); + + t_old = sdata->tun_src; + rcu_assign_pointer(sdata->tun_src, t_new); + + mutex_unlock(&sdata->lock); + + synchronize_net(); + kfree(t_old); + + return 0; +} + +static int seg6_genl_get_tunsrc(struct sk_buff *skb, struct genl_info *info) +{ + struct net *net = genl_info_net(info); + struct in6_addr *tun_src; + struct sk_buff *msg; + void *hdr; + + msg = genlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq, + &seg6_genl_family, 0, SEG6_CMD_GET_TUNSRC); + if (!hdr) + goto free_msg; + + rcu_read_lock(); + tun_src = rcu_dereference(seg6_pernet(net)->tun_src); + + if (nla_put(msg, SEG6_ATTR_DST, sizeof(struct in6_addr), tun_src)) + 
goto nla_put_failure; + + rcu_read_unlock(); + + genlmsg_end(msg, hdr); + genlmsg_reply(msg, info); + + return 0; + +nla_put_failure: + rcu_read_unlock(); + genlmsg_cancel(msg, hdr); +free_msg: + nlmsg_free(msg); + return -ENOMEM; +} + +static int seg6_genl_dumphmac(struct sk_buff *skb, struct netlink_callback *cb) +{ + return -ENOTSUPP; +} + +static int __net_init seg6_net_init(struct net *net) +{ + struct seg6_pernet_data *sdata; + + sdata = kzalloc(sizeof(*sdata), GFP_KERNEL); + if (!sdata) + return -ENOMEM; + + mutex_init(&sdata->lock); + + sdata->tun_src = kzalloc(sizeof(*sdata->tun_src), GFP_KERNEL); + if (!sdata->tun_src) { + kfree(sdata); + return -ENOMEM; + } + + net->ipv6.seg6_data = sdata; + + return 0; +} + +static void __net_exit seg6_net_exit(struct net *net) +{ + struct seg6_pernet_data *sdata = seg6_pernet(net); + + kfree(sdata->tun_src); + kfree(sdata); +} + +static struct pernet_operations ip6_segments_ops = { + .init = seg6_net_init, + .exit = seg6_net_exit, +}; + +static const struct genl_ops seg6_genl_ops[] = { + { + .cmd = SEG6_CMD_SETHMAC, + .doit = seg6_genl_sethmac, + .policy = seg6_genl_policy, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = SEG6_CMD_DUMPHMAC, + .dumpit = seg6_genl_dumphmac, + .policy = seg6_genl_policy, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = SEG6_CMD_SET_TUNSRC, + .doit = seg6_genl_set_tunsrc, + .policy = seg6_genl_policy, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = SEG6_CMD_GET_TUNSRC, + .doit = seg6_genl_get_tunsrc, + .policy = seg6_genl_policy, + .flags = GENL_ADMIN_PERM, + }, +}; + +static struct genl_family seg6_genl_family __ro_after_init = { + .hdrsize = 0, + .name = SEG6_GENL_NAME, + .version = SEG6_GENL_VERSION, + .maxattr = SEG6_ATTR_MAX, + .netnsok = true, + .parallel_ops = true, + .ops = seg6_genl_ops, + .n_ops = ARRAY_SIZE(seg6_genl_ops), + .module = THIS_MODULE, +}; + +int __init seg6_init(void) +{ + int err = -ENOMEM; + + err = genl_register_family(&seg6_genl_family); + if (err) + goto out; + + err = 
register_pernet_subsys(&ip6_segments_ops); + if (err) + goto out_unregister_genl; + + pr_info("Segment Routing with IPv6\n"); + +out: + return err; +out_unregister_genl: + genl_unregister_family(&seg6_genl_family); + goto out; +} + +void seg6_exit(void) +{ + unregister_pernet_subsys(&ip6_segments_ops); + genl_unregister_family(&seg6_genl_family); +} -- cgit v1.2.3 From 6c8702c60b88651072460f3f4026c7dfe2521d12 Mon Sep 17 00:00:00 2001 From: David Lebrun Date: Tue, 8 Nov 2016 14:57:41 +0100 Subject: ipv6: sr: add support for SRH encapsulation and injection with lwtunnels This patch creates a new type of interfaceless lightweight tunnel (SEG6), enabling the encapsulation and injection of SRH within locally emitted packets and forwarded packets. From a configuration viewpoint, a seg6 tunnel would be configured as follows: ip -6 ro ad fc00::1/128 encap seg6 mode encap segs fc42::1,fc42::2,fc42::3 dev eth0 Any packet whose destination address is fc00::1 would thus be encapsulated within an outer IPv6 header containing the SRH with three segments, and would actually be routed to the first segment of the list. If `mode inline' was specified instead of `mode encap', then the SRH would be directly inserted after the IPv6 header without outer encapsulation. The inline mode is only available if CONFIG_IPV6_SEG6_INLINE is enabled. This feature was made configurable because direct header insertion may break several mechanisms such as PMTUD or IPSec AH. Signed-off-by: David Lebrun Signed-off-by: David S. 
Miller --- include/linux/seg6_iptunnel.h | 6 + include/net/seg6.h | 6 + include/uapi/linux/lwtunnel.h | 1 + include/uapi/linux/seg6_iptunnel.h | 44 ++++ net/core/lwtunnel.c | 2 + net/ipv6/Kconfig | 12 ++ net/ipv6/Makefile | 2 +- net/ipv6/seg6.c | 44 ++++ net/ipv6/seg6_iptunnel.c | 410 +++++++++++++++++++++++++++++++++++++ 9 files changed, 526 insertions(+), 1 deletion(-) create mode 100644 include/linux/seg6_iptunnel.h create mode 100644 include/uapi/linux/seg6_iptunnel.h create mode 100644 net/ipv6/seg6_iptunnel.c (limited to 'include/linux') diff --git a/include/linux/seg6_iptunnel.h b/include/linux/seg6_iptunnel.h new file mode 100644 index 000000000000..5377cf6a5a02 --- /dev/null +++ b/include/linux/seg6_iptunnel.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SEG6_IPTUNNEL_H +#define _LINUX_SEG6_IPTUNNEL_H + +#include + +#endif diff --git a/include/net/seg6.h b/include/net/seg6.h index 7c7b8ed39661..ff5da0ce83e9 100644 --- a/include/net/seg6.h +++ b/include/net/seg6.h @@ -16,6 +16,8 @@ #include #include +#include +#include static inline void update_csum_diff4(struct sk_buff *skb, __be32 from, __be32 to) @@ -48,5 +50,9 @@ static inline struct seg6_pernet_data *seg6_pernet(struct net *net) extern int seg6_init(void); extern void seg6_exit(void); +extern int seg6_iptunnel_init(void); +extern void seg6_iptunnel_exit(void); + +extern bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len); #endif diff --git a/include/uapi/linux/lwtunnel.h b/include/uapi/linux/lwtunnel.h index a478fe80e203..453cc6215bfd 100644 --- a/include/uapi/linux/lwtunnel.h +++ b/include/uapi/linux/lwtunnel.h @@ -9,6 +9,7 @@ enum lwtunnel_encap_types { LWTUNNEL_ENCAP_IP, LWTUNNEL_ENCAP_ILA, LWTUNNEL_ENCAP_IP6, + LWTUNNEL_ENCAP_SEG6, __LWTUNNEL_ENCAP_MAX, }; diff --git a/include/uapi/linux/seg6_iptunnel.h b/include/uapi/linux/seg6_iptunnel.h new file mode 100644 index 000000000000..0f7dbd280a9c --- /dev/null +++ b/include/uapi/linux/seg6_iptunnel.h @@ -0,0 +1,44 @@ +/* + * SR-IPv6 implementation + * + * 
Author: + * David Lebrun + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _UAPI_LINUX_SEG6_IPTUNNEL_H +#define _UAPI_LINUX_SEG6_IPTUNNEL_H + +enum { + SEG6_IPTUNNEL_UNSPEC, + SEG6_IPTUNNEL_SRH, + __SEG6_IPTUNNEL_MAX, +}; +#define SEG6_IPTUNNEL_MAX (__SEG6_IPTUNNEL_MAX - 1) + +struct seg6_iptunnel_encap { + int mode; + struct ipv6_sr_hdr srh[0]; +}; + +#define SEG6_IPTUN_ENCAP_SIZE(x) ((sizeof(*x)) + (((x)->srh->hdrlen + 1) << 3)) + +enum { + SEG6_IPTUN_MODE_INLINE, + SEG6_IPTUN_MODE_ENCAP, +}; + +static inline size_t seg6_lwt_headroom(struct seg6_iptunnel_encap *tuninfo) +{ + int encap = (tuninfo->mode == SEG6_IPTUN_MODE_ENCAP); + + return ((tuninfo->srh->hdrlen + 1) << 3) + + (encap * sizeof(struct ipv6hdr)); +} + +#endif diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c index 88fd64250b02..03976e939818 100644 --- a/net/core/lwtunnel.c +++ b/net/core/lwtunnel.c @@ -39,6 +39,8 @@ static const char *lwtunnel_encap_str(enum lwtunnel_encap_types encap_type) return "MPLS"; case LWTUNNEL_ENCAP_ILA: return "ILA"; + case LWTUNNEL_ENCAP_SEG6: + return "SEG6"; case LWTUNNEL_ENCAP_IP6: case LWTUNNEL_ENCAP_IP: case LWTUNNEL_ENCAP_NONE: diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 2343e4f2e0bf..1123a001d729 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -289,4 +289,16 @@ config IPV6_PIMSM_V2 Support for IPv6 PIM multicast routing protocol PIM-SMv2. If unsure, say N. +config IPV6_SEG6_INLINE + bool "IPv6: direct Segment Routing Header insertion " + depends on IPV6 + ---help--- + Support for direct insertion of the Segment Routing Header, + also known as inline mode. Be aware that direct insertion of + extension headers (as opposed to encapsulation) may break + multiple mechanisms such as PMTUD or IPSec AH. 
Use this feature + only if you know exactly what you are doing. + + If unsure, say N. + endif # IPV6 diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index c92010d62afc..59ee92fb3689 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -9,7 +9,7 @@ ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \ route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \ raw.o icmp.o mcast.o reassembly.o tcp_ipv6.o ping.o \ exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o \ - udp_offload.o seg6.o + udp_offload.o seg6.o seg6_iptunnel.o ipv6-offload := ip6_offload.o tcpv6_offload.o exthdrs_offload.o diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c index e246b0ba12ac..9c78053e67e0 100644 --- a/net/ipv6/seg6.c +++ b/net/ipv6/seg6.c @@ -26,6 +26,43 @@ #include #include +bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len) +{ + int trailing; + unsigned int tlv_offset; + + if (srh->type != IPV6_SRCRT_TYPE_4) + return false; + + if (((srh->hdrlen + 1) << 3) != len) + return false; + + if (srh->segments_left != srh->first_segment) + return false; + + tlv_offset = sizeof(*srh) + ((srh->first_segment + 1) << 4); + + trailing = len - tlv_offset; + if (trailing < 0) + return false; + + while (trailing) { + struct sr6_tlv *tlv; + unsigned int tlv_len; + + tlv = (struct sr6_tlv *)((unsigned char *)srh + tlv_offset); + tlv_len = sizeof(*tlv) + tlv->len; + + trailing -= tlv_len; + if (trailing < 0) + return false; + + tlv_offset += tlv_len; + } + + return true; +} + static struct genl_family seg6_genl_family; static const struct nla_policy seg6_genl_policy[SEG6_ATTR_MAX + 1] = { @@ -198,10 +235,16 @@ int __init seg6_init(void) if (err) goto out_unregister_genl; + err = seg6_iptunnel_init(); + if (err) + goto out_unregister_pernet; + pr_info("Segment Routing with IPv6\n"); out: return err; +out_unregister_pernet: + unregister_pernet_subsys(&ip6_segments_ops); out_unregister_genl: genl_unregister_family(&seg6_genl_family); goto out; @@ -209,6 +252,7 @@ 
out_unregister_genl: void seg6_exit(void) { + seg6_iptunnel_exit(); unregister_pernet_subsys(&ip6_segments_ops); genl_unregister_family(&seg6_genl_family); } diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c new file mode 100644 index 000000000000..39762b2fa7a2 --- /dev/null +++ b/net/ipv6/seg6_iptunnel.c @@ -0,0 +1,410 @@ +/* + * SR-IPv6 implementation + * + * Author: + * David Lebrun + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef CONFIG_DST_CACHE +#include +#endif + +struct seg6_lwt { +#ifdef CONFIG_DST_CACHE + struct dst_cache cache; +#endif + struct seg6_iptunnel_encap tuninfo[0]; +}; + +static inline struct seg6_lwt *seg6_lwt_lwtunnel(struct lwtunnel_state *lwt) +{ + return (struct seg6_lwt *)lwt->data; +} + +static inline struct seg6_iptunnel_encap * +seg6_encap_lwtunnel(struct lwtunnel_state *lwt) +{ + return seg6_lwt_lwtunnel(lwt)->tuninfo; +} + +static const struct nla_policy seg6_iptunnel_policy[SEG6_IPTUNNEL_MAX + 1] = { + [SEG6_IPTUNNEL_SRH] = { .type = NLA_BINARY }, +}; + +int nla_put_srh(struct sk_buff *skb, int attrtype, + struct seg6_iptunnel_encap *tuninfo) +{ + struct seg6_iptunnel_encap *data; + struct nlattr *nla; + int len; + + len = SEG6_IPTUN_ENCAP_SIZE(tuninfo); + + nla = nla_reserve(skb, attrtype, len); + if (!nla) + return -EMSGSIZE; + + data = nla_data(nla); + memcpy(data, tuninfo, len); + + return 0; +} + +static void set_tun_src(struct net *net, struct net_device *dev, + struct in6_addr *daddr, struct in6_addr *saddr) +{ + struct seg6_pernet_data *sdata = seg6_pernet(net); + struct in6_addr *tun_src; + + rcu_read_lock(); + + tun_src = 
rcu_dereference(sdata->tun_src); + + if (!ipv6_addr_any(tun_src)) { + memcpy(saddr, tun_src, sizeof(struct in6_addr)); + } else { + ipv6_dev_get_saddr(net, dev, daddr, IPV6_PREFER_SRC_PUBLIC, + saddr); + } + + rcu_read_unlock(); +} + +/* encapsulate an IPv6 packet within an outer IPv6 header with a given SRH */ +static int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh) +{ + struct net *net = dev_net(skb_dst(skb)->dev); + struct ipv6hdr *hdr, *inner_hdr; + struct ipv6_sr_hdr *isrh; + int hdrlen, tot_len, err; + + hdrlen = (osrh->hdrlen + 1) << 3; + tot_len = hdrlen + sizeof(*hdr); + + err = pskb_expand_head(skb, tot_len, 0, GFP_ATOMIC); + if (unlikely(err)) + return err; + + inner_hdr = ipv6_hdr(skb); + + skb_push(skb, tot_len); + skb_reset_network_header(skb); + skb_mac_header_rebuild(skb); + hdr = ipv6_hdr(skb); + + /* inherit tc, flowlabel and hlim + * hlim will be decremented in ip6_forward() afterwards and + * decapsulation will overwrite inner hlim with outer hlim + */ + ip6_flow_hdr(hdr, ip6_tclass(ip6_flowinfo(inner_hdr)), + ip6_flowlabel(inner_hdr)); + hdr->hop_limit = inner_hdr->hop_limit; + hdr->nexthdr = NEXTHDR_ROUTING; + + isrh = (void *)hdr + sizeof(*hdr); + memcpy(isrh, osrh, hdrlen); + + isrh->nexthdr = NEXTHDR_IPV6; + + hdr->daddr = isrh->segments[isrh->first_segment]; + set_tun_src(net, skb->dev, &hdr->daddr, &hdr->saddr); + + skb_postpush_rcsum(skb, hdr, tot_len); + + return 0; +} + +/* insert an SRH within an IPv6 packet, just after the IPv6 header */ +#ifdef CONFIG_IPV6_SEG6_INLINE +static int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh) +{ + struct ipv6hdr *hdr, *oldhdr; + struct ipv6_sr_hdr *isrh; + int hdrlen, err; + + hdrlen = (osrh->hdrlen + 1) << 3; + + err = pskb_expand_head(skb, hdrlen, 0, GFP_ATOMIC); + if (unlikely(err)) + return err; + + oldhdr = ipv6_hdr(skb); + + skb_pull(skb, sizeof(struct ipv6hdr)); + skb_postpull_rcsum(skb, skb_network_header(skb), + sizeof(struct ipv6hdr)); + + 
skb_push(skb, sizeof(struct ipv6hdr) + hdrlen); + skb_reset_network_header(skb); + skb_mac_header_rebuild(skb); + + hdr = ipv6_hdr(skb); + + memmove(hdr, oldhdr, sizeof(*hdr)); + + isrh = (void *)hdr + sizeof(*hdr); + memcpy(isrh, osrh, hdrlen); + + isrh->nexthdr = hdr->nexthdr; + hdr->nexthdr = NEXTHDR_ROUTING; + + isrh->segments[0] = hdr->daddr; + hdr->daddr = isrh->segments[isrh->first_segment]; + + skb_postpush_rcsum(skb, hdr, sizeof(struct ipv6hdr) + hdrlen); + + return 0; +} +#endif + +static int seg6_do_srh(struct sk_buff *skb) +{ + struct dst_entry *dst = skb_dst(skb); + struct seg6_iptunnel_encap *tinfo; + int err = 0; + + tinfo = seg6_encap_lwtunnel(dst->lwtstate); + + if (likely(!skb->encapsulation)) { + skb_reset_inner_headers(skb); + skb->encapsulation = 1; + } + + switch (tinfo->mode) { +#ifdef CONFIG_IPV6_SEG6_INLINE + case SEG6_IPTUN_MODE_INLINE: + err = seg6_do_srh_inline(skb, tinfo->srh); + skb_reset_inner_headers(skb); + break; +#endif + case SEG6_IPTUN_MODE_ENCAP: + err = seg6_do_srh_encap(skb, tinfo->srh); + break; + } + + if (err) + return err; + + ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); + skb_set_transport_header(skb, sizeof(struct ipv6hdr)); + + skb_set_inner_protocol(skb, skb->protocol); + + return 0; +} + +int seg6_input(struct sk_buff *skb) +{ + int err; + + err = seg6_do_srh(skb); + if (unlikely(err)) { + kfree_skb(skb); + return err; + } + + skb_dst_drop(skb); + ip6_route_input(skb); + + return dst_input(skb); +} + +int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb) +{ + struct dst_entry *orig_dst = skb_dst(skb); + struct dst_entry *dst = NULL; + struct seg6_lwt *slwt; + int err = -EINVAL; + + err = seg6_do_srh(skb); + if (unlikely(err)) + goto drop; + + slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate); + +#ifdef CONFIG_DST_CACHE + dst = dst_cache_get(&slwt->cache); +#endif + + if (unlikely(!dst)) { + struct ipv6hdr *hdr = ipv6_hdr(skb); + struct flowi6 fl6; + + fl6.daddr = hdr->daddr; + 
fl6.saddr = hdr->saddr; + fl6.flowlabel = ip6_flowinfo(hdr); + fl6.flowi6_mark = skb->mark; + fl6.flowi6_proto = hdr->nexthdr; + + dst = ip6_route_output(net, NULL, &fl6); + if (dst->error) { + err = dst->error; + dst_release(dst); + goto drop; + } + +#ifdef CONFIG_DST_CACHE + dst_cache_set_ip6(&slwt->cache, dst, &fl6.saddr); +#endif + } + + skb_dst_drop(skb); + skb_dst_set(skb, dst); + + return dst_output(net, sk, skb); +drop: + kfree_skb(skb); + return err; +} + +static int seg6_build_state(struct net_device *dev, struct nlattr *nla, + unsigned int family, const void *cfg, + struct lwtunnel_state **ts) +{ + struct nlattr *tb[SEG6_IPTUNNEL_MAX + 1]; + struct seg6_iptunnel_encap *tuninfo; + struct lwtunnel_state *newts; + int tuninfo_len, min_size; + struct seg6_lwt *slwt; + int err; + + err = nla_parse_nested(tb, SEG6_IPTUNNEL_MAX, nla, + seg6_iptunnel_policy); + + if (err < 0) + return err; + + if (!tb[SEG6_IPTUNNEL_SRH]) + return -EINVAL; + + tuninfo = nla_data(tb[SEG6_IPTUNNEL_SRH]); + tuninfo_len = nla_len(tb[SEG6_IPTUNNEL_SRH]); + + /* tuninfo must contain at least the iptunnel encap structure, + * the SRH and one segment + */ + min_size = sizeof(*tuninfo) + sizeof(struct ipv6_sr_hdr) + + sizeof(struct in6_addr); + if (tuninfo_len < min_size) + return -EINVAL; + + switch (tuninfo->mode) { +#ifdef CONFIG_IPV6_SEG6_INLINE + case SEG6_IPTUN_MODE_INLINE: + break; +#endif + case SEG6_IPTUN_MODE_ENCAP: + break; + default: + return -EINVAL; + } + + /* verify that SRH is consistent */ + if (!seg6_validate_srh(tuninfo->srh, tuninfo_len - sizeof(*tuninfo))) + return -EINVAL; + + newts = lwtunnel_state_alloc(tuninfo_len + sizeof(*slwt)); + if (!newts) + return -ENOMEM; + + slwt = seg6_lwt_lwtunnel(newts); + +#ifdef CONFIG_DST_CACHE + err = dst_cache_init(&slwt->cache, GFP_KERNEL); + if (err) { + kfree(newts); + return err; + } +#endif + + memcpy(&slwt->tuninfo, tuninfo, tuninfo_len); + + newts->type = LWTUNNEL_ENCAP_SEG6; + newts->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT 
| + LWTUNNEL_STATE_INPUT_REDIRECT; + newts->headroom = seg6_lwt_headroom(tuninfo); + + *ts = newts; + + return 0; +} + +#ifdef CONFIG_DST_CACHE +static void seg6_destroy_state(struct lwtunnel_state *lwt) +{ + dst_cache_destroy(&seg6_lwt_lwtunnel(lwt)->cache); +} +#endif + +static int seg6_fill_encap_info(struct sk_buff *skb, + struct lwtunnel_state *lwtstate) +{ + struct seg6_iptunnel_encap *tuninfo = seg6_encap_lwtunnel(lwtstate); + + if (nla_put_srh(skb, SEG6_IPTUNNEL_SRH, tuninfo)) + return -EMSGSIZE; + + return 0; +} + +static int seg6_encap_nlsize(struct lwtunnel_state *lwtstate) +{ + struct seg6_iptunnel_encap *tuninfo = seg6_encap_lwtunnel(lwtstate); + + return nla_total_size(SEG6_IPTUN_ENCAP_SIZE(tuninfo)); +} + +static int seg6_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b) +{ + struct seg6_iptunnel_encap *a_hdr = seg6_encap_lwtunnel(a); + struct seg6_iptunnel_encap *b_hdr = seg6_encap_lwtunnel(b); + int len = SEG6_IPTUN_ENCAP_SIZE(a_hdr); + + if (len != SEG6_IPTUN_ENCAP_SIZE(b_hdr)) + return 1; + + return memcmp(a_hdr, b_hdr, len); +} + +static const struct lwtunnel_encap_ops seg6_iptun_ops = { + .build_state = seg6_build_state, +#ifdef CONFIG_DST_CACHE + .destroy_state = seg6_destroy_state, +#endif + .output = seg6_output, + .input = seg6_input, + .fill_encap = seg6_fill_encap_info, + .get_encap_size = seg6_encap_nlsize, + .cmp_encap = seg6_encap_cmp, +}; + +int __init seg6_iptunnel_init(void) +{ + return lwtunnel_encap_add_ops(&seg6_iptun_ops, LWTUNNEL_ENCAP_SEG6); +} + +void seg6_iptunnel_exit(void) +{ + lwtunnel_encap_del_ops(&seg6_iptun_ops, LWTUNNEL_ENCAP_SEG6); +} -- cgit v1.2.3 From bf355b8d2c30a289232042cacc1cfaea4923936c Mon Sep 17 00:00:00 2001 From: David Lebrun Date: Tue, 8 Nov 2016 14:57:42 +0100 Subject: ipv6: sr: add core files for SR HMAC support This patch adds the necessary functions to compute and check the HMAC signature of an SR-enabled packet. Two HMAC algorithms are supported: hmac(sha1) and hmac(sha256). 
In order to avoid dynamic memory allocation for each HMAC computation, a per-cpu ring buffer is allocated for this purpose. A new per-interface sysctl called seg6_require_hmac is added, allowing a user-defined policy for processing HMAC-signed SR-enabled packets. A value of -1 means that the HMAC field will always be ignored. A value of 0 means that if an HMAC field is present, its validity will be enforced (the packet is dropped if the signature is incorrect). Finally, a value of 1 means that any SR-enabled packet that does not contain an HMAC signature or whose signature is incorrect will be dropped. Signed-off-by: David Lebrun Signed-off-by: David S. Miller --- include/linux/ipv6.h | 3 + include/linux/seg6_hmac.h | 6 + include/net/seg6.h | 4 + include/net/seg6_hmac.h | 62 ++++++ include/uapi/linux/ipv6.h | 1 + include/uapi/linux/seg6_hmac.h | 21 ++ net/ipv6/Kconfig | 12 + net/ipv6/Makefile | 1 + net/ipv6/addrconf.c | 18 ++ net/ipv6/seg6_hmac.c | 484 +++++++++++++++++++++++++++++++++++++++++ 10 files changed, 612 insertions(+) create mode 100644 include/linux/seg6_hmac.h create mode 100644 include/net/seg6_hmac.h create mode 100644 include/uapi/linux/seg6_hmac.h create mode 100644 net/ipv6/seg6_hmac.c (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 68d3f71f0abf..93756585521f 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -65,6 +65,9 @@ struct ipv6_devconf { __s32 use_oif_addrs_only; __s32 keep_addr_on_down; __s32 seg6_enabled; +#ifdef CONFIG_IPV6_SEG6_HMAC + __s32 seg6_require_hmac; +#endif struct ctl_table_header *sysctl_header; }; diff --git a/include/linux/seg6_hmac.h b/include/linux/seg6_hmac.h new file mode 100644 index 000000000000..da437ebdc6cd --- /dev/null +++ b/include/linux/seg6_hmac.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SEG6_HMAC_H +#define _LINUX_SEG6_HMAC_H + +#include + +#endif diff --git a/include/net/seg6.h b/include/net/seg6.h index ff5da0ce83e9..4e0357517d79 100644 --- 
a/include/net/seg6.h +++ b/include/net/seg6.h @@ -18,6 +18,7 @@ #include #include #include +#include static inline void update_csum_diff4(struct sk_buff *skb, __be32 from, __be32 to) @@ -41,6 +42,9 @@ static inline void update_csum_diff16(struct sk_buff *skb, __be32 *from, struct seg6_pernet_data { struct mutex lock; struct in6_addr __rcu *tun_src; +#ifdef CONFIG_IPV6_SEG6_HMAC + struct rhashtable hmac_infos; +#endif }; static inline struct seg6_pernet_data *seg6_pernet(struct net *net) diff --git a/include/net/seg6_hmac.h b/include/net/seg6_hmac.h new file mode 100644 index 000000000000..69c3a106056b --- /dev/null +++ b/include/net/seg6_hmac.h @@ -0,0 +1,62 @@ +/* + * SR-IPv6 implementation + * + * Author: + * David Lebrun + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _NET_SEG6_HMAC_H +#define _NET_SEG6_HMAC_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define SEG6_HMAC_MAX_DIGESTSIZE 160 +#define SEG6_HMAC_RING_SIZE 256 + +struct seg6_hmac_info { + struct rhash_head node; + struct rcu_head rcu; + + u32 hmackeyid; + char secret[SEG6_HMAC_SECRET_LEN]; + u8 slen; + u8 alg_id; +}; + +struct seg6_hmac_algo { + u8 alg_id; + char name[64]; + struct crypto_shash * __percpu *tfms; + struct shash_desc * __percpu *shashs; +}; + +extern int seg6_hmac_compute(struct seg6_hmac_info *hinfo, + struct ipv6_sr_hdr *hdr, struct in6_addr *saddr, + u8 *output); +extern struct seg6_hmac_info *seg6_hmac_info_lookup(struct net *net, u32 key); +extern int seg6_hmac_info_add(struct net *net, u32 key, + struct seg6_hmac_info *hinfo); +extern int seg6_hmac_info_del(struct net *net, u32 key); +extern int seg6_push_hmac(struct net *net, struct in6_addr *saddr, + struct ipv6_sr_hdr *srh); +extern bool 
seg6_hmac_validate_skb(struct sk_buff *skb); +extern int seg6_hmac_init(void); +extern void seg6_hmac_exit(void); +extern int seg6_hmac_net_init(struct net *net); +extern void seg6_hmac_net_exit(struct net *net); + +#endif diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index 7ff1d654e333..53561be1ac21 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -180,6 +180,7 @@ enum { DEVCONF_KEEP_ADDR_ON_DOWN, DEVCONF_RTR_SOLICIT_MAX_INTERVAL, DEVCONF_SEG6_ENABLED, + DEVCONF_SEG6_REQUIRE_HMAC, DEVCONF_MAX }; diff --git a/include/uapi/linux/seg6_hmac.h b/include/uapi/linux/seg6_hmac.h new file mode 100644 index 000000000000..b652dfd51bc5 --- /dev/null +++ b/include/uapi/linux/seg6_hmac.h @@ -0,0 +1,21 @@ +#ifndef _UAPI_LINUX_SEG6_HMAC_H +#define _UAPI_LINUX_SEG6_HMAC_H + +#include + +#define SEG6_HMAC_SECRET_LEN 64 +#define SEG6_HMAC_FIELD_LEN 32 + +struct sr6_tlv_hmac { + struct sr6_tlv tlvhdr; + __u16 reserved; + __be32 hmackeyid; + __u8 hmac[SEG6_HMAC_FIELD_LEN]; +}; + +enum { + SEG6_HMAC_ALGO_SHA1 = 1, + SEG6_HMAC_ALGO_SHA256 = 2, +}; + +#endif diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 1123a001d729..0f00811a785f 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -301,4 +301,16 @@ config IPV6_SEG6_INLINE If unsure, say N. +config IPV6_SEG6_HMAC + bool "IPv6: Segment Routing HMAC support" + depends on IPV6 + select CRYPTO_HMAC + select CRYPTO_SHA1 + select CRYPTO_SHA256 + ---help--- + Support for HMAC signature generation and verification + of SR-enabled packets. + + If unsure, say N. 
+ endif # IPV6 diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 59ee92fb3689..129cad2ba960 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -44,6 +44,7 @@ obj-$(CONFIG_IPV6_SIT) += sit.o obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o obj-$(CONFIG_IPV6_GRE) += ip6_gre.o obj-$(CONFIG_IPV6_FOU) += fou6.o +obj-$(CONFIG_IPV6_SEG6_HMAC) += seg6_hmac.o obj-y += addrconf_core.o exthdrs_core.o ip6_checksum.o ip6_icmp.o obj-$(CONFIG_INET) += output_core.o protocol.o $(ipv6-offload) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 2ac6cb460af0..86219c0a0104 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -239,6 +239,9 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { .ignore_routes_with_linkdown = 0, .keep_addr_on_down = 0, .seg6_enabled = 0, +#ifdef CONFIG_IPV6_SEG6_HMAC + .seg6_require_hmac = 0, +#endif }; static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { @@ -286,6 +289,9 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { .ignore_routes_with_linkdown = 0, .keep_addr_on_down = 0, .seg6_enabled = 0, +#ifdef CONFIG_IPV6_SEG6_HMAC + .seg6_require_hmac = 0, +#endif }; /* Check if a valid qdisc is available */ @@ -4947,6 +4953,9 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_DROP_UNSOLICITED_NA] = cnf->drop_unsolicited_na; array[DEVCONF_KEEP_ADDR_ON_DOWN] = cnf->keep_addr_on_down; array[DEVCONF_SEG6_ENABLED] = cnf->seg6_enabled; +#ifdef CONFIG_IPV6_SEG6_HMAC + array[DEVCONF_SEG6_REQUIRE_HMAC] = cnf->seg6_require_hmac; +#endif } static inline size_t inet6_ifla6_size(void) @@ -6045,6 +6054,15 @@ static const struct ctl_table addrconf_sysctl[] = { .mode = 0644, .proc_handler = proc_dointvec, }, +#ifdef CONFIG_IPV6_SEG6_HMAC + { + .procname = "seg6_require_hmac", + .data = &ipv6_devconf.seg6_require_hmac, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, +#endif { /* sentinel */ } diff --git a/net/ipv6/seg6_hmac.c b/net/ipv6/seg6_hmac.c new file 
mode 100644 index 000000000000..ef1c8a46e7ac --- /dev/null +++ b/net/ipv6/seg6_hmac.c @@ -0,0 +1,484 @@ +/* + * SR-IPv6 implementation -- HMAC functions + * + * Author: + * David Lebrun + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +static char * __percpu *hmac_ring; + +static int seg6_hmac_cmpfn(struct rhashtable_compare_arg *arg, const void *obj) +{ + const struct seg6_hmac_info *hinfo = obj; + + return (hinfo->hmackeyid != *(__u32 *)arg->key); +} + +static inline void seg6_hinfo_release(struct seg6_hmac_info *hinfo) +{ + kfree_rcu(hinfo, rcu); +} + +static void seg6_free_hi(void *ptr, void *arg) +{ + struct seg6_hmac_info *hinfo = (struct seg6_hmac_info *)ptr; + + if (hinfo) + seg6_hinfo_release(hinfo); +} + +static const struct rhashtable_params rht_params = { + .head_offset = offsetof(struct seg6_hmac_info, node), + .key_offset = offsetof(struct seg6_hmac_info, hmackeyid), + .key_len = sizeof(u32), + .automatic_shrinking = true, + .obj_cmpfn = seg6_hmac_cmpfn, +}; + +static struct seg6_hmac_algo hmac_algos[] = { + { + .alg_id = SEG6_HMAC_ALGO_SHA1, + .name = "hmac(sha1)", + }, + { + .alg_id = SEG6_HMAC_ALGO_SHA256, + .name = "hmac(sha256)", + }, +}; + +static struct sr6_tlv_hmac *seg6_get_tlv_hmac(struct ipv6_sr_hdr *srh) +{ + struct sr6_tlv_hmac *tlv; + + if (srh->hdrlen < (srh->first_segment + 1) * 2 + 5) + return NULL; + + if (!sr_has_hmac(srh)) + return NULL; + + tlv = (struct sr6_tlv_hmac *) + ((char *)srh + ((srh->hdrlen + 1) << 3) - 40); 
+ + if (tlv->tlvhdr.type != SR6_TLV_HMAC || tlv->tlvhdr.len != 38) + return NULL; + + return tlv; +} + +static struct seg6_hmac_algo *__hmac_get_algo(u8 alg_id) +{ + struct seg6_hmac_algo *algo; + int i, alg_count; + + alg_count = sizeof(hmac_algos) / sizeof(struct seg6_hmac_algo); + for (i = 0; i < alg_count; i++) { + algo = &hmac_algos[i]; + if (algo->alg_id == alg_id) + return algo; + } + + return NULL; +} + +static int __do_hmac(struct seg6_hmac_info *hinfo, const char *text, u8 psize, + u8 *output, int outlen) +{ + struct seg6_hmac_algo *algo; + struct crypto_shash *tfm; + struct shash_desc *shash; + int ret, dgsize; + + algo = __hmac_get_algo(hinfo->alg_id); + if (!algo) + return -ENOENT; + + tfm = *this_cpu_ptr(algo->tfms); + + dgsize = crypto_shash_digestsize(tfm); + if (dgsize > outlen) { + pr_debug("sr-ipv6: __do_hmac: digest size too big (%d / %d)\n", + dgsize, outlen); + return -ENOMEM; + } + + ret = crypto_shash_setkey(tfm, hinfo->secret, hinfo->slen); + if (ret < 0) { + pr_debug("sr-ipv6: crypto_shash_setkey failed: err %d\n", ret); + goto failed; + } + + shash = *this_cpu_ptr(algo->shashs); + shash->tfm = tfm; + + ret = crypto_shash_digest(shash, text, psize, output); + if (ret < 0) { + pr_debug("sr-ipv6: crypto_shash_digest failed: err %d\n", ret); + goto failed; + } + + return dgsize; + +failed: + return ret; +} + +int seg6_hmac_compute(struct seg6_hmac_info *hinfo, struct ipv6_sr_hdr *hdr, + struct in6_addr *saddr, u8 *output) +{ + __be32 hmackeyid = cpu_to_be32(hinfo->hmackeyid); + u8 tmp_out[SEG6_HMAC_MAX_DIGESTSIZE]; + int plen, i, dgsize, wrsize; + char *ring, *off; + + /* a 160-byte buffer for digest output allows to store highest known + * hash function (RadioGatun) with up to 1216 bits + */ + + /* saddr(16) + first_seg(1) + cleanup(1) + keyid(4) + seglist(16n) */ + plen = 16 + 1 + 1 + 4 + (hdr->first_segment + 1) * 16; + + /* this limit allows for 14 segments */ + if (plen >= SEG6_HMAC_RING_SIZE) + return -EMSGSIZE; + + /* Let's build the 
HMAC text on the ring buffer. The text is composed + * as follows, in order: + * + * 1. Source IPv6 address (128 bits) + * 2. first_segment value (8 bits) + * 3. cleanup flag (8 bits: highest bit is cleanup value, others are 0) + * 4. HMAC Key ID (32 bits) + * 5. All segments in the segments list (n * 128 bits) + */ + + local_bh_disable(); + ring = *this_cpu_ptr(hmac_ring); + off = ring; + + /* source address */ + memcpy(off, saddr, 16); + off += 16; + + /* first_segment value */ + *off++ = hdr->first_segment; + + /* cleanup flag */ + *off++ = !!(sr_has_cleanup(hdr)) << 7; + + /* HMAC Key ID */ + memcpy(off, &hmackeyid, 4); + off += 4; + + /* all segments in the list */ + for (i = 0; i < hdr->first_segment + 1; i++) { + memcpy(off, hdr->segments + i, 16); + off += 16; + } + + dgsize = __do_hmac(hinfo, ring, plen, tmp_out, + SEG6_HMAC_MAX_DIGESTSIZE); + local_bh_enable(); + + if (dgsize < 0) + return dgsize; + + wrsize = SEG6_HMAC_FIELD_LEN; + if (wrsize > dgsize) + wrsize = dgsize; + + memset(output, 0, SEG6_HMAC_FIELD_LEN); + memcpy(output, tmp_out, wrsize); + + return 0; +} +EXPORT_SYMBOL(seg6_hmac_compute); + +/* checks if an incoming SR-enabled packet's HMAC status matches + * the incoming policy. 
+ * + * called with rcu_read_lock() + */ +bool seg6_hmac_validate_skb(struct sk_buff *skb) +{ + u8 hmac_output[SEG6_HMAC_FIELD_LEN]; + struct net *net = dev_net(skb->dev); + struct seg6_hmac_info *hinfo; + struct sr6_tlv_hmac *tlv; + struct ipv6_sr_hdr *srh; + struct inet6_dev *idev; + + idev = __in6_dev_get(skb->dev); + + srh = (struct ipv6_sr_hdr *)skb_transport_header(skb); + + tlv = seg6_get_tlv_hmac(srh); + + /* mandatory check but no tlv */ + if (idev->cnf.seg6_require_hmac > 0 && !tlv) + return false; + + /* no check */ + if (idev->cnf.seg6_require_hmac < 0) + return true; + + /* check only if present */ + if (idev->cnf.seg6_require_hmac == 0 && !tlv) + return true; + + /* now, seg6_require_hmac >= 0 && tlv */ + + hinfo = seg6_hmac_info_lookup(net, be32_to_cpu(tlv->hmackeyid)); + if (!hinfo) + return false; + + if (seg6_hmac_compute(hinfo, srh, &ipv6_hdr(skb)->saddr, hmac_output)) + return false; + + if (memcmp(hmac_output, tlv->hmac, SEG6_HMAC_FIELD_LEN) != 0) + return false; + + return true; +} +EXPORT_SYMBOL(seg6_hmac_validate_skb); + +/* called with rcu_read_lock() */ +struct seg6_hmac_info *seg6_hmac_info_lookup(struct net *net, u32 key) +{ + struct seg6_pernet_data *sdata = seg6_pernet(net); + struct seg6_hmac_info *hinfo; + + hinfo = rhashtable_lookup_fast(&sdata->hmac_infos, &key, rht_params); + + return hinfo; +} +EXPORT_SYMBOL(seg6_hmac_info_lookup); + +int seg6_hmac_info_add(struct net *net, u32 key, struct seg6_hmac_info *hinfo) +{ + struct seg6_pernet_data *sdata = seg6_pernet(net); + int err; + + err = rhashtable_lookup_insert_fast(&sdata->hmac_infos, &hinfo->node, + rht_params); + + return err; +} +EXPORT_SYMBOL(seg6_hmac_info_add); + +int seg6_hmac_info_del(struct net *net, u32 key) +{ + struct seg6_pernet_data *sdata = seg6_pernet(net); + struct seg6_hmac_info *hinfo; + int err = -ENOENT; + + hinfo = rhashtable_lookup_fast(&sdata->hmac_infos, &key, rht_params); + if (!hinfo) + goto out; + + err = rhashtable_remove_fast(&sdata->hmac_infos, 
&hinfo->node, + rht_params); + if (err) + goto out; + + seg6_hinfo_release(hinfo); + +out: + return err; +} +EXPORT_SYMBOL(seg6_hmac_info_del); + +int seg6_push_hmac(struct net *net, struct in6_addr *saddr, + struct ipv6_sr_hdr *srh) +{ + struct seg6_hmac_info *hinfo; + struct sr6_tlv_hmac *tlv; + int err = -ENOENT; + + tlv = seg6_get_tlv_hmac(srh); + if (!tlv) + return -EINVAL; + + rcu_read_lock(); + + hinfo = seg6_hmac_info_lookup(net, be32_to_cpu(tlv->hmackeyid)); + if (!hinfo) + goto out; + + memset(tlv->hmac, 0, SEG6_HMAC_FIELD_LEN); + err = seg6_hmac_compute(hinfo, srh, saddr, tlv->hmac); + +out: + rcu_read_unlock(); + return err; +} +EXPORT_SYMBOL(seg6_push_hmac); + +static int seg6_hmac_init_ring(void) +{ + int i; + + hmac_ring = alloc_percpu(char *); + + if (!hmac_ring) + return -ENOMEM; + + for_each_possible_cpu(i) { + char *ring = kzalloc(SEG6_HMAC_RING_SIZE, GFP_KERNEL); + + if (!ring) + return -ENOMEM; + + *per_cpu_ptr(hmac_ring, i) = ring; + } + + return 0; +} + +static int seg6_hmac_init_algo(void) +{ + struct seg6_hmac_algo *algo; + struct crypto_shash *tfm; + struct shash_desc *shash; + int i, alg_count, cpu; + + alg_count = sizeof(hmac_algos) / sizeof(struct seg6_hmac_algo); + + for (i = 0; i < alg_count; i++) { + struct crypto_shash **p_tfm; + int shsize; + + algo = &hmac_algos[i]; + algo->tfms = alloc_percpu(struct crypto_shash *); + if (!algo->tfms) + return -ENOMEM; + + for_each_possible_cpu(cpu) { + tfm = crypto_alloc_shash(algo->name, 0, GFP_KERNEL); + if (IS_ERR(tfm)) + return PTR_ERR(tfm); + p_tfm = per_cpu_ptr(algo->tfms, cpu); + *p_tfm = tfm; + } + + p_tfm = this_cpu_ptr(algo->tfms); + tfm = *p_tfm; + + shsize = sizeof(*shash) + crypto_shash_descsize(tfm); + + algo->shashs = alloc_percpu(struct shash_desc *); + if (!algo->shashs) + return -ENOMEM; + + for_each_possible_cpu(cpu) { + shash = kzalloc(shsize, GFP_KERNEL); + if (!shash) + return -ENOMEM; + *per_cpu_ptr(algo->shashs, cpu) = shash; + } + } + + return 0; +} + +int __init 
seg6_hmac_init(void) +{ + int ret; + + ret = seg6_hmac_init_ring(); + if (ret < 0) + goto out; + + ret = seg6_hmac_init_algo(); + +out: + return ret; +} +EXPORT_SYMBOL(seg6_hmac_init); + +int __net_init seg6_hmac_net_init(struct net *net) +{ + struct seg6_pernet_data *sdata = seg6_pernet(net); + + rhashtable_init(&sdata->hmac_infos, &rht_params); + + return 0; +} +EXPORT_SYMBOL(seg6_hmac_net_init); + +void seg6_hmac_exit(void) +{ + struct seg6_hmac_algo *algo = NULL; + int i, alg_count, cpu; + + for_each_possible_cpu(i) { + char *ring = *per_cpu_ptr(hmac_ring, i); + + kfree(ring); + } + free_percpu(hmac_ring); + + alg_count = sizeof(hmac_algos) / sizeof(struct seg6_hmac_algo); + for (i = 0; i < alg_count; i++) { + algo = &hmac_algos[i]; + for_each_possible_cpu(cpu) { + struct crypto_shash *tfm; + struct shash_desc *shash; + + shash = *per_cpu_ptr(algo->shashs, cpu); + kfree(shash); + tfm = *per_cpu_ptr(algo->tfms, cpu); + crypto_free_shash(tfm); + } + free_percpu(algo->tfms); + free_percpu(algo->shashs); + } +} +EXPORT_SYMBOL(seg6_hmac_exit); + +void __net_exit seg6_hmac_net_exit(struct net *net) +{ + struct seg6_pernet_data *sdata = seg6_pernet(net); + + rhashtable_free_and_destroy(&sdata->hmac_infos, seg6_free_hi, NULL); +} +EXPORT_SYMBOL(seg6_hmac_net_exit); -- cgit v1.2.3 From 149d6ad83663b4820ca09c9d40b1eea7f5c22c2b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 8 Nov 2016 11:07:28 -0800 Subject: net: napi_hash_add() is no longer exported There are no more users except from net/core/dev.c napi_hash_add() can now be static. Signed-off-by: Eric Dumazet Cc: Michael Chan Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 11 ----------- net/core/dev.c | 3 +-- 2 files changed, 1 insertion(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 66fd61c681d9..d64135a0ab71 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -467,17 +467,6 @@ static inline void napi_complete(struct napi_struct *n) return napi_complete_done(n, 0); } -/** - * napi_hash_add - add a NAPI to global hashtable - * @napi: NAPI context - * - * Generate a new napi_id and store a @napi under it in napi_hash. - * Used for busy polling (CONFIG_NET_RX_BUSY_POLL). - * Note: This is normally automatically done from netif_napi_add(), - * so might disappear in a future Linux version. - */ -void napi_hash_add(struct napi_struct *napi); - /** * napi_hash_del - remove a NAPI from global table * @napi: NAPI context diff --git a/net/core/dev.c b/net/core/dev.c index c9837fa08dfc..7385c1a152fd 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5017,7 +5017,7 @@ EXPORT_SYMBOL(sk_busy_loop); #endif /* CONFIG_NET_RX_BUSY_POLL */ -void napi_hash_add(struct napi_struct *napi) +static void napi_hash_add(struct napi_struct *napi) { if (test_bit(NAPI_STATE_NO_BUSY_POLL, &napi->state) || test_and_set_bit(NAPI_STATE_HASHED, &napi->state)) @@ -5037,7 +5037,6 @@ void napi_hash_add(struct napi_struct *napi) spin_unlock(&napi_hash_lock); } -EXPORT_SYMBOL_GPL(napi_hash_add); /* Warning : caller is responsible to make sure rcu grace period * is respected before freeing memory containing @napi -- cgit v1.2.3 From d8d26354191399627bac9cf0da0667b0f5178686 Mon Sep 17 00:00:00 2001 From: Richard Cochran Date: Tue, 8 Nov 2016 22:49:16 +0100 Subject: ptp: Introduce a high resolution frequency adjustment method. The internal PTP Hardware Clock (PHC) interface limits the resolution for frequency adjustments to one part per billion. 
However, some hardware devices allow finer adjustment, and making use of the increased resolution improves synchronization measurably on such devices. This patch adds an alternative method that allows finer frequency tuning by passing the scaled ppm value to PHC drivers. This value comes from user space, and it has a resolution of about 0.015 ppb. We also deprecate the older method, anticipating its removal once existing drivers have been converted over. Signed-off-by: Richard Cochran Suggested-by: Ulrik De Bie Signed-off-by: David S. Miller --- drivers/ptp/ptp_clock.c | 5 ++++- include/linux/ptp_clock_kernel.h | 8 ++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c index 86280b7e41f3..9c13381b6966 100644 --- a/drivers/ptp/ptp_clock.c +++ b/drivers/ptp/ptp_clock.c @@ -153,7 +153,10 @@ static int ptp_clock_adjtime(struct posix_clock *pc, struct timex *tx) s32 ppb = scaled_ppm_to_ppb(tx->freq); if (ppb > ops->max_adj || ppb < -ops->max_adj) return -ERANGE; - err = ops->adjfreq(ops, ppb); + if (ops->adjfine) + err = ops->adjfine(ops, tx->freq); + else + err = ops->adjfreq(ops, ppb); ptp->dialed_frequency = tx->freq; } else if (tx->modes == 0) { tx->freq = ptp->dialed_frequency; diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h index 5ad54fc66cf0..b76d47aba564 100644 --- a/include/linux/ptp_clock_kernel.h +++ b/include/linux/ptp_clock_kernel.h @@ -58,7 +58,14 @@ struct system_device_crosststamp; * * clock operations * + * @adjfine: Adjusts the frequency of the hardware clock. + * parameter scaled_ppm: Desired frequency offset from + * nominal frequency in parts per million, but with a + * 16 bit binary fractional field. + * * @adjfreq: Adjusts the frequency of the hardware clock. + * This method is deprecated. New drivers should implement + * the @adjfine method instead. 
* parameter delta: Desired frequency offset from nominal frequency * in parts per billion * @@ -108,6 +115,7 @@ struct ptp_clock_info { int n_pins; int pps; struct ptp_pin_desc *pin_config; + int (*adjfine)(struct ptp_clock_info *ptp, long scaled_ppm); int (*adjfreq)(struct ptp_clock_info *ptp, s32 delta); int (*adjtime)(struct ptp_clock_info *ptp, s64 delta); int (*gettime64)(struct ptp_clock_info *ptp, struct timespec64 *ts); -- cgit v1.2.3 From 7bdc9650f03604b06ba7434fab694e8ae8ca782d Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Wed, 19 Oct 2016 19:40:02 -0700 Subject: remoteproc: Introduce subdevices A subdevice is an abstract entity that can be used to tie actions to the booting and shutting down of a remote processor. The subdevice object is expected to be embedded in concrete implementations, allowing for a variety of use cases to be implemented. Signed-off-by: Bjorn Andersson --- drivers/remoteproc/remoteproc_core.c | 72 ++++++++++++++++++++++++++++++++++++ include/linux/remoteproc.h | 22 +++++++++++ 2 files changed, 94 insertions(+) (limited to 'include/linux') diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c index b1860949d106..b5e314fe1f4c 100644 --- a/drivers/remoteproc/remoteproc_core.c +++ b/drivers/remoteproc/remoteproc_core.c @@ -736,6 +736,34 @@ static int rproc_handle_resources(struct rproc *rproc, int len, return ret; } +static int rproc_probe_subdevices(struct rproc *rproc) +{ + struct rproc_subdev *subdev; + int ret; + + list_for_each_entry(subdev, &rproc->subdevs, node) { + ret = subdev->probe(subdev); + if (ret) + goto unroll_registration; + } + + return 0; + +unroll_registration: + list_for_each_entry_continue_reverse(subdev, &rproc->subdevs, node) + subdev->remove(subdev); + + return ret; +} + +static void rproc_remove_subdevices(struct rproc *rproc) +{ + struct rproc_subdev *subdev; + + list_for_each_entry(subdev, &rproc->subdevs, node) + subdev->remove(subdev); +} + /** * 
rproc_resource_cleanup() - clean up and free all acquired resources * @rproc: rproc handle @@ -878,12 +906,22 @@ static int rproc_fw_boot(struct rproc *rproc, const struct firmware *fw) goto clean_up_resources; } + /* probe any subdevices for the remote processor */ + ret = rproc_probe_subdevices(rproc); + if (ret) { + dev_err(dev, "failed to probe subdevices for %s: %d\n", + rproc->name, ret); + goto stop_rproc; + } + rproc->state = RPROC_RUNNING; dev_info(dev, "remote processor %s is now up\n", rproc->name); return 0; +stop_rproc: + rproc->ops->stop(rproc); clean_up_resources: rproc_resource_cleanup(rproc); clean_up: @@ -1121,6 +1159,9 @@ void rproc_shutdown(struct rproc *rproc) if (!atomic_dec_and_test(&rproc->power)) goto out; + /* remove any subdevices for the remote processor */ + rproc_remove_subdevices(rproc); + /* power off the remote processor */ ret = rproc->ops->stop(rproc); if (ret) { @@ -1372,6 +1413,7 @@ struct rproc *rproc_alloc(struct device *dev, const char *name, INIT_LIST_HEAD(&rproc->mappings); INIT_LIST_HEAD(&rproc->traces); INIT_LIST_HEAD(&rproc->rvdevs); + INIT_LIST_HEAD(&rproc->subdevs); INIT_WORK(&rproc->crash_handler, rproc_crash_handler_work); init_completion(&rproc->crash_comp); @@ -1458,6 +1500,36 @@ int rproc_del(struct rproc *rproc) } EXPORT_SYMBOL(rproc_del); +/** + * rproc_add_subdev() - add a subdevice to a remoteproc + * @rproc: rproc handle to add the subdevice to + * @subdev: subdev handle to register + * @probe: function to call when the rproc boots + * @remove: function to call when the rproc shuts down + */ +void rproc_add_subdev(struct rproc *rproc, + struct rproc_subdev *subdev, + int (*probe)(struct rproc_subdev *subdev), + void (*remove)(struct rproc_subdev *subdev)) +{ + subdev->probe = probe; + subdev->remove = remove; + + list_add_tail(&subdev->node, &rproc->subdevs); +} +EXPORT_SYMBOL(rproc_add_subdev); + +/** + * rproc_remove_subdev() - remove a subdevice from a remoteproc + * @rproc: rproc handle to remove the 
subdevice from + * @subdev: subdev handle, previously registered with rproc_add_subdev() + */ +void rproc_remove_subdev(struct rproc *rproc, struct rproc_subdev *subdev) +{ + list_del(&subdev->node); +} +EXPORT_SYMBOL(rproc_remove_subdev); + /** * rproc_report_crash() - rproc crash reporter function * @rproc: remote processor diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h index 940e4cf2ac48..f6d5e66854e4 100644 --- a/include/linux/remoteproc.h +++ b/include/linux/remoteproc.h @@ -400,6 +400,7 @@ enum rproc_crash_type { * @firmware_loading_complete: marks e/o asynchronous firmware loading * @bootaddr: address of first instruction to boot rproc with (optional) * @rvdevs: list of remote virtio devices + * @subdevs: list of subdevices, to follow the running state * @notifyids: idr for dynamically assigning rproc-wide unique notify ids * @index: index of this rproc device * @crash_handler: workqueue for handling a crash @@ -431,6 +432,7 @@ struct rproc { struct completion firmware_loading_complete; u32 bootaddr; struct list_head rvdevs; + struct list_head subdevs; struct idr notifyids; int index; struct work_struct crash_handler; @@ -444,6 +446,19 @@ struct rproc { bool auto_boot; }; +/** + * struct rproc_subdev - subdevice tied to a remoteproc + * @node: list node related to the rproc subdevs list + * @probe: probe function, called as the rproc is started + * @remove: remove function, called as the rproc is stopped + */ +struct rproc_subdev { + struct list_head node; + + int (*probe)(struct rproc_subdev *subdev); + void (*remove)(struct rproc_subdev *subdev); +}; + /* we currently support only two vrings per rvdev */ #define RVDEV_NUM_VRINGS 2 @@ -511,4 +526,11 @@ static inline struct rproc *vdev_to_rproc(struct virtio_device *vdev) return rvdev->rproc; } +void rproc_add_subdev(struct rproc *rproc, + struct rproc_subdev *subdev, + int (*probe)(struct rproc_subdev *subdev), + void (*remove)(struct rproc_subdev *subdev)); + +void 
rproc_remove_subdev(struct rproc *rproc, struct rproc_subdev *subdev); + #endif /* REMOTEPROC_H */ -- cgit v1.2.3 From c97db7cc7778e34a53b42d58c766f0ec0e30d580 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 21 Sep 2016 14:57:19 +0800 Subject: base: soc: Introduce soc_device_match() interface We keep running into cases where device drivers want to know the exact version of the SoC they are currently running on. In the past, this has usually been done through a vendor specific API that can be called by a driver, or by directly accessing some kind of version register that is not part of the device itself but that belongs to a global register area of the chip. Common reasons for doing this include: - A machine is not using devicetree or similar for passing data about on-chip devices, but just announces their presence using boot-time platform devices, and the machine code itself does not care about the revision. - There is existing firmware or boot loaders with existing DT binaries with generic compatible strings that do not identify the particular revision of each device, but the driver knows which SoC revisions include which part. - A prerelease version of a chip has some quirks and we are using the same version of the bootloader and the DT blob on both the prerelease and the final version. An update of the DT binding seems inappropriate because that would involve maintaining multiple copies of the dts and/or bootloader. This patch introduces the soc_device_match() interface that is meant to work like of_match_node() but instead of identifying the version of a device, it identifies the SoC itself using a vendor-agnostic interface. Unlike of_match_node(), we do not do an exact string compare but instead use glob_match() to allow wildcards in strings. 
Signed-off-by: Arnd Bergmann Signed-off-by: Yangbo Lu Signed-off-by: Geert Uytterhoeven Acked-by: Greg Kroah-Hartman --- drivers/base/Kconfig | 1 + drivers/base/soc.c | 66 +++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/sys_soc.h | 3 +++ 3 files changed, 70 insertions(+) (limited to 'include/linux') diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig index fdf44cac08e6..991b21e1f89b 100644 --- a/drivers/base/Kconfig +++ b/drivers/base/Kconfig @@ -235,6 +235,7 @@ config GENERIC_CPU_AUTOPROBE config SOC_BUS bool + select GLOB source "drivers/base/regmap/Kconfig" diff --git a/drivers/base/soc.c b/drivers/base/soc.c index 028cef377fd4..04ee597fc3a3 100644 --- a/drivers/base/soc.c +++ b/drivers/base/soc.c @@ -13,6 +13,7 @@ #include #include #include +#include static DEFINE_IDA(soc_ida); @@ -168,3 +169,68 @@ static int __init soc_bus_register(void) return bus_register(&soc_bus_type); } core_initcall(soc_bus_register); + +static int soc_device_match_one(struct device *dev, void *arg) +{ + struct soc_device *soc_dev = container_of(dev, struct soc_device, dev); + const struct soc_device_attribute *match = arg; + + if (match->machine && + !glob_match(match->machine, soc_dev->attr->machine)) + return 0; + + if (match->family && + !glob_match(match->family, soc_dev->attr->family)) + return 0; + + if (match->revision && + !glob_match(match->revision, soc_dev->attr->revision)) + return 0; + + if (match->soc_id && + !glob_match(match->soc_id, soc_dev->attr->soc_id)) + return 0; + + return 1; +} + +/* + * soc_device_match - identify the SoC in the machine + * @matches: zero-terminated array of possible matches + * + * returns the first matching entry of the argument array, or NULL + * if none of them match. 
+ * + * This function is meant as a helper in place of of_match_node() + * in cases where either no device tree is available or the information + * in a device node is insufficient to identify a particular variant + * by its compatible strings or other properties. For new devices, + * the DT binding should always provide unique compatible strings + * that allow the use of of_match_node() instead. + * + * The calling function can use the .data entry of the + * soc_device_attribute to pass a structure or function pointer for + * each entry. + */ +const struct soc_device_attribute *soc_device_match( + const struct soc_device_attribute *matches) +{ + int ret = 0; + + if (!matches) + return NULL; + + while (!ret) { + if (!(matches->machine || matches->family || + matches->revision || matches->soc_id)) + break; + ret = bus_for_each_dev(&soc_bus_type, NULL, (void *)matches, + soc_device_match_one); + if (!ret) + matches++; + else + return matches; + } + return NULL; +} +EXPORT_SYMBOL_GPL(soc_device_match); diff --git a/include/linux/sys_soc.h b/include/linux/sys_soc.h index 2739ccb69571..9f5eb06f9fd8 100644 --- a/include/linux/sys_soc.h +++ b/include/linux/sys_soc.h @@ -13,6 +13,7 @@ struct soc_device_attribute { const char *family; const char *revision; const char *soc_id; + const void *data; }; /** @@ -34,4 +35,6 @@ void soc_device_unregister(struct soc_device *soc_dev); */ struct device *soc_device_to_device(struct soc_device *soc); +const struct soc_device_attribute *soc_device_match( + const struct soc_device_attribute *matches); #endif /* __SOC_BUS_H */ -- cgit v1.2.3 From da65a1589dacc7ec44ea0557a14d70a39d991f32 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 26 Oct 2016 15:13:15 +0200 Subject: base: soc: Provide a dummy implementation of soc_device_match() Provide a dummy implementation of soc_device_match(), to allow compiling drivers that may be used on SoCs both with and without CONFIG_SOC_BUS, and for compile testing. 
Signed-off-by: Geert Uytterhoeven --- include/linux/sys_soc.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sys_soc.h b/include/linux/sys_soc.h index 9f5eb06f9fd8..bed223b70217 100644 --- a/include/linux/sys_soc.h +++ b/include/linux/sys_soc.h @@ -35,6 +35,12 @@ void soc_device_unregister(struct soc_device *soc_dev); */ struct device *soc_device_to_device(struct soc_device *soc); +#ifdef CONFIG_SOC_BUS const struct soc_device_attribute *soc_device_match( const struct soc_device_attribute *matches); +#else +static inline const struct soc_device_attribute *soc_device_match( + const struct soc_device_attribute *matches) { return NULL; } +#endif + #endif /* __SOC_BUS_H */ -- cgit v1.2.3 From 2da16a6948ca8f025e2c226ea4fc32baa6b90f27 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Thu, 10 Nov 2016 11:17:25 +0100 Subject: netfilter: ipset: Remove extra whitespaces in ip_set.h Remove unnecessary whitespaces. Ported from a patch proposed by Sergey Popovich . 
Suggested-by: Sergey Popovich Signed-off-by: Jozsef Kadlecsik --- include/linux/netfilter/ipset/ip_set.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 83b9a2e0d8d4..5b1fd090f34b 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -336,14 +336,15 @@ ip_set_update_counter(struct ip_set_counter *counter, static inline void ip_set_get_skbinfo(struct ip_set_skbinfo *skbinfo, - const struct ip_set_ext *ext, - struct ip_set_ext *mext, u32 flags) + const struct ip_set_ext *ext, + struct ip_set_ext *mext, u32 flags) { - mext->skbmark = skbinfo->skbmark; - mext->skbmarkmask = skbinfo->skbmarkmask; - mext->skbprio = skbinfo->skbprio; - mext->skbqueue = skbinfo->skbqueue; + mext->skbmark = skbinfo->skbmark; + mext->skbmarkmask = skbinfo->skbmarkmask; + mext->skbprio = skbinfo->skbprio; + mext->skbqueue = skbinfo->skbqueue; } + static inline bool ip_set_put_skbinfo(struct sk_buff *skb, struct ip_set_skbinfo *skbinfo) { -- cgit v1.2.3 From da9fbfa76f32a031cb70b11e9fa650e30c85d040 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Thu, 10 Nov 2016 11:24:15 +0100 Subject: netfilter: ipset: Mark some helper args as const. Mark some of the helpers arguments as const. Ported from a patch proposed by Sergey Popovich . 
Suggested-by: Sergey Popovich Signed-off-by: Jozsef Kadlecsik --- include/linux/netfilter/ipset/ip_set.h | 4 ++-- include/linux/netfilter/ipset/ip_set_comment.h | 2 +- include/linux/netfilter/ipset/ip_set_timeout.h | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 5b1fd090f34b..524467f933bf 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -346,7 +346,7 @@ ip_set_get_skbinfo(struct ip_set_skbinfo *skbinfo, } static inline bool -ip_set_put_skbinfo(struct sk_buff *skb, struct ip_set_skbinfo *skbinfo) +ip_set_put_skbinfo(struct sk_buff *skb, const struct ip_set_skbinfo *skbinfo) { /* Send nonzero parameters only */ return ((skbinfo->skbmark || skbinfo->skbmarkmask) && @@ -373,7 +373,7 @@ ip_set_init_skbinfo(struct ip_set_skbinfo *skbinfo, } static inline bool -ip_set_put_counter(struct sk_buff *skb, struct ip_set_counter *counter) +ip_set_put_counter(struct sk_buff *skb, const struct ip_set_counter *counter) { return nla_put_net64(skb, IPSET_ATTR_BYTES, cpu_to_be64(ip_set_get_bytes(counter)), diff --git a/include/linux/netfilter/ipset/ip_set_comment.h b/include/linux/netfilter/ipset/ip_set_comment.h index 8d0248525957..bae5c7609be2 100644 --- a/include/linux/netfilter/ipset/ip_set_comment.h +++ b/include/linux/netfilter/ipset/ip_set_comment.h @@ -43,7 +43,7 @@ ip_set_init_comment(struct ip_set_comment *comment, /* Used only when dumping a set, protected by rcu_read_lock_bh() */ static inline int -ip_set_put_comment(struct sk_buff *skb, struct ip_set_comment *comment) +ip_set_put_comment(struct sk_buff *skb, const struct ip_set_comment *comment) { struct ip_set_comment_rcu *c = rcu_dereference_bh(comment->c); diff --git a/include/linux/netfilter/ipset/ip_set_timeout.h b/include/linux/netfilter/ipset/ip_set_timeout.h index 1d6a935c1ac5..bfb3531fd88a 100644 --- 
a/include/linux/netfilter/ipset/ip_set_timeout.h +++ b/include/linux/netfilter/ipset/ip_set_timeout.h @@ -40,7 +40,7 @@ ip_set_timeout_uget(struct nlattr *tb) } static inline bool -ip_set_timeout_expired(unsigned long *t) +ip_set_timeout_expired(const unsigned long *t) { return *t != IPSET_ELEM_PERMANENT && time_is_before_jiffies(*t); } @@ -63,7 +63,7 @@ ip_set_timeout_set(unsigned long *timeout, u32 value) } static inline u32 -ip_set_timeout_get(unsigned long *timeout) +ip_set_timeout_get(const unsigned long *timeout) { return *timeout == IPSET_ELEM_PERMANENT ? 0 : jiffies_to_msecs(*timeout - jiffies)/MSEC_PER_SEC; -- cgit v1.2.3 From 7ffea37957b900422ce8b82e9651f7a0a6fac733 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Thu, 10 Nov 2016 11:31:03 +0100 Subject: netfilter: ipset: Headers file cleanup Group counter helper functions together. Ported from a patch proposed by Sergey Popovich . Suggested-by: Sergey Popovich Signed-off-by: Jozsef Kadlecsik --- include/linux/netfilter/ipset/ip_set.h | 42 +++++++++++++++++----------------- 1 file changed, 21 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 524467f933bf..1ea28e30a6dd 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -334,6 +334,27 @@ ip_set_update_counter(struct ip_set_counter *counter, } } +static inline bool +ip_set_put_counter(struct sk_buff *skb, const struct ip_set_counter *counter) +{ + return nla_put_net64(skb, IPSET_ATTR_BYTES, + cpu_to_be64(ip_set_get_bytes(counter)), + IPSET_ATTR_PAD) || + nla_put_net64(skb, IPSET_ATTR_PACKETS, + cpu_to_be64(ip_set_get_packets(counter)), + IPSET_ATTR_PAD); +} + +static inline void +ip_set_init_counter(struct ip_set_counter *counter, + const struct ip_set_ext *ext) +{ + if (ext->bytes != ULLONG_MAX) + atomic64_set(&(counter)->bytes, (long long)(ext->bytes)); + if (ext->packets != ULLONG_MAX) + 
atomic64_set(&(counter)->packets, (long long)(ext->packets)); +} + static inline void ip_set_get_skbinfo(struct ip_set_skbinfo *skbinfo, const struct ip_set_ext *ext, @@ -372,27 +393,6 @@ ip_set_init_skbinfo(struct ip_set_skbinfo *skbinfo, skbinfo->skbqueue = ext->skbqueue; } -static inline bool -ip_set_put_counter(struct sk_buff *skb, const struct ip_set_counter *counter) -{ - return nla_put_net64(skb, IPSET_ATTR_BYTES, - cpu_to_be64(ip_set_get_bytes(counter)), - IPSET_ATTR_PAD) || - nla_put_net64(skb, IPSET_ATTR_PACKETS, - cpu_to_be64(ip_set_get_packets(counter)), - IPSET_ATTR_PAD); -} - -static inline void -ip_set_init_counter(struct ip_set_counter *counter, - const struct ip_set_ext *ext) -{ - if (ext->bytes != ULLONG_MAX) - atomic64_set(&(counter)->bytes, (long long)(ext->bytes)); - if (ext->packets != ULLONG_MAX) - atomic64_set(&(counter)->packets, (long long)(ext->packets)); -} - /* Netlink CB args */ enum { IPSET_CB_NET = 0, /* net namespace */ -- cgit v1.2.3 From bec810d973003b30bc477146904af6bd93fd2df8 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Tue, 5 May 2015 17:13:28 +0200 Subject: netfilter: ipset: Improve skbinfo get/init helpers Use struct ip_set_skbinfo in struct ip_set_ext instead of open coded fields and assign structure members in get/init helpers instead of copying members one by one. Explicitly note that struct ip_set_skbinfo must be padded to prevent non-aligned access in the extension blob. Ported from a patch proposed by Sergey Popovich . 
Suggested-by: Sergey Popovich Signed-off-by: Jozsef Kadlecsik --- include/linux/netfilter/ipset/ip_set.h | 30 +++++++++++------------------- net/netfilter/ipset/ip_set_core.c | 12 ++++++------ net/netfilter/xt_set.c | 12 +++++++----- 3 files changed, 24 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 1ea28e30a6dd..780262124632 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -92,17 +92,6 @@ struct ip_set_ext_type { extern const struct ip_set_ext_type ip_set_extensions[]; -struct ip_set_ext { - u64 packets; - u64 bytes; - u32 timeout; - u32 skbmark; - u32 skbmarkmask; - u32 skbprio; - u16 skbqueue; - char *comment; -}; - struct ip_set_counter { atomic64_t bytes; atomic64_t packets; @@ -122,6 +111,15 @@ struct ip_set_skbinfo { u32 skbmarkmask; u32 skbprio; u16 skbqueue; + u16 __pad; +}; + +struct ip_set_ext { + struct ip_set_skbinfo skbinfo; + u64 packets; + u64 bytes; + char *comment; + u32 timeout; }; struct ip_set; @@ -360,10 +358,7 @@ ip_set_get_skbinfo(struct ip_set_skbinfo *skbinfo, const struct ip_set_ext *ext, struct ip_set_ext *mext, u32 flags) { - mext->skbmark = skbinfo->skbmark; - mext->skbmarkmask = skbinfo->skbmarkmask; - mext->skbprio = skbinfo->skbprio; - mext->skbqueue = skbinfo->skbqueue; + mext->skbinfo = *skbinfo; } static inline bool @@ -387,10 +382,7 @@ static inline void ip_set_init_skbinfo(struct ip_set_skbinfo *skbinfo, const struct ip_set_ext *ext) { - skbinfo->skbmark = ext->skbmark; - skbinfo->skbmarkmask = ext->skbmarkmask; - skbinfo->skbprio = ext->skbprio; - skbinfo->skbqueue = ext->skbqueue; + *skbinfo = ext->skbinfo; } /* Netlink CB args */ diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 3f1b945a24d5..bfacccff7196 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -426,20 +426,20 @@ 
ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[], if (!SET_WITH_SKBINFO(set)) return -IPSET_ERR_SKBINFO; fullmark = be64_to_cpu(nla_get_be64(tb[IPSET_ATTR_SKBMARK])); - ext->skbmark = fullmark >> 32; - ext->skbmarkmask = fullmark & 0xffffffff; + ext->skbinfo.skbmark = fullmark >> 32; + ext->skbinfo.skbmarkmask = fullmark & 0xffffffff; } if (tb[IPSET_ATTR_SKBPRIO]) { if (!SET_WITH_SKBINFO(set)) return -IPSET_ERR_SKBINFO; - ext->skbprio = be32_to_cpu(nla_get_be32( - tb[IPSET_ATTR_SKBPRIO])); + ext->skbinfo.skbprio = + be32_to_cpu(nla_get_be32(tb[IPSET_ATTR_SKBPRIO])); } if (tb[IPSET_ATTR_SKBQUEUE]) { if (!SET_WITH_SKBINFO(set)) return -IPSET_ERR_SKBINFO; - ext->skbqueue = be16_to_cpu(nla_get_be16( - tb[IPSET_ATTR_SKBQUEUE])); + ext->skbinfo.skbqueue = + be16_to_cpu(nla_get_be16(tb[IPSET_ATTR_SKBQUEUE])); } return 0; } diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c index 1bfede7be418..64285702afd5 100644 --- a/net/netfilter/xt_set.c +++ b/net/netfilter/xt_set.c @@ -423,6 +423,8 @@ set_target_v2(struct sk_buff *skb, const struct xt_action_param *par) /* Revision 3 target */ +#define MOPT(opt, member) ((opt).ext.skbinfo.member) + static unsigned int set_target_v3(struct sk_buff *skb, const struct xt_action_param *par) { @@ -453,14 +455,14 @@ set_target_v3(struct sk_buff *skb, const struct xt_action_param *par) if (!ret) return XT_CONTINUE; if (map_opt.cmdflags & IPSET_FLAG_MAP_SKBMARK) - skb->mark = (skb->mark & ~(map_opt.ext.skbmarkmask)) - ^ (map_opt.ext.skbmark); + skb->mark = (skb->mark & ~MOPT(map_opt,skbmarkmask)) + ^ MOPT(map_opt, skbmark); if (map_opt.cmdflags & IPSET_FLAG_MAP_SKBPRIO) - skb->priority = map_opt.ext.skbprio; + skb->priority = MOPT(map_opt, skbprio); if ((map_opt.cmdflags & IPSET_FLAG_MAP_SKBQUEUE) && skb->dev && - skb->dev->real_num_tx_queues > map_opt.ext.skbqueue) - skb_set_queue_mapping(skb, map_opt.ext.skbqueue); + skb->dev->real_num_tx_queues > MOPT(map_opt, skbqueue)) + skb_set_queue_mapping(skb, MOPT(map_opt, 
skbqueue)); } return XT_CONTINUE; } -- cgit v1.2.3 From 1d0d6bd61d495d271b9774a15fbea93e4875474b Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Wed, 6 May 2015 07:27:28 +0200 Subject: netfilter: ipset: Use kmalloc() in comment extension helper Allocate memory with kmalloc() rather than kzalloc(): the string is immediately initialized so it is unnecessary to zero out the allocated memory area. Ported from a patch proposed by Sergey Popovich . Suggested-by: Sergey Popovich Signed-off-by: Jozsef Kadlecsik --- include/linux/netfilter/ipset/ip_set_comment.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/ipset/ip_set_comment.h b/include/linux/netfilter/ipset/ip_set_comment.h index bae5c7609be2..5444b1bbe656 100644 --- a/include/linux/netfilter/ipset/ip_set_comment.h +++ b/include/linux/netfilter/ipset/ip_set_comment.h @@ -34,7 +34,7 @@ ip_set_init_comment(struct ip_set_comment *comment, return; if (unlikely(len > IPSET_MAX_COMMENT_SIZE)) len = IPSET_MAX_COMMENT_SIZE; - c = kzalloc(sizeof(*c) + len + 1, GFP_ATOMIC); + c = kmalloc(sizeof(*c) + len + 1, GFP_ATOMIC); if (unlikely(!c)) return; strlcpy(c->str, ext->comment, len + 1); -- cgit v1.2.3 From 57982edc2739b4473868e7579c0185270468bae1 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Mon, 10 Oct 2016 21:34:56 +0200 Subject: netfilter: ipset: Split extensions into separate files Cleanup to separate all extensions into individual files. Ported from a patch proposed by Sergey Popovich . 
Suggested-by: Sergey Popovich Signed-off-by: Jozsef Kadlecsik --- include/linux/netfilter/ipset/ip_set.h | 95 +------------------------- include/linux/netfilter/ipset/ip_set_counter.h | 75 ++++++++++++++++++++ include/linux/netfilter/ipset/ip_set_skbinfo.h | 46 +++++++++++++ 3 files changed, 123 insertions(+), 93 deletions(-) create mode 100644 include/linux/netfilter/ipset/ip_set_counter.h create mode 100644 include/linux/netfilter/ipset/ip_set_skbinfo.h (limited to 'include/linux') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 780262124632..b5bd0fb3d07b 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -292,99 +292,6 @@ ip_set_put_flags(struct sk_buff *skb, struct ip_set *set) return nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(cadt_flags)); } -static inline void -ip_set_add_bytes(u64 bytes, struct ip_set_counter *counter) -{ - atomic64_add((long long)bytes, &(counter)->bytes); -} - -static inline void -ip_set_add_packets(u64 packets, struct ip_set_counter *counter) -{ - atomic64_add((long long)packets, &(counter)->packets); -} - -static inline u64 -ip_set_get_bytes(const struct ip_set_counter *counter) -{ - return (u64)atomic64_read(&(counter)->bytes); -} - -static inline u64 -ip_set_get_packets(const struct ip_set_counter *counter) -{ - return (u64)atomic64_read(&(counter)->packets); -} - -static inline void -ip_set_update_counter(struct ip_set_counter *counter, - const struct ip_set_ext *ext, - struct ip_set_ext *mext, u32 flags) -{ - if (ext->packets != ULLONG_MAX && - !(flags & IPSET_FLAG_SKIP_COUNTER_UPDATE)) { - ip_set_add_bytes(ext->bytes, counter); - ip_set_add_packets(ext->packets, counter); - } - if (flags & IPSET_FLAG_MATCH_COUNTERS) { - mext->packets = ip_set_get_packets(counter); - mext->bytes = ip_set_get_bytes(counter); - } -} - -static inline bool -ip_set_put_counter(struct sk_buff *skb, const struct ip_set_counter *counter) -{ - return 
nla_put_net64(skb, IPSET_ATTR_BYTES, - cpu_to_be64(ip_set_get_bytes(counter)), - IPSET_ATTR_PAD) || - nla_put_net64(skb, IPSET_ATTR_PACKETS, - cpu_to_be64(ip_set_get_packets(counter)), - IPSET_ATTR_PAD); -} - -static inline void -ip_set_init_counter(struct ip_set_counter *counter, - const struct ip_set_ext *ext) -{ - if (ext->bytes != ULLONG_MAX) - atomic64_set(&(counter)->bytes, (long long)(ext->bytes)); - if (ext->packets != ULLONG_MAX) - atomic64_set(&(counter)->packets, (long long)(ext->packets)); -} - -static inline void -ip_set_get_skbinfo(struct ip_set_skbinfo *skbinfo, - const struct ip_set_ext *ext, - struct ip_set_ext *mext, u32 flags) -{ - mext->skbinfo = *skbinfo; -} - -static inline bool -ip_set_put_skbinfo(struct sk_buff *skb, const struct ip_set_skbinfo *skbinfo) -{ - /* Send nonzero parameters only */ - return ((skbinfo->skbmark || skbinfo->skbmarkmask) && - nla_put_net64(skb, IPSET_ATTR_SKBMARK, - cpu_to_be64((u64)skbinfo->skbmark << 32 | - skbinfo->skbmarkmask), - IPSET_ATTR_PAD)) || - (skbinfo->skbprio && - nla_put_net32(skb, IPSET_ATTR_SKBPRIO, - cpu_to_be32(skbinfo->skbprio))) || - (skbinfo->skbqueue && - nla_put_net16(skb, IPSET_ATTR_SKBQUEUE, - cpu_to_be16(skbinfo->skbqueue))); -} - -static inline void -ip_set_init_skbinfo(struct ip_set_skbinfo *skbinfo, - const struct ip_set_ext *ext) -{ - *skbinfo = ext->skbinfo; -} - /* Netlink CB args */ enum { IPSET_CB_NET = 0, /* net namespace */ @@ -539,6 +446,8 @@ bitmap_bytes(u32 a, u32 b) #include #include +#include +#include int ip_set_put_extensions(struct sk_buff *skb, const struct ip_set *set, diff --git a/include/linux/netfilter/ipset/ip_set_counter.h b/include/linux/netfilter/ipset/ip_set_counter.h new file mode 100644 index 000000000000..bb6fba480118 --- /dev/null +++ b/include/linux/netfilter/ipset/ip_set_counter.h @@ -0,0 +1,75 @@ +#ifndef _IP_SET_COUNTER_H +#define _IP_SET_COUNTER_H + +/* Copyright (C) 2015 Jozsef Kadlecsik + * + * This program is free software; you can redistribute it 
and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifdef __KERNEL__ + +static inline void +ip_set_add_bytes(u64 bytes, struct ip_set_counter *counter) +{ + atomic64_add((long long)bytes, &(counter)->bytes); +} + +static inline void +ip_set_add_packets(u64 packets, struct ip_set_counter *counter) +{ + atomic64_add((long long)packets, &(counter)->packets); +} + +static inline u64 +ip_set_get_bytes(const struct ip_set_counter *counter) +{ + return (u64)atomic64_read(&(counter)->bytes); +} + +static inline u64 +ip_set_get_packets(const struct ip_set_counter *counter) +{ + return (u64)atomic64_read(&(counter)->packets); +} + +static inline void +ip_set_update_counter(struct ip_set_counter *counter, + const struct ip_set_ext *ext, + struct ip_set_ext *mext, u32 flags) +{ + if (ext->packets != ULLONG_MAX && + !(flags & IPSET_FLAG_SKIP_COUNTER_UPDATE)) { + ip_set_add_bytes(ext->bytes, counter); + ip_set_add_packets(ext->packets, counter); + } + if (flags & IPSET_FLAG_MATCH_COUNTERS) { + mext->packets = ip_set_get_packets(counter); + mext->bytes = ip_set_get_bytes(counter); + } +} + +static inline bool +ip_set_put_counter(struct sk_buff *skb, const struct ip_set_counter *counter) +{ + return nla_put_net64(skb, IPSET_ATTR_BYTES, + cpu_to_be64(ip_set_get_bytes(counter)), + IPSET_ATTR_PAD) || + nla_put_net64(skb, IPSET_ATTR_PACKETS, + cpu_to_be64(ip_set_get_packets(counter)), + IPSET_ATTR_PAD); +} + +static inline void +ip_set_init_counter(struct ip_set_counter *counter, + const struct ip_set_ext *ext) +{ + if (ext->bytes != ULLONG_MAX) + atomic64_set(&(counter)->bytes, (long long)(ext->bytes)); + if (ext->packets != ULLONG_MAX) + atomic64_set(&(counter)->packets, (long long)(ext->packets)); +} + +#endif /* __KERNEL__ */ +#endif /* _IP_SET_COUNTER_H */ diff --git a/include/linux/netfilter/ipset/ip_set_skbinfo.h b/include/linux/netfilter/ipset/ip_set_skbinfo.h new file mode 100644 index 
000000000000..29d7ef2bc3fa --- /dev/null +++ b/include/linux/netfilter/ipset/ip_set_skbinfo.h @@ -0,0 +1,46 @@ +#ifndef _IP_SET_SKBINFO_H +#define _IP_SET_SKBINFO_H + +/* Copyright (C) 2015 Jozsef Kadlecsik + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifdef __KERNEL__ + +static inline void +ip_set_get_skbinfo(struct ip_set_skbinfo *skbinfo, + const struct ip_set_ext *ext, + struct ip_set_ext *mext, u32 flags) +{ + mext->skbinfo = *skbinfo; +} + +static inline bool +ip_set_put_skbinfo(struct sk_buff *skb, const struct ip_set_skbinfo *skbinfo) +{ + /* Send nonzero parameters only */ + return ((skbinfo->skbmark || skbinfo->skbmarkmask) && + nla_put_net64(skb, IPSET_ATTR_SKBMARK, + cpu_to_be64((u64)skbinfo->skbmark << 32 | + skbinfo->skbmarkmask), + IPSET_ATTR_PAD)) || + (skbinfo->skbprio && + nla_put_net32(skb, IPSET_ATTR_SKBPRIO, + cpu_to_be32(skbinfo->skbprio))) || + (skbinfo->skbqueue && + nla_put_net16(skb, IPSET_ATTR_SKBQUEUE, + cpu_to_be16(skbinfo->skbqueue))); +} + +static inline void +ip_set_init_skbinfo(struct ip_set_skbinfo *skbinfo, + const struct ip_set_ext *ext) +{ + *skbinfo = ext->skbinfo; +} + +#endif /* __KERNEL__ */ +#endif /* _IP_SET_SKBINFO_H */ -- cgit v1.2.3 From 837a90eab67edfa464dcc0ddef193449d23da408 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Mon, 10 Oct 2016 21:52:51 +0200 Subject: netfilter: ipset: Regroup ip_set_put_extensions and add extern Cleanup: group ip_set_put_extensions and ip_set_get_extensions together and add missing extern. 
Signed-off-by: Jozsef Kadlecsik --- include/linux/netfilter/ipset/ip_set.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index b5bd0fb3d07b..7a218eb74887 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -331,6 +331,8 @@ extern size_t ip_set_elem_len(struct ip_set *set, struct nlattr *tb[], size_t len, size_t align); extern int ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[], struct ip_set_ext *ext); +extern int ip_set_put_extensions(struct sk_buff *skb, const struct ip_set *set, + const void *e, bool active); static inline int ip_set_get_hostipaddr4(struct nlattr *nla, u32 *ipaddr) @@ -449,10 +451,6 @@ bitmap_bytes(u32 a, u32 b) #include #include -int -ip_set_put_extensions(struct sk_buff *skb, const struct ip_set *set, - const void *e, bool active); - #define IP_SET_INIT_KEXT(skb, opt, set) \ { .bytes = (skb)->len, .packets = 1, \ .timeout = ip_set_adt_opt_timeout(opt, set) } -- cgit v1.2.3 From 702b71e7c666a1c9be9d49e8cd173f0d4d1e859f Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Mon, 10 Oct 2016 22:07:41 +0200 Subject: netfilter: ipset: Add element count to all set types header It is better to list the set elements for all set types, thus the header information is uniform. Element counts are therefore added to the bitmap and list types. 
Signed-off-by: Jozsef Kadlecsik --- include/linux/netfilter/ipset/ip_set.h | 2 ++ include/linux/netfilter/ipset/ip_set_bitmap.h | 2 +- net/netfilter/ipset/ip_set_bitmap_gen.h | 10 +++++++++- net/netfilter/ipset/ip_set_hash_gen.h | 21 ++++++++++----------- net/netfilter/ipset/ip_set_list_set.c | 6 +++++- 5 files changed, 27 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 7a218eb74887..4671d740610f 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -250,6 +250,8 @@ struct ip_set { u8 flags; /* Default timeout value, if enabled */ u32 timeout; + /* Number of elements (vs timeout) */ + u32 elements; /* Element data size */ size_t dsize; /* Offsets to extensions in elements */ diff --git a/include/linux/netfilter/ipset/ip_set_bitmap.h b/include/linux/netfilter/ipset/ip_set_bitmap.h index 5e4662a71e01..366d6c0ea04f 100644 --- a/include/linux/netfilter/ipset/ip_set_bitmap.h +++ b/include/linux/netfilter/ipset/ip_set_bitmap.h @@ -6,8 +6,8 @@ #define IPSET_BITMAP_MAX_RANGE 0x0000FFFF enum { + IPSET_ADD_STORE_PLAIN_TIMEOUT = -1, IPSET_ADD_FAILED = 1, - IPSET_ADD_STORE_PLAIN_TIMEOUT, IPSET_ADD_START_STORED_TIMEOUT, }; diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h index 4f07b90f8ef4..1810d1c06e3d 100644 --- a/net/netfilter/ipset/ip_set_bitmap_gen.h +++ b/net/netfilter/ipset/ip_set_bitmap_gen.h @@ -83,6 +83,7 @@ mtype_flush(struct ip_set *set) if (set->extensions & IPSET_EXT_DESTROY) mtype_ext_cleanup(set); memset(map->members, 0, map->memsize); + set->elements = 0; } /* Calculate the actual memory size of the set data */ @@ -105,7 +106,8 @@ mtype_head(struct ip_set *set, struct sk_buff *skb) goto nla_put_failure; if (mtype_do_head(skb, map) || nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref)) || - nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize))) + 
nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)) || + nla_put_net32(skb, IPSET_ATTR_ELEMENTS, htonl(set->elements))) goto nla_put_failure; if (unlikely(ip_set_put_flags(skb, set))) goto nla_put_failure; @@ -149,6 +151,7 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, if (ret == IPSET_ADD_FAILED) { if (SET_WITH_TIMEOUT(set) && ip_set_timeout_expired(ext_timeout(x, set))) { + set->elements--; ret = 0; } else if (!(flags & IPSET_FLAG_EXIST)) { set_bit(e->id, map->members); @@ -157,6 +160,8 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, /* Element is re-added, cleanup extensions */ ip_set_ext_destroy(set, x); } + if (ret > 0) + set->elements--; if (SET_WITH_TIMEOUT(set)) #ifdef IP_SET_BITMAP_STORED_TIMEOUT @@ -174,6 +179,7 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, /* Activate element */ set_bit(e->id, map->members); + set->elements++; return 0; } @@ -190,6 +196,7 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, return -IPSET_ERR_EXIST; ip_set_ext_destroy(set, x); + set->elements--; if (SET_WITH_TIMEOUT(set) && ip_set_timeout_expired(ext_timeout(x, set))) return -IPSET_ERR_EXIST; @@ -285,6 +292,7 @@ mtype_gc(unsigned long ul_set) if (ip_set_timeout_expired(ext_timeout(x, set))) { clear_bit(id, map->members); ip_set_ext_destroy(set, x); + set->elements--; } } spin_unlock_bh(&set->lock); diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index f5acfb9709c9..6e967f198d1e 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -275,7 +275,6 @@ htable_bits(u32 hashsize) struct htype { struct htable __rcu *table; /* the hash table */ u32 maxelem; /* max elements in the hash */ - u32 elements; /* current element (vs timeout) */ u32 initval; /* random jhash init value */ #ifdef IP_SET_HASH_WITH_MARKMASK u32 markmask; /* markmask value for mark mask to store */ @@ -400,7 
+399,7 @@ mtype_flush(struct ip_set *set) #ifdef IP_SET_HASH_WITH_NETS memset(h->nets, 0, sizeof(struct net_prefixes) * NLEN(set->family)); #endif - h->elements = 0; + set->elements = 0; } /* Destroy the hashtable part of the set */ @@ -506,7 +505,7 @@ mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize) nets_length, k); #endif ip_set_ext_destroy(set, data); - h->elements--; + set->elements--; d++; } } @@ -715,11 +714,11 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, bool deleted = false, forceadd = false, reuse = false; u32 key, multi = 0; - if (h->elements >= h->maxelem) { + if (set->elements >= h->maxelem) { if (SET_WITH_TIMEOUT(set)) /* FIXME: when set is full, we slow down here */ mtype_expire(set, h, NLEN(set->family), set->dsize); - if (h->elements >= h->maxelem && SET_WITH_FORCEADD(set)) + if (set->elements >= h->maxelem && SET_WITH_FORCEADD(set)) forceadd = true; } @@ -732,7 +731,7 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, pr_warn("Set %s is full, maxelem %u reached\n", set->name, h->maxelem); return -IPSET_ERR_HASH_FULL; - } else if (h->elements >= h->maxelem) { + } else if (set->elements >= h->maxelem) { goto set_full; } old = NULL; @@ -781,11 +780,11 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, NLEN(set->family), i); #endif ip_set_ext_destroy(set, data); - h->elements--; + set->elements--; } goto copy_data; } - if (h->elements >= h->maxelem) + if (set->elements >= h->maxelem) goto set_full; /* Create a new slot */ if (n->pos >= n->size) { @@ -810,7 +809,7 @@ copy_elem: j = n->pos++; data = ahash_data(n, j, set->dsize); copy_data: - h->elements++; + set->elements++; #ifdef IP_SET_HASH_WITH_NETS for (i = 0; i < IPSET_NET_COUNT; i++) mtype_add_cidr(h, NCIDR_PUT(DCIDR_GET(d->cidr, i)), @@ -883,7 +882,7 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, smp_mb__after_atomic(); if (i + 1 == n->pos) n->pos--; - 
h->elements--; + set->elements--; #ifdef IP_SET_HASH_WITH_NETS for (j = 0; j < IPSET_NET_COUNT; j++) mtype_del_cidr(h, NCIDR_PUT(DCIDR_GET(d->cidr, j)), @@ -1084,7 +1083,7 @@ mtype_head(struct ip_set *set, struct sk_buff *skb) #endif if (nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref)) || nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)) || - nla_put_net32(skb, IPSET_ATTR_ELEMENTS, htonl(h->elements))) + nla_put_net32(skb, IPSET_ATTR_ELEMENTS, htonl(set->elements))) goto nla_put_failure; if (unlikely(ip_set_put_flags(skb, set))) goto nla_put_failure; diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index 462b0b1870e2..c45516695934 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -166,6 +166,7 @@ __list_set_del_rcu(struct rcu_head * rcu) static inline void list_set_del(struct ip_set *set, struct set_elem *e) { + set->elements--; list_del_rcu(&e->list); call_rcu(&e->rcu, __list_set_del_rcu); } @@ -309,6 +310,7 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext, list_add_rcu(&e->list, &prev->list); else list_add_tail_rcu(&e->list, &map->members); + set->elements++; return 0; } @@ -419,6 +421,7 @@ list_set_flush(struct ip_set *set) list_for_each_entry_safe(e, n, &map->members, list) list_set_del(set, e); + set->elements = 0; } static void @@ -471,7 +474,8 @@ list_set_head(struct ip_set *set, struct sk_buff *skb) goto nla_put_failure; if (nla_put_net32(skb, IPSET_ATTR_SIZE, htonl(map->size)) || nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref)) || - nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize))) + nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)) || + nla_put_net32(skb, IPSET_ATTR_ELEMENTS, htonl(set->elements))) goto nla_put_failure; if (unlikely(ip_set_put_flags(skb, set))) goto nla_put_failure; -- cgit v1.2.3 From 9e41f26a505cca04b7122e65053cf6447007ea79 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Thu, 
10 Nov 2016 12:05:34 +0100 Subject: netfilter: ipset: Count non-static extension memory for userspace Non-static (i.e. comment) extension was not counted into the memory size. A new internal counter is introduced for this. In the case of the hash types the sizes of the arrays are counted there as well so that we can avoid scanning the whole set when just the header data is requested. Signed-off-by: Jozsef Kadlecsik --- include/linux/netfilter/ipset/ip_set.h | 8 ++++++-- include/linux/netfilter/ipset/ip_set_comment.h | 7 +++++-- net/netfilter/ipset/ip_set_bitmap_gen.h | 5 +++-- net/netfilter/ipset/ip_set_core.c | 2 +- net/netfilter/ipset/ip_set_hash_gen.h | 26 ++++++++++++++------------ net/netfilter/ipset/ip_set_list_set.c | 5 +++-- 6 files changed, 32 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 4671d740610f..8e42253e5d4d 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -79,10 +79,12 @@ enum ip_set_ext_id { IPSET_EXT_ID_MAX, }; +struct ip_set; + /* Extension type */ struct ip_set_ext_type { /* Destroy extension private data (can be NULL) */ - void (*destroy)(void *ext); + void (*destroy)(struct ip_set *set, void *ext); enum ip_set_extension type; enum ipset_cadt_flags flag; /* Size and minimal alignment */ @@ -252,6 +254,8 @@ struct ip_set { u32 timeout; /* Number of elements (vs timeout) */ u32 elements; + /* Size of the dynamic extensions (vs timeout) */ + size_t ext_size; /* Element data size */ size_t dsize; /* Offsets to extensions in elements */ @@ -268,7 +272,7 @@ ip_set_ext_destroy(struct ip_set *set, void *data) */ if (SET_WITH_COMMENT(set)) ip_set_extensions[IPSET_EXT_ID_COMMENT].destroy( - ext_comment(data, set)); + set, ext_comment(data, set)); } static inline int diff --git a/include/linux/netfilter/ipset/ip_set_comment.h b/include/linux/netfilter/ipset/ip_set_comment.h index 
5444b1bbe656..8e2bab1e8e90 100644 --- a/include/linux/netfilter/ipset/ip_set_comment.h +++ b/include/linux/netfilter/ipset/ip_set_comment.h @@ -20,13 +20,14 @@ ip_set_comment_uget(struct nlattr *tb) * The kadt functions don't use the comment extensions in any way. */ static inline void -ip_set_init_comment(struct ip_set_comment *comment, +ip_set_init_comment(struct ip_set *set, struct ip_set_comment *comment, const struct ip_set_ext *ext) { struct ip_set_comment_rcu *c = rcu_dereference_protected(comment->c, 1); size_t len = ext->comment ? strlen(ext->comment) : 0; if (unlikely(c)) { + set->ext_size -= sizeof(*c) + strlen(c->str) + 1; kfree_rcu(c, rcu); rcu_assign_pointer(comment->c, NULL); } @@ -38,6 +39,7 @@ ip_set_init_comment(struct ip_set_comment *comment, if (unlikely(!c)) return; strlcpy(c->str, ext->comment, len + 1); + set->ext_size += sizeof(*c) + strlen(c->str) + 1; rcu_assign_pointer(comment->c, c); } @@ -58,13 +60,14 @@ ip_set_put_comment(struct sk_buff *skb, const struct ip_set_comment *comment) * of the set data anymore. 
*/ static inline void -ip_set_comment_free(struct ip_set_comment *comment) +ip_set_comment_free(struct ip_set *set, struct ip_set_comment *comment) { struct ip_set_comment_rcu *c; c = rcu_dereference_protected(comment->c, 1); if (unlikely(!c)) return; + set->ext_size -= sizeof(*c) + strlen(c->str) + 1; kfree_rcu(c, rcu); rcu_assign_pointer(comment->c, NULL); } diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h index 1810d1c06e3d..f8ea26cafa30 100644 --- a/net/netfilter/ipset/ip_set_bitmap_gen.h +++ b/net/netfilter/ipset/ip_set_bitmap_gen.h @@ -84,6 +84,7 @@ mtype_flush(struct ip_set *set) mtype_ext_cleanup(set); memset(map->members, 0, map->memsize); set->elements = 0; + set->ext_size = 0; } /* Calculate the actual memory size of the set data */ @@ -99,7 +100,7 @@ mtype_head(struct ip_set *set, struct sk_buff *skb) { const struct mtype *map = set->data; struct nlattr *nested; - size_t memsize = mtype_memsize(map, set->dsize); + size_t memsize = mtype_memsize(map, set->dsize) + set->ext_size; nested = ipset_nest_start(skb, IPSET_ATTR_DATA); if (!nested) @@ -173,7 +174,7 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, if (SET_WITH_COUNTER(set)) ip_set_init_counter(ext_counter(x, set), ext); if (SET_WITH_COMMENT(set)) - ip_set_init_comment(ext_comment(x, set), ext); + ip_set_init_comment(set, ext_comment(x, set), ext); if (SET_WITH_SKBINFO(set)) ip_set_init_skbinfo(ext_skbinfo(x, set), ext); diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index bfacccff7196..23345d2d136a 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -324,7 +324,7 @@ ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr) } EXPORT_SYMBOL_GPL(ip_set_get_ipaddr6); -typedef void (*destroyer)(void *); +typedef void (*destroyer)(struct ip_set *, void *); /* ipset data extension types, in size order */ const struct ip_set_ext_type 
ip_set_extensions[] = { diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 6e967f198d1e..0746405a1d14 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -343,21 +343,13 @@ mtype_del_cidr(struct htype *h, u8 cidr, u8 nets_length, u8 n) /* Calculate the actual memory size of the set data */ static size_t mtype_ahash_memsize(const struct htype *h, const struct htable *t, - u8 nets_length, size_t dsize) + u8 nets_length) { - u32 i; - struct hbucket *n; size_t memsize = sizeof(*h) + sizeof(*t); #ifdef IP_SET_HASH_WITH_NETS memsize += sizeof(struct net_prefixes) * nets_length; #endif - for (i = 0; i < jhash_size(t->htable_bits); i++) { - n = rcu_dereference_bh(hbucket(t, i)); - if (!n) - continue; - memsize += sizeof(struct hbucket) + n->size * dsize; - } return memsize; } @@ -400,6 +392,7 @@ mtype_flush(struct ip_set *set) memset(h->nets, 0, sizeof(struct net_prefixes) * NLEN(set->family)); #endif set->elements = 0; + set->ext_size = 0; } /* Destroy the hashtable part of the set */ @@ -531,6 +524,7 @@ mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize) d++; } tmp->pos = d; + set->ext_size -= AHASH_INIT_SIZE * dsize; rcu_assign_pointer(hbucket(t, i), tmp); kfree_rcu(n, rcu); } @@ -562,7 +556,7 @@ mtype_resize(struct ip_set *set, bool retried) struct htype *h = set->data; struct htable *t, *orig; u8 htable_bits; - size_t dsize = set->dsize; + size_t extsize, dsize = set->dsize; #ifdef IP_SET_HASH_WITH_NETS u8 flags; struct mtype_elem *tmp; @@ -605,6 +599,7 @@ retry: /* There can't be another parallel resizing, but dumping is possible */ atomic_set(&orig->ref, 1); atomic_inc(&orig->uref); + extsize = 0; pr_debug("attempt to resize set %s from %u to %u, t %p\n", set->name, orig->htable_bits, htable_bits, orig); for (i = 0; i < jhash_size(orig->htable_bits); i++) { @@ -635,6 +630,7 @@ retry: goto cleanup; } m->size = AHASH_INIT_SIZE; + extsize = sizeof(*m) 
+ AHASH_INIT_SIZE * dsize; RCU_INIT_POINTER(hbucket(t, key), m); } else if (m->pos >= m->size) { struct hbucket *ht; @@ -654,6 +650,7 @@ retry: memcpy(ht, m, sizeof(struct hbucket) + m->size * dsize); ht->size = m->size + AHASH_INIT_SIZE; + extsize += AHASH_INIT_SIZE * dsize; kfree(m); m = ht; RCU_INIT_POINTER(hbucket(t, key), ht); @@ -667,6 +664,7 @@ retry: } } rcu_assign_pointer(h->table, t); + set->ext_size = extsize; spin_unlock_bh(&set->lock); @@ -740,6 +738,7 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, if (!n) return -ENOMEM; n->size = AHASH_INIT_SIZE; + set->ext_size += sizeof(*n) + AHASH_INIT_SIZE * set->dsize; goto copy_elem; } for (i = 0; i < n->pos; i++) { @@ -803,6 +802,7 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, memcpy(n, old, sizeof(struct hbucket) + old->size * set->dsize); n->size = old->size + AHASH_INIT_SIZE; + set->ext_size += AHASH_INIT_SIZE * set->dsize; } copy_elem: @@ -823,7 +823,7 @@ overwrite_extensions: if (SET_WITH_COUNTER(set)) ip_set_init_counter(ext_counter(data, set), ext); if (SET_WITH_COMMENT(set)) - ip_set_init_comment(ext_comment(data, set), ext); + ip_set_init_comment(set, ext_comment(data, set), ext); if (SET_WITH_SKBINFO(set)) ip_set_init_skbinfo(ext_skbinfo(data, set), ext); /* Must come last for the case when timed out entry is reused */ @@ -895,6 +895,7 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, k++; } if (n->pos == 0 && k == 0) { + set->ext_size -= sizeof(*n) + n->size * dsize; rcu_assign_pointer(hbucket(t, key), NULL); kfree_rcu(n, rcu); } else if (k >= AHASH_INIT_SIZE) { @@ -913,6 +914,7 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, k++; } tmp->pos = k; + set->ext_size -= AHASH_INIT_SIZE * dsize; rcu_assign_pointer(hbucket(t, key), tmp); kfree_rcu(n, rcu); } @@ -1061,7 +1063,7 @@ mtype_head(struct ip_set *set, struct sk_buff *skb) rcu_read_lock_bh(); t = rcu_dereference_bh_nfnl(h->table); - 
memsize = mtype_ahash_memsize(h, t, NLEN(set->family), set->dsize); + memsize = mtype_ahash_memsize(h, t, NLEN(set->family)) + set->ext_size; htable_bits = t->htable_bits; rcu_read_unlock_bh(); diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index c45516695934..dede343a662b 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -228,7 +228,7 @@ list_set_init_extensions(struct ip_set *set, const struct ip_set_ext *ext, if (SET_WITH_COUNTER(set)) ip_set_init_counter(ext_counter(e, set), ext); if (SET_WITH_COMMENT(set)) - ip_set_init_comment(ext_comment(e, set), ext); + ip_set_init_comment(set, ext_comment(e, set), ext); if (SET_WITH_SKBINFO(set)) ip_set_init_skbinfo(ext_skbinfo(e, set), ext); /* Update timeout last */ @@ -422,6 +422,7 @@ list_set_flush(struct ip_set *set) list_for_each_entry_safe(e, n, &map->members, list) list_set_del(set, e); set->elements = 0; + set->ext_size = 0; } static void @@ -467,7 +468,7 @@ list_set_head(struct ip_set *set, struct sk_buff *skb) { const struct list_set *map = set->data; struct nlattr *nested; - size_t memsize = list_set_memsize(map, set->dsize); + size_t memsize = list_set_memsize(map, set->dsize) + set->ext_size; nested = ipset_nest_start(skb, IPSET_ATTR_DATA); if (!nested) -- cgit v1.2.3 From 39a842e22c1bf3ec3dce36e01fe8ba8ee66c80c8 Mon Sep 17 00:00:00 2001 From: Alan Tull Date: Tue, 1 Nov 2016 14:14:22 -0500 Subject: of/overlay: add of overlay notifications This patch adds of overlay notifications. When DT overlays are being added, some drivers/subsystems need to see device tree overlays before the changes go into the live tree. This is distinct from reconfig notifiers that are post-apply or post-remove and which issue very granular notifications without providing access to the context of a whole overlay. 
The following 4 notifications are issued: OF_OVERLAY_PRE_APPLY OF_OVERLAY_POST_APPLY OF_OVERLAY_PRE_REMOVE OF_OVERLAY_POST_REMOVE In the case of pre-apply notification, if the notifier returns error, the overlay will be rejected. This patch exports two functions for registering/unregistering notifications: of_overlay_notifier_register(struct notifier_block *nb) of_overlay_notifier_unregister(struct notifier_block *nb) The of_mutex is held during these notifications. The notification data includes pointers to the overlay target and the overlay: struct of_overlay_notify_data { struct device_node *overlay; struct device_node *target; }; Signed-off-by: Alan Tull Acked-by: Rob Herring Acked-by: Moritz Fischer Signed-off-by: Greg Kroah-Hartman --- drivers/of/overlay.c | 47 ++++++++++++++++++++++++++++++++++++++++++++++- include/linux/of.h | 25 +++++++++++++++++++++++++ 2 files changed, 71 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/of/overlay.c b/drivers/of/overlay.c index 318dbb51e7a2..0d4cda7050e0 100644 --- a/drivers/of/overlay.c +++ b/drivers/of/overlay.c @@ -58,6 +58,41 @@ struct of_overlay { static int of_overlay_apply_one(struct of_overlay *ov, struct device_node *target, const struct device_node *overlay); +static BLOCKING_NOTIFIER_HEAD(of_overlay_chain); + +int of_overlay_notifier_register(struct notifier_block *nb) +{ + return blocking_notifier_chain_register(&of_overlay_chain, nb); +} +EXPORT_SYMBOL_GPL(of_overlay_notifier_register); + +int of_overlay_notifier_unregister(struct notifier_block *nb) +{ + return blocking_notifier_chain_unregister(&of_overlay_chain, nb); +} +EXPORT_SYMBOL_GPL(of_overlay_notifier_unregister); + +static int of_overlay_notify(struct of_overlay *ov, + enum of_overlay_notify_action action) +{ + struct of_overlay_notify_data nd; + int i, ret; + + for (i = 0; i < ov->count; i++) { + struct of_overlay_info *ovinfo = &ov->ovinfo_tab[i]; + + nd.target = ovinfo->target; + nd.overlay = ovinfo->overlay; + + 
ret = blocking_notifier_call_chain(&of_overlay_chain, + action, &nd); + if (ret) + return notifier_to_errno(ret); + } + + return 0; +} + static int of_overlay_apply_single_property(struct of_overlay *ov, struct device_node *target, struct property *prop) { @@ -368,6 +403,13 @@ int of_overlay_create(struct device_node *tree) goto err_free_idr; } + err = of_overlay_notify(ov, OF_OVERLAY_PRE_APPLY); + if (err < 0) { + pr_err("%s: Pre-apply notifier failed (err=%d)\n", + __func__, err); + goto err_free_idr; + } + /* apply the overlay */ err = of_overlay_apply(ov); if (err) @@ -382,6 +424,8 @@ int of_overlay_create(struct device_node *tree) /* add to the tail of the overlay list */ list_add_tail(&ov->node, &ov_list); + of_overlay_notify(ov, OF_OVERLAY_POST_APPLY); + mutex_unlock(&of_mutex); return id; @@ -498,9 +542,10 @@ int of_overlay_destroy(int id) goto out; } - + of_overlay_notify(ov, OF_OVERLAY_PRE_REMOVE); list_del(&ov->node); __of_changeset_revert(&ov->cset); + of_overlay_notify(ov, OF_OVERLAY_POST_REMOVE); of_free_overlay_info(ov); idr_remove(&ov_idr, id); of_changeset_destroy(&ov->cset); diff --git a/include/linux/of.h b/include/linux/of.h index 299aeb192727..d72f01009297 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -1266,6 +1266,18 @@ static inline bool of_device_is_system_power_controller(const struct device_node * Overlay support */ +enum of_overlay_notify_action { + OF_OVERLAY_PRE_APPLY, + OF_OVERLAY_POST_APPLY, + OF_OVERLAY_PRE_REMOVE, + OF_OVERLAY_POST_REMOVE, +}; + +struct of_overlay_notify_data { + struct device_node *overlay; + struct device_node *target; +}; + #ifdef CONFIG_OF_OVERLAY /* ID based overlays; the API for external users */ @@ -1273,6 +1285,9 @@ int of_overlay_create(struct device_node *tree); int of_overlay_destroy(int id); int of_overlay_destroy_all(void); +int of_overlay_notifier_register(struct notifier_block *nb); +int of_overlay_notifier_unregister(struct notifier_block *nb); + #else static inline int 
of_overlay_create(struct device_node *tree) @@ -1290,6 +1305,16 @@ static inline int of_overlay_destroy_all(void) return -ENOTSUPP; } +static inline int of_overlay_notifier_register(struct notifier_block *nb) +{ + return 0; +} + +static inline int of_overlay_notifier_unregister(struct notifier_block *nb) +{ + return 0; +} + #endif #endif /* _LINUX_OF_H */ -- cgit v1.2.3 From 9dce0287a60d72656a787b075f1b9162ff3cb142 Mon Sep 17 00:00:00 2001 From: Alan Tull Date: Tue, 1 Nov 2016 14:14:23 -0500 Subject: fpga: add method to get fpga manager from device The intent is to provide a non-DT method of getting ahold of a FPGA manager to do some FPGA programming. This patch refactors of_fpga_mgr_get() to reuse most of it while adding a new method fpga_mgr_get() for getting a pointer to a fpga manager struct, given the device. Signed-off-by: Alan Tull Signed-off-by: Greg Kroah-Hartman --- Documentation/fpga/fpga-mgr.txt | 6 ++-- drivers/fpga/fpga-mgr.c | 76 +++++++++++++++++++++++++++++------------ include/linux/fpga/fpga-mgr.h | 2 ++ 3 files changed, 60 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/Documentation/fpga/fpga-mgr.txt b/Documentation/fpga/fpga-mgr.txt index ce3e84fa9023..d056d691e8fd 100644 --- a/Documentation/fpga/fpga-mgr.txt +++ b/Documentation/fpga/fpga-mgr.txt @@ -38,11 +38,13 @@ To get/put a reference to a FPGA manager: ----------------------------------------- struct fpga_manager *of_fpga_mgr_get(struct device_node *node); + struct fpga_manager *fpga_mgr_get(struct device *dev); + +Given a DT node or device, get an exclusive reference to a FPGA manager. void fpga_mgr_put(struct fpga_manager *mgr); -Given a DT node, get an exclusive reference to a FPGA manager or release -the reference. +Release the reference. 
To register or unregister the low level FPGA-specific driver: diff --git a/drivers/fpga/fpga-mgr.c b/drivers/fpga/fpga-mgr.c index 953dc9195937..b690e65d55fe 100644 --- a/drivers/fpga/fpga-mgr.c +++ b/drivers/fpga/fpga-mgr.c @@ -39,7 +39,8 @@ static struct class *fpga_mgr_class; * Step the low level fpga manager through the device-specific steps of getting * an FPGA ready to be configured, writing the image to it, then doing whatever * post-configuration steps necessary. This code assumes the caller got the - * mgr pointer from of_fpga_mgr_get() and checked that it is not an error code. + * mgr pointer from of_fpga_mgr_get() or fpga_mgr_get() and checked that it is + * not an error code. * * Return: 0 on success, negative error code otherwise. */ @@ -99,7 +100,8 @@ EXPORT_SYMBOL_GPL(fpga_mgr_buf_load); * Request an FPGA image using the firmware class, then write out to the FPGA. * Update the state before each step to provide info on what step failed if * there is a failure. This code assumes the caller got the mgr pointer - * from of_fpga_mgr_get() and checked that it is not an error code. + * from of_fpga_mgr_get() or fpga_mgr_get() and checked that it is not an error + * code. * * Return: 0 on success, negative error code otherwise. */ @@ -181,30 +183,11 @@ static struct attribute *fpga_mgr_attrs[] = { }; ATTRIBUTE_GROUPS(fpga_mgr); -static int fpga_mgr_of_node_match(struct device *dev, const void *data) -{ - return dev->of_node == data; -} - -/** - * of_fpga_mgr_get - get an exclusive reference to a fpga mgr - * @node: device node - * - * Given a device node, get an exclusive reference to a fpga mgr. - * - * Return: fpga manager struct or IS_ERR() condition containing error code. 
- */ -struct fpga_manager *of_fpga_mgr_get(struct device_node *node) +struct fpga_manager *__fpga_mgr_get(struct device *dev) { struct fpga_manager *mgr; - struct device *dev; int ret = -ENODEV; - dev = class_find_device(fpga_mgr_class, NULL, node, - fpga_mgr_of_node_match); - if (!dev) - return ERR_PTR(-ENODEV); - mgr = to_fpga_manager(dev); if (!mgr) goto err_dev; @@ -226,6 +209,55 @@ err_dev: put_device(dev); return ERR_PTR(ret); } + +static int fpga_mgr_dev_match(struct device *dev, const void *data) +{ + return dev->parent == data; +} + +/** + * fpga_mgr_get - get an exclusive reference to a fpga mgr + * @dev: parent device that fpga mgr was registered with + * + * Given a device, get an exclusive reference to a fpga mgr. + * + * Return: fpga manager struct or IS_ERR() condition containing error code. + */ +struct fpga_manager *fpga_mgr_get(struct device *dev) +{ + struct device *mgr_dev = class_find_device(fpga_mgr_class, NULL, dev, + fpga_mgr_dev_match); + if (!mgr_dev) + return ERR_PTR(-ENODEV); + + return __fpga_mgr_get(mgr_dev); +} +EXPORT_SYMBOL_GPL(fpga_mgr_get); + +static int fpga_mgr_of_node_match(struct device *dev, const void *data) +{ + return dev->of_node == data; +} + +/** + * of_fpga_mgr_get - get an exclusive reference to a fpga mgr + * @node: device node + * + * Given a device node, get an exclusive reference to a fpga mgr. + * + * Return: fpga manager struct or IS_ERR() condition containing error code. 
+ */ +struct fpga_manager *of_fpga_mgr_get(struct device_node *node) +{ + struct device *dev; + + dev = class_find_device(fpga_mgr_class, NULL, node, + fpga_mgr_of_node_match); + if (!dev) + return ERR_PTR(-ENODEV); + + return __fpga_mgr_get(dev); +} EXPORT_SYMBOL_GPL(of_fpga_mgr_get); /** diff --git a/include/linux/fpga/fpga-mgr.h b/include/linux/fpga/fpga-mgr.h index 0940bf45e2f2..957b5ac9428a 100644 --- a/include/linux/fpga/fpga-mgr.h +++ b/include/linux/fpga/fpga-mgr.h @@ -117,6 +117,8 @@ int fpga_mgr_firmware_load(struct fpga_manager *mgr, u32 flags, struct fpga_manager *of_fpga_mgr_get(struct device_node *node); +struct fpga_manager *fpga_mgr_get(struct device *dev); + void fpga_mgr_put(struct fpga_manager *mgr); int fpga_mgr_register(struct device *dev, const char *name, -- cgit v1.2.3 From 1df2865f8dd9d56cb76aa7aa1298921e7bece2af Mon Sep 17 00:00:00 2001 From: Alan Tull Date: Tue, 1 Nov 2016 14:14:26 -0500 Subject: fpga-mgr: add fpga image information struct This patch adds a minor change in the FPGA Manager API to hold information that is specific to an FPGA image file. This change is expected to bring little, if any, pain. The socfpga and zynq drivers are fixed up in this patch. An FPGA image file will have particulars that affect how the image is programmed to the FPGA. One example is that current 'flags' currently has one bit which shows whether the FPGA image was built for full reconfiguration or partial reconfiguration. Another example is timeout values for enabling or disabling the bridges in the FPGA. As the complexity of the FPGA design increases, the bridges in the FPGA may take longer times to enable or disable. This patch adds a new 'struct fpga_image_info', moves the current 'u32 flags' to it. Two other image-specific u32's are added for the bridge enable/disable timeouts. The FPGA Manager API functions are changed, replacing the 'u32 flag' parameter with a pointer to struct fpga_image_info. 
Subsequent patches fix the existing low level FPGA manager drivers. Signed-off-by: Alan Tull Acked-by: Moritz Fischer Signed-off-by: Greg Kroah-Hartman --- drivers/fpga/fpga-mgr.c | 17 +++++++++-------- drivers/fpga/socfpga.c | 7 ++++--- drivers/fpga/zynq-fpga.c | 10 ++++++---- include/linux/fpga/fpga-mgr.h | 23 +++++++++++++++++++---- 4 files changed, 38 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/drivers/fpga/fpga-mgr.c b/drivers/fpga/fpga-mgr.c index b690e65d55fe..79ce2eea44db 100644 --- a/drivers/fpga/fpga-mgr.c +++ b/drivers/fpga/fpga-mgr.c @@ -32,7 +32,7 @@ static struct class *fpga_mgr_class; /** * fpga_mgr_buf_load - load fpga from image in buffer * @mgr: fpga manager - * @flags: flags setting fpga confuration modes + * @info: fpga image specific information * @buf: buffer contain fpga image * @count: byte count of buf * @@ -44,8 +44,8 @@ static struct class *fpga_mgr_class; * * Return: 0 on success, negative error code otherwise. */ -int fpga_mgr_buf_load(struct fpga_manager *mgr, u32 flags, const char *buf, - size_t count) +int fpga_mgr_buf_load(struct fpga_manager *mgr, struct fpga_image_info *info, + const char *buf, size_t count) { struct device *dev = &mgr->dev; int ret; @@ -56,7 +56,7 @@ int fpga_mgr_buf_load(struct fpga_manager *mgr, u32 flags, const char *buf, * ready to receive an FPGA image. */ mgr->state = FPGA_MGR_STATE_WRITE_INIT; - ret = mgr->mops->write_init(mgr, flags, buf, count); + ret = mgr->mops->write_init(mgr, info, buf, count); if (ret) { dev_err(dev, "Error preparing FPGA for writing\n"); mgr->state = FPGA_MGR_STATE_WRITE_INIT_ERR; @@ -79,7 +79,7 @@ int fpga_mgr_buf_load(struct fpga_manager *mgr, u32 flags, const char *buf, * steps to finish and set the FPGA into operating mode. 
*/ mgr->state = FPGA_MGR_STATE_WRITE_COMPLETE; - ret = mgr->mops->write_complete(mgr, flags); + ret = mgr->mops->write_complete(mgr, info); if (ret) { dev_err(dev, "Error after writing image data to FPGA\n"); mgr->state = FPGA_MGR_STATE_WRITE_COMPLETE_ERR; @@ -94,7 +94,7 @@ EXPORT_SYMBOL_GPL(fpga_mgr_buf_load); /** * fpga_mgr_firmware_load - request firmware and load to fpga * @mgr: fpga manager - * @flags: flags setting fpga confuration modes + * @info: fpga image specific information * @image_name: name of image file on the firmware search path * * Request an FPGA image using the firmware class, then write out to the FPGA. @@ -105,7 +105,8 @@ EXPORT_SYMBOL_GPL(fpga_mgr_buf_load); * * Return: 0 on success, negative error code otherwise. */ -int fpga_mgr_firmware_load(struct fpga_manager *mgr, u32 flags, +int fpga_mgr_firmware_load(struct fpga_manager *mgr, + struct fpga_image_info *info, const char *image_name) { struct device *dev = &mgr->dev; @@ -123,7 +124,7 @@ int fpga_mgr_firmware_load(struct fpga_manager *mgr, u32 flags, return ret; } - ret = fpga_mgr_buf_load(mgr, flags, fw->data, fw->size); + ret = fpga_mgr_buf_load(mgr, info, fw->data, fw->size); release_firmware(fw); diff --git a/drivers/fpga/socfpga.c b/drivers/fpga/socfpga.c index 27d2ff28132c..b6672e66cda6 100644 --- a/drivers/fpga/socfpga.c +++ b/drivers/fpga/socfpga.c @@ -407,13 +407,14 @@ static int socfpga_fpga_reset(struct fpga_manager *mgr) /* * Prepare the FPGA to receive the configuration data. 
*/ -static int socfpga_fpga_ops_configure_init(struct fpga_manager *mgr, u32 flags, +static int socfpga_fpga_ops_configure_init(struct fpga_manager *mgr, + struct fpga_image_info *info, const char *buf, size_t count) { struct socfpga_fpga_priv *priv = mgr->priv; int ret; - if (flags & FPGA_MGR_PARTIAL_RECONFIG) { + if (info->flags & FPGA_MGR_PARTIAL_RECONFIG) { dev_err(&mgr->dev, "Partial reconfiguration not supported.\n"); return -EINVAL; } @@ -478,7 +479,7 @@ static int socfpga_fpga_ops_configure_write(struct fpga_manager *mgr, } static int socfpga_fpga_ops_configure_complete(struct fpga_manager *mgr, - u32 flags) + struct fpga_image_info *info) { struct socfpga_fpga_priv *priv = mgr->priv; u32 status; diff --git a/drivers/fpga/zynq-fpga.c b/drivers/fpga/zynq-fpga.c index c2fb4120bd62..249682e92502 100644 --- a/drivers/fpga/zynq-fpga.c +++ b/drivers/fpga/zynq-fpga.c @@ -175,7 +175,8 @@ static irqreturn_t zynq_fpga_isr(int irq, void *data) return IRQ_HANDLED; } -static int zynq_fpga_ops_write_init(struct fpga_manager *mgr, u32 flags, +static int zynq_fpga_ops_write_init(struct fpga_manager *mgr, + struct fpga_image_info *info, const char *buf, size_t count) { struct zynq_fpga_priv *priv; @@ -189,7 +190,7 @@ static int zynq_fpga_ops_write_init(struct fpga_manager *mgr, u32 flags, return err; /* don't globally reset PL if we're doing partial reconfig */ - if (!(flags & FPGA_MGR_PARTIAL_RECONFIG)) { + if (!(info->flags & FPGA_MGR_PARTIAL_RECONFIG)) { /* assert AXI interface resets */ regmap_write(priv->slcr, SLCR_FPGA_RST_CTRL_OFFSET, FPGA_RST_ALL_MASK); @@ -343,7 +344,8 @@ out_free: return err; } -static int zynq_fpga_ops_write_complete(struct fpga_manager *mgr, u32 flags) +static int zynq_fpga_ops_write_complete(struct fpga_manager *mgr, + struct fpga_image_info *info) { struct zynq_fpga_priv *priv = mgr->priv; int err; @@ -364,7 +366,7 @@ static int zynq_fpga_ops_write_complete(struct fpga_manager *mgr, u32 flags) return err; /* for the partial reconfig case we 
didn't touch the level shifters */ - if (!(flags & FPGA_MGR_PARTIAL_RECONFIG)) { + if (!(info->flags & FPGA_MGR_PARTIAL_RECONFIG)) { /* enable level shifters from PL to PS */ regmap_write(priv->slcr, SLCR_LVL_SHFTR_EN_OFFSET, LVL_SHFTR_ENABLE_PL_TO_PS); diff --git a/include/linux/fpga/fpga-mgr.h b/include/linux/fpga/fpga-mgr.h index 957b5ac9428a..55803186e0ea 100644 --- a/include/linux/fpga/fpga-mgr.h +++ b/include/linux/fpga/fpga-mgr.h @@ -68,6 +68,18 @@ enum fpga_mgr_states { */ #define FPGA_MGR_PARTIAL_RECONFIG BIT(0) +/** + * struct fpga_image_info - information specific to a FPGA image + * @flags: boolean flags as defined above + * @enable_timeout_us: maximum time to enable traffic through bridge (uSec) + * @disable_timeout_us: maximum time to disable traffic through bridge (uSec) + */ +struct fpga_image_info { + u32 flags; + u32 enable_timeout_us; + u32 disable_timeout_us; +}; + /** * struct fpga_manager_ops - ops for low level fpga manager drivers * @state: returns an enum value of the FPGA's state @@ -82,10 +94,12 @@ enum fpga_mgr_states { */ struct fpga_manager_ops { enum fpga_mgr_states (*state)(struct fpga_manager *mgr); - int (*write_init)(struct fpga_manager *mgr, u32 flags, + int (*write_init)(struct fpga_manager *mgr, + struct fpga_image_info *info, const char *buf, size_t count); int (*write)(struct fpga_manager *mgr, const char *buf, size_t count); - int (*write_complete)(struct fpga_manager *mgr, u32 flags); + int (*write_complete)(struct fpga_manager *mgr, + struct fpga_image_info *info); void (*fpga_remove)(struct fpga_manager *mgr); }; @@ -109,10 +123,11 @@ struct fpga_manager { #define to_fpga_manager(d) container_of(d, struct fpga_manager, dev) -int fpga_mgr_buf_load(struct fpga_manager *mgr, u32 flags, +int fpga_mgr_buf_load(struct fpga_manager *mgr, struct fpga_image_info *info, const char *buf, size_t count); -int fpga_mgr_firmware_load(struct fpga_manager *mgr, u32 flags, +int fpga_mgr_firmware_load(struct fpga_manager *mgr, + struct 
fpga_image_info *info, const char *image_name); struct fpga_manager *of_fpga_mgr_get(struct device_node *node); -- cgit v1.2.3 From 21aeda950c5f84a8351b862816d832120b217a9b Mon Sep 17 00:00:00 2001 From: Alan Tull Date: Tue, 1 Nov 2016 14:14:28 -0500 Subject: fpga: add fpga bridge framework This framework adds API functions for enabling/disabling FPGA bridges under kernel control. This allows the Linux kernel to disable FPGA bridges during FPGA reprogramming and to enable FPGA bridges when FPGA reprogramming is done. This framework is manufacturer-agnostic, allowing it to be used in interfaces that use the FPGA Manager Framework to reprogram FPGAs. The functions are: * of_fpga_bridge_get * fpga_bridge_put Get/put an exclusive reference to a FPGA bridge. * fpga_bridge_enable * fpga_bridge_disable Enable/Disable traffic through a bridge. * fpga_bridge_register * fpga_bridge_unregister Register/unregister a device-specific low level FPGA Bridge driver. Get an exclusive reference to a bridge and add it to a list: * fpga_bridge_get_to_list To enable/disable/put a set of bridges that are on a list: * fpga_bridges_enable * fpga_bridges_disable * fpga_bridges_put Signed-off-by: Alan Tull Signed-off-by: Greg Kroah-Hartman --- drivers/fpga/Kconfig | 7 + drivers/fpga/Makefile | 3 + drivers/fpga/fpga-bridge.c | 395 +++++++++++++++++++++++++++++++++++++++ include/linux/fpga/fpga-bridge.h | 60 ++++++ 4 files changed, 465 insertions(+) create mode 100644 drivers/fpga/fpga-bridge.c create mode 100644 include/linux/fpga/fpga-bridge.h (limited to 'include/linux') diff --git a/drivers/fpga/Kconfig b/drivers/fpga/Kconfig index cd84934774cc..9b20f45c85bf 100644 --- a/drivers/fpga/Kconfig +++ b/drivers/fpga/Kconfig @@ -26,6 +26,13 @@ config FPGA_MGR_ZYNQ_FPGA help FPGA manager driver support for Xilinx Zynq FPGAs.
+config FPGA_BRIDGE + tristate "FPGA Bridge Framework" + depends on OF + help + Say Y here if you want to support bridges connected between host + processors and FPGAs or between FPGAs. + endif # FPGA endmenu diff --git a/drivers/fpga/Makefile b/drivers/fpga/Makefile index 8d83fc6b1613..4baef0022d4c 100644 --- a/drivers/fpga/Makefile +++ b/drivers/fpga/Makefile @@ -8,3 +8,6 @@ obj-$(CONFIG_FPGA) += fpga-mgr.o # FPGA Manager Drivers obj-$(CONFIG_FPGA_MGR_SOCFPGA) += socfpga.o obj-$(CONFIG_FPGA_MGR_ZYNQ_FPGA) += zynq-fpga.o + +# FPGA Bridge Drivers +obj-$(CONFIG_FPGA_BRIDGE) += fpga-bridge.o diff --git a/drivers/fpga/fpga-bridge.c b/drivers/fpga/fpga-bridge.c new file mode 100644 index 000000000000..33ee83e6373c --- /dev/null +++ b/drivers/fpga/fpga-bridge.c @@ -0,0 +1,395 @@ +/* + * FPGA Bridge Framework Driver + * + * Copyright (C) 2013-2016 Altera Corporation, All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ +#include +#include +#include +#include +#include +#include +#include + +static DEFINE_IDA(fpga_bridge_ida); +static struct class *fpga_bridge_class; + +/* Lock for adding/removing bridges to linked lists*/ +spinlock_t bridge_list_lock; + +static int fpga_bridge_of_node_match(struct device *dev, const void *data) +{ + return dev->of_node == data; +} + +/** + * fpga_bridge_enable - Enable transactions on the bridge + * + * @bridge: FPGA bridge + * + * Return: 0 for success, error code otherwise. 
+ */ +int fpga_bridge_enable(struct fpga_bridge *bridge) +{ + dev_dbg(&bridge->dev, "enable\n"); + + if (bridge->br_ops && bridge->br_ops->enable_set) + return bridge->br_ops->enable_set(bridge, 1); + + return 0; +} +EXPORT_SYMBOL_GPL(fpga_bridge_enable); + +/** + * fpga_bridge_disable - Disable transactions on the bridge + * + * @bridge: FPGA bridge + * + * Return: 0 for success, error code otherwise. + */ +int fpga_bridge_disable(struct fpga_bridge *bridge) +{ + dev_dbg(&bridge->dev, "disable\n"); + + if (bridge->br_ops && bridge->br_ops->enable_set) + return bridge->br_ops->enable_set(bridge, 0); + + return 0; +} +EXPORT_SYMBOL_GPL(fpga_bridge_disable); + +/** + * of_fpga_bridge_get - get an exclusive reference to a fpga bridge + * + * @np: node pointer of a FPGA bridge + * @info: fpga image specific information + * + * Return fpga_bridge struct if successful. + * Return -EBUSY if someone already has a reference to the bridge. + * Return -ENODEV if @np is not a FPGA Bridge. + */ +struct fpga_bridge *of_fpga_bridge_get(struct device_node *np, + struct fpga_image_info *info) + +{ + struct device *dev; + struct fpga_bridge *bridge; + int ret = -ENODEV; + + dev = class_find_device(fpga_bridge_class, NULL, np, + fpga_bridge_of_node_match); + if (!dev) + goto err_dev; + + bridge = to_fpga_bridge(dev); + if (!bridge) + goto err_dev; + + bridge->info = info; + + if (!mutex_trylock(&bridge->mutex)) { + ret = -EBUSY; + goto err_dev; + } + + if (!try_module_get(dev->parent->driver->owner)) + goto err_ll_mod; + + dev_dbg(&bridge->dev, "get\n"); + + return bridge; + +err_ll_mod: + mutex_unlock(&bridge->mutex); +err_dev: + put_device(dev); + return ERR_PTR(ret); +} +EXPORT_SYMBOL_GPL(of_fpga_bridge_get); + +/** + * fpga_bridge_put - release a reference to a bridge + * + * @bridge: FPGA bridge + */ +void fpga_bridge_put(struct fpga_bridge *bridge) +{ + dev_dbg(&bridge->dev, "put\n"); + + bridge->info = NULL; + module_put(bridge->dev.parent->driver->owner); + 
mutex_unlock(&bridge->mutex); + put_device(&bridge->dev); +} +EXPORT_SYMBOL_GPL(fpga_bridge_put); + +/** + * fpga_bridges_enable - enable bridges in a list + * @bridge_list: list of FPGA bridges + * + * Enable each bridge in the list. If list is empty, do nothing. + * + * Return 0 for success or empty bridge list; return error code otherwise. + */ +int fpga_bridges_enable(struct list_head *bridge_list) +{ + struct fpga_bridge *bridge; + struct list_head *node; + int ret; + + list_for_each(node, bridge_list) { + bridge = list_entry(node, struct fpga_bridge, node); + ret = fpga_bridge_enable(bridge); + if (ret) + return ret; + } + + return 0; +} +EXPORT_SYMBOL_GPL(fpga_bridges_enable); + +/** + * fpga_bridges_disable - disable bridges in a list + * + * @bridge_list: list of FPGA bridges + * + * Disable each bridge in the list. If list is empty, do nothing. + * + * Return 0 for success or empty bridge list; return error code otherwise. + */ +int fpga_bridges_disable(struct list_head *bridge_list) +{ + struct fpga_bridge *bridge; + struct list_head *node; + int ret; + + list_for_each(node, bridge_list) { + bridge = list_entry(node, struct fpga_bridge, node); + ret = fpga_bridge_disable(bridge); + if (ret) + return ret; + } + + return 0; +} +EXPORT_SYMBOL_GPL(fpga_bridges_disable); + +/** + * fpga_bridges_put - put bridges + * + * @bridge_list: list of FPGA bridges + * + * For each bridge in the list, put the bridge and remove it from the list. + * If list is empty, do nothing. 
+ */ +void fpga_bridges_put(struct list_head *bridge_list) +{ + struct fpga_bridge *bridge; + struct list_head *node, *next; + unsigned long flags; + + list_for_each_safe(node, next, bridge_list) { + bridge = list_entry(node, struct fpga_bridge, node); + + fpga_bridge_put(bridge); + + spin_lock_irqsave(&bridge_list_lock, flags); + list_del(&bridge->node); + spin_unlock_irqrestore(&bridge_list_lock, flags); + } +} +EXPORT_SYMBOL_GPL(fpga_bridges_put); + +/** + * fpga_bridge_get_to_list - get a bridge, add it to a list + * + * @np: node pointer of a FPGA bridge + * @info: fpga image specific information + * @bridge_list: list of FPGA bridges + * + * Get an exclusive reference to the bridge and add it to the list. + * + * Return 0 for success, error code from of_fpga_bridge_get() otherwise. + */ +int fpga_bridge_get_to_list(struct device_node *np, + struct fpga_image_info *info, + struct list_head *bridge_list) +{ + struct fpga_bridge *bridge; + unsigned long flags; + + bridge = of_fpga_bridge_get(np, info); + if (IS_ERR(bridge)) + return PTR_ERR(bridge); + + spin_lock_irqsave(&bridge_list_lock, flags); + list_add(&bridge->node, bridge_list); + spin_unlock_irqrestore(&bridge_list_lock, flags); + + return 0; +} +EXPORT_SYMBOL_GPL(fpga_bridge_get_to_list); + +static ssize_t name_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct fpga_bridge *bridge = to_fpga_bridge(dev); + + return sprintf(buf, "%s\n", bridge->name); +} + +static ssize_t state_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct fpga_bridge *bridge = to_fpga_bridge(dev); + int enable = 1; + + if (bridge->br_ops && bridge->br_ops->enable_show) + enable = bridge->br_ops->enable_show(bridge); + + return sprintf(buf, "%s\n", enable ?
"enabled" : "disabled"); +} + +static DEVICE_ATTR_RO(name); +static DEVICE_ATTR_RO(state); + +static struct attribute *fpga_bridge_attrs[] = { + &dev_attr_name.attr, + &dev_attr_state.attr, + NULL, +}; +ATTRIBUTE_GROUPS(fpga_bridge); + +/** + * fpga_bridge_register - register a fpga bridge driver + * @dev: FPGA bridge device from pdev + * @name: FPGA bridge name + * @br_ops: pointer to structure of fpga bridge ops + * @priv: FPGA bridge private data + * + * Return: 0 for success, error code otherwise. + */ +int fpga_bridge_register(struct device *dev, const char *name, + const struct fpga_bridge_ops *br_ops, void *priv) +{ + struct fpga_bridge *bridge; + int id, ret = 0; + + if (!name || !strlen(name)) { + dev_err(dev, "Attempt to register with no name!\n"); + return -EINVAL; + } + + bridge = kzalloc(sizeof(*bridge), GFP_KERNEL); + if (!bridge) + return -ENOMEM; + + id = ida_simple_get(&fpga_bridge_ida, 0, 0, GFP_KERNEL); + if (id < 0) { + ret = id; + goto error_kfree; + } + + mutex_init(&bridge->mutex); + INIT_LIST_HEAD(&bridge->node); + + bridge->name = name; + bridge->br_ops = br_ops; + bridge->priv = priv; + + device_initialize(&bridge->dev); + bridge->dev.class = fpga_bridge_class; + bridge->dev.parent = dev; + bridge->dev.of_node = dev->of_node; + bridge->dev.id = id; + dev_set_drvdata(dev, bridge); + + ret = dev_set_name(&bridge->dev, "br%d", id); + if (ret) + goto error_device; + + ret = device_add(&bridge->dev); + if (ret) + goto error_device; + + of_platform_populate(dev->of_node, NULL, NULL, dev); + + dev_info(bridge->dev.parent, "fpga bridge [%s] registered\n", + bridge->name); + + return 0; + +error_device: + ida_simple_remove(&fpga_bridge_ida, id); +error_kfree: + kfree(bridge); + + return ret; +} +EXPORT_SYMBOL_GPL(fpga_bridge_register); + +/** + * fpga_bridge_unregister - unregister a fpga bridge driver + * @dev: FPGA bridge device from pdev + */ +void fpga_bridge_unregister(struct device *dev) +{ + struct fpga_bridge *bridge = dev_get_drvdata(dev); 
+ + /* + * If the low level driver provides a method for putting bridge into + * a desired state upon unregister, do it. + */ + if (bridge->br_ops && bridge->br_ops->fpga_bridge_remove) + bridge->br_ops->fpga_bridge_remove(bridge); + + device_unregister(&bridge->dev); +} +EXPORT_SYMBOL_GPL(fpga_bridge_unregister); + +static void fpga_bridge_dev_release(struct device *dev) +{ + struct fpga_bridge *bridge = to_fpga_bridge(dev); + + ida_simple_remove(&fpga_bridge_ida, bridge->dev.id); + kfree(bridge); +} + +static int __init fpga_bridge_dev_init(void) +{ + spin_lock_init(&bridge_list_lock); + + fpga_bridge_class = class_create(THIS_MODULE, "fpga_bridge"); + if (IS_ERR(fpga_bridge_class)) + return PTR_ERR(fpga_bridge_class); + + fpga_bridge_class->dev_groups = fpga_bridge_groups; + fpga_bridge_class->dev_release = fpga_bridge_dev_release; + + return 0; +} + +static void __exit fpga_bridge_dev_exit(void) +{ + class_destroy(fpga_bridge_class); + ida_destroy(&fpga_bridge_ida); +} + +MODULE_DESCRIPTION("FPGA Bridge Driver"); +MODULE_AUTHOR("Alan Tull "); +MODULE_LICENSE("GPL v2"); + +subsys_initcall(fpga_bridge_dev_init); +module_exit(fpga_bridge_dev_exit); diff --git a/include/linux/fpga/fpga-bridge.h b/include/linux/fpga/fpga-bridge.h new file mode 100644 index 000000000000..dba6e3c697c7 --- /dev/null +++ b/include/linux/fpga/fpga-bridge.h @@ -0,0 +1,60 @@ +#include +#include + +#ifndef _LINUX_FPGA_BRIDGE_H +#define _LINUX_FPGA_BRIDGE_H + +struct fpga_bridge; + +/** + * struct fpga_bridge_ops - ops for low level FPGA bridge drivers + * @enable_show: returns the FPGA bridge's status + * @enable_set: set a FPGA bridge as enabled or disabled + * @fpga_bridge_remove: set FPGA into a specific state during driver remove + */ +struct fpga_bridge_ops { + int (*enable_show)(struct fpga_bridge *bridge); + int (*enable_set)(struct fpga_bridge *bridge, bool enable); + void (*fpga_bridge_remove)(struct fpga_bridge *bridge); +}; + +/** + * struct fpga_bridge - FPGA bridge structure + 
* @name: name of low level FPGA bridge + * @dev: FPGA bridge device + * @mutex: enforces exclusive reference to bridge + * @br_ops: pointer to struct of FPGA bridge ops + * @info: fpga image specific information + * @node: FPGA bridge list node + * @priv: low level driver private data + */ +struct fpga_bridge { + const char *name; + struct device dev; + struct mutex mutex; /* for exclusive reference to bridge */ + const struct fpga_bridge_ops *br_ops; + struct fpga_image_info *info; + struct list_head node; + void *priv; +}; + +#define to_fpga_bridge(d) container_of(d, struct fpga_bridge, dev) + +struct fpga_bridge *of_fpga_bridge_get(struct device_node *node, + struct fpga_image_info *info); +void fpga_bridge_put(struct fpga_bridge *bridge); +int fpga_bridge_enable(struct fpga_bridge *bridge); +int fpga_bridge_disable(struct fpga_bridge *bridge); + +int fpga_bridges_enable(struct list_head *bridge_list); +int fpga_bridges_disable(struct list_head *bridge_list); +void fpga_bridges_put(struct list_head *bridge_list); +int fpga_bridge_get_to_list(struct device_node *np, + struct fpga_image_info *info, + struct list_head *bridge_list); + +int fpga_bridge_register(struct device *dev, const char *name, + const struct fpga_bridge_ops *br_ops, void *priv); +void fpga_bridge_unregister(struct device *dev); + +#endif /* _LINUX_FPGA_BRIDGE_H */ -- cgit v1.2.3 From 0fa20cdfcc1f68847cdfc47824476301eedc8297 Mon Sep 17 00:00:00 2001 From: Alan Tull Date: Tue, 1 Nov 2016 14:14:29 -0500 Subject: fpga: fpga-region: device tree control for FPGA FPGA Regions support programming FPGA under control of the Device Tree.
Signed-off-by: Alan Tull Signed-off-by: Greg Kroah-Hartman --- drivers/fpga/Kconfig | 7 + drivers/fpga/Makefile | 3 + drivers/fpga/fpga-region.c | 603 ++++++++++++++++++++++++++++++++++++++++++ include/linux/fpga/fpga-mgr.h | 2 + 4 files changed, 615 insertions(+) create mode 100644 drivers/fpga/fpga-region.c (limited to 'include/linux') diff --git a/drivers/fpga/Kconfig b/drivers/fpga/Kconfig index 9b20f45c85bf..e0e1257e17e2 100644 --- a/drivers/fpga/Kconfig +++ b/drivers/fpga/Kconfig @@ -13,6 +13,13 @@ config FPGA if FPGA +config FPGA_REGION + tristate "FPGA Region" + depends on OF && FPGA_BRIDGE + help + FPGA Regions allow loading FPGA images under control of + the Device Tree. + config FPGA_MGR_SOCFPGA tristate "Altera SOCFPGA FPGA Manager" depends on ARCH_SOCFPGA diff --git a/drivers/fpga/Makefile b/drivers/fpga/Makefile index 4baef0022d4c..8d746c342533 100644 --- a/drivers/fpga/Makefile +++ b/drivers/fpga/Makefile @@ -11,3 +11,6 @@ obj-$(CONFIG_FPGA_MGR_ZYNQ_FPGA) += zynq-fpga.o # FPGA Bridge Drivers obj-$(CONFIG_FPGA_BRIDGE) += fpga-bridge.o + +# High Level Interfaces +obj-$(CONFIG_FPGA_REGION) += fpga-region.o diff --git a/drivers/fpga/fpga-region.c b/drivers/fpga/fpga-region.c new file mode 100644 index 000000000000..3222fdbad75a --- /dev/null +++ b/drivers/fpga/fpga-region.c @@ -0,0 +1,603 @@ +/* + * FPGA Region - Device Tree support for FPGA programming under Linux + * + * Copyright (C) 2013-2016 Altera Corporation + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/** + * struct fpga_region - FPGA Region structure + * @dev: FPGA Region device + * @mutex: enforces exclusive reference to region + * @bridge_list: list of FPGA bridges specified in region + * @info: fpga image specific information + */ +struct fpga_region { + struct device dev; + struct mutex mutex; /* for exclusive reference to region */ + struct list_head bridge_list; + struct fpga_image_info *info; +}; + +#define to_fpga_region(d) container_of(d, struct fpga_region, dev) + +static DEFINE_IDA(fpga_region_ida); +static struct class *fpga_region_class; + +static const struct of_device_id fpga_region_of_match[] = { + { .compatible = "fpga-region", }, + {}, +}; +MODULE_DEVICE_TABLE(of, fpga_region_of_match); + +static int fpga_region_of_node_match(struct device *dev, const void *data) +{ + return dev->of_node == data; +} + +/** + * fpga_region_find - find FPGA region + * @np: device node of FPGA Region + * Caller will need to put_device(&region->dev) when done. + * Returns FPGA Region struct or NULL + */ +static struct fpga_region *fpga_region_find(struct device_node *np) +{ + struct device *dev; + + dev = class_find_device(fpga_region_class, NULL, np, + fpga_region_of_node_match); + if (!dev) + return NULL; + + return to_fpga_region(dev); +} + +/** + * fpga_region_get - get an exclusive reference to a fpga region + * @region: FPGA Region struct + * + * Caller should call fpga_region_put() when done with region. + * + * Return fpga_region struct if successful. + * Return -EBUSY if someone already has a reference to the region. + * Return -ENODEV if @np is not a FPGA Region.
+ */ +static struct fpga_region *fpga_region_get(struct fpga_region *region) +{ + struct device *dev = &region->dev; + + if (!mutex_trylock(&region->mutex)) { + dev_dbg(dev, "%s: FPGA Region already in use\n", __func__); + return ERR_PTR(-EBUSY); + } + + get_device(dev); + of_node_get(dev->of_node); + if (!try_module_get(dev->parent->driver->owner)) { + of_node_put(dev->of_node); + put_device(dev); + mutex_unlock(&region->mutex); + return ERR_PTR(-ENODEV); + } + + dev_dbg(&region->dev, "get\n"); + + return region; +} + +/** + * fpga_region_put - release a reference to a region + * + * @region: FPGA region + */ +static void fpga_region_put(struct fpga_region *region) +{ + struct device *dev = &region->dev; + + dev_dbg(&region->dev, "put\n"); + + module_put(dev->parent->driver->owner); + of_node_put(dev->of_node); + put_device(dev); + mutex_unlock(&region->mutex); +} + +/** + * fpga_region_get_manager - get exclusive reference for FPGA manager + * @region: FPGA region + * + * Get FPGA Manager from "fpga-mgr" property or from ancestor region. + * + * Caller should call fpga_mgr_put() when done with manager. + * + * Return: fpga manager struct or IS_ERR() condition containing error code. + */ +static struct fpga_manager *fpga_region_get_manager(struct fpga_region *region) +{ + struct device *dev = &region->dev; + struct device_node *np = dev->of_node; + struct device_node *mgr_node; + struct fpga_manager *mgr; + + of_node_get(np); + while (np) { + if (of_device_is_compatible(np, "fpga-region")) { + mgr_node = of_parse_phandle(np, "fpga-mgr", 0); + if (mgr_node) { + mgr = of_fpga_mgr_get(mgr_node); + of_node_put(np); + return mgr; + } + } + np = of_get_next_parent(np); + } + of_node_put(np); + + return ERR_PTR(-EINVAL); +} + +/** + * fpga_region_get_bridges - create a list of bridges + * @region: FPGA region + * @overlay: device node of the overlay + * + * Create a list of bridges including the parent bridge and the bridges + * specified by "fpga-bridges" property.
Note that the + * fpga_bridges_enable/disable/put functions are all fine with an empty list + * if that happens. + * + * Caller should call fpga_bridges_put(&region->bridge_list) when + * done with the bridges. + * + * Return 0 for success (even if there are no bridges specified) + * or -EBUSY if any of the bridges are in use. + */ +static int fpga_region_get_bridges(struct fpga_region *region, + struct device_node *overlay) +{ + struct device *dev = &region->dev; + struct device_node *region_np = dev->of_node; + struct device_node *br, *np, *parent_br = NULL; + int i, ret; + + /* If parent is a bridge, add to list */ + ret = fpga_bridge_get_to_list(region_np->parent, region->info, + &region->bridge_list); + if (ret == -EBUSY) + return ret; + + if (!ret) + parent_br = region_np->parent; + + /* If overlay has a list of bridges, use it. */ + if (of_parse_phandle(overlay, "fpga-bridges", 0)) + np = overlay; + else + np = region_np; + + for (i = 0; ; i++) { + br = of_parse_phandle(np, "fpga-bridges", i); + if (!br) + break; + + /* If parent bridge is in list, skip it. */ + if (br == parent_br) + continue; + + /* If node is a bridge, get it and add to list */ + ret = fpga_bridge_get_to_list(br, region->info, + &region->bridge_list); + + /* If any of the bridges are in use, give up */ + if (ret == -EBUSY) { + fpga_bridges_put(&region->bridge_list); + return -EBUSY; + } + } + + return 0; +} + +/** + * fpga_region_program_fpga - program FPGA + * @region: FPGA region + * @firmware_name: name of FPGA image firmware file + * @overlay: device node of the overlay + * Program an FPGA using information in the device tree. + * Function assumes that there is a firmware-name property. + * Return 0 for success or negative error code.
+ */ +static int fpga_region_program_fpga(struct fpga_region *region, + const char *firmware_name, + struct device_node *overlay) +{ + struct fpga_manager *mgr; + int ret; + + region = fpga_region_get(region); + if (IS_ERR(region)) { + pr_err("failed to get fpga region\n"); + return PTR_ERR(region); + } + + mgr = fpga_region_get_manager(region); + if (IS_ERR(mgr)) { + pr_err("failed to get fpga region manager\n"); + return PTR_ERR(mgr); + } + + ret = fpga_region_get_bridges(region, overlay); + if (ret) { + pr_err("failed to get fpga region bridges\n"); + goto err_put_mgr; + } + + ret = fpga_bridges_disable(&region->bridge_list); + if (ret) { + pr_err("failed to disable region bridges\n"); + goto err_put_br; + } + + ret = fpga_mgr_firmware_load(mgr, region->info, firmware_name); + if (ret) { + pr_err("failed to load fpga image\n"); + goto err_put_br; + } + + ret = fpga_bridges_enable(&region->bridge_list); + if (ret) { + pr_err("failed to enable region bridges\n"); + goto err_put_br; + } + + fpga_mgr_put(mgr); + fpga_region_put(region); + + return 0; + +err_put_br: + fpga_bridges_put(&region->bridge_list); +err_put_mgr: + fpga_mgr_put(mgr); + fpga_region_put(region); + + return ret; +} + +/** + * child_regions_with_firmware + * @overlay: device node of the overlay + * + * If the overlay adds child FPGA regions, they are not allowed to have + * firmware-name property. + * + * Return 0 for OK or -EINVAL if child FPGA region adds firmware-name.
+ */ +static int child_regions_with_firmware(struct device_node *overlay) +{ + struct device_node *child_region; + const char *child_firmware_name; + int ret = 0; + + of_node_get(overlay); + + child_region = of_find_matching_node(overlay, fpga_region_of_match); + while (child_region) { + if (!of_property_read_string(child_region, "firmware-name", + &child_firmware_name)) { + ret = -EINVAL; + break; + } + child_region = of_find_matching_node(child_region, + fpga_region_of_match); + } + + of_node_put(child_region); + + if (ret) + pr_err("firmware-name not allowed in child FPGA region: %s", + child_region->full_name); + + return ret; +} + +/** + * fpga_region_notify_pre_apply - pre-apply overlay notification + * + * @region: FPGA region that the overlay was applied to + * @nd: overlay notification data + * + * Called when an overlay targeted to a FPGA Region is about to be + * applied. Function will check the properties that will be added to the FPGA + * region. If the checks pass, it will program the FPGA. + * + * The checks are: + * The overlay must add either firmware-name or external-fpga-config property + * to the FPGA Region. + * + * firmware-name : program the FPGA + * external-fpga-config : FPGA is already programmed + * + * The overlay can add other FPGA regions, but child FPGA regions cannot have a + * firmware-name property since those regions don't exist yet. + * + * If the overlay breaks the rules, notifier returns an error and the + * overlay is rejected before it goes into the main tree. + * + * Returns 0 for success or negative error code for failure.
+ */
+static int fpga_region_notify_pre_apply(struct fpga_region *region,
+					struct of_overlay_notify_data *nd)
+{
+	const char *firmware_name = NULL;
+	struct fpga_image_info *info;
+	int ret;
+
+	/*
+	 * The image info lives in region->info for as long as the overlay is
+	 * applied; fpga_region_notify_post_remove() frees it.  If the overlay
+	 * is rejected below, it is freed again before returning.
+	 */
+	info = devm_kzalloc(&region->dev, sizeof(*info), GFP_KERNEL);
+	if (!info)
+		return -ENOMEM;
+
+	region->info = info;
+
+	/* Reject overlay if child FPGA Regions have firmware-name property */
+	ret = child_regions_with_firmware(nd->overlay);
+	if (ret)
+		goto err_free_info;
+
+	/* Read FPGA region properties from the overlay */
+	if (of_property_read_bool(nd->overlay, "partial-fpga-config"))
+		info->flags |= FPGA_MGR_PARTIAL_RECONFIG;
+
+	if (of_property_read_bool(nd->overlay, "external-fpga-config"))
+		info->flags |= FPGA_MGR_EXTERNAL_CONFIG;
+
+	/* All three are optional; absent properties leave the zeroed defaults. */
+	of_property_read_string(nd->overlay, "firmware-name", &firmware_name);
+
+	of_property_read_u32(nd->overlay, "region-unfreeze-timeout-us",
+			     &info->enable_timeout_us);
+
+	of_property_read_u32(nd->overlay, "region-freeze-timeout-us",
+			     &info->disable_timeout_us);
+
+	/* If FPGA was externally programmed, don't specify firmware */
+	if ((info->flags & FPGA_MGR_EXTERNAL_CONFIG) && firmware_name) {
+		pr_err("error: specified firmware and external-fpga-config\n");
+		ret = -EINVAL;
+		goto err_free_info;
+	}
+
+	/* FPGA is already configured externally.  We're done. */
+	if (info->flags & FPGA_MGR_EXTERNAL_CONFIG)
+		return 0;
+
+	/* If we got this far, we should be programming the FPGA */
+	if (!firmware_name) {
+		pr_err("should specify firmware-name or external-fpga-config\n");
+		ret = -EINVAL;
+		goto err_free_info;
+	}
+
+	ret = fpga_region_program_fpga(region, firmware_name, nd->overlay);
+	if (ret)
+		goto err_free_info;
+
+	return 0;
+
+err_free_info:
+	/*
+	 * The overlay is being rejected, so no post-remove notification will
+	 * come along to free the image info.  Drop it now rather than letting
+	 * one allocation pile up per rejected overlay.
+	 */
+	region->info = NULL;
+	devm_kfree(&region->dev, info);
+
+	return ret;
+}
+
+/**
+ * fpga_region_notify_post_remove - post-remove overlay notification
+ *
+ * @region: FPGA region that was targeted by the overlay that was removed
+ * @nd: overlay notification data
+ *
+ * Called after an overlay has been removed if the overlay's target was a
+ * FPGA region.
+ */
+static void fpga_region_notify_post_remove(struct fpga_region *region,
+					    struct of_overlay_notify_data *nd)
+{
+	/* Undo what pre-apply set up: bridges first, then the image info. */
+	fpga_bridges_disable(&region->bridge_list);
+	fpga_bridges_put(&region->bridge_list);
+	devm_kfree(&region->dev, region->info);
+	region->info = NULL;
+}
+
+/**
+ * of_fpga_region_notify - reconfig notifier for dynamic DT changes
+ * @nb:		notifier block
+ * @action:	notifier action
+ * @arg:	reconfig data
+ *
+ * This notifier handles programming a FPGA when a "firmware-name" property is
+ * added to a fpga-region.
+ *
+ * Returns NOTIFY_OK or error if FPGA programming fails.
+ */
+static int of_fpga_region_notify(struct notifier_block *nb,
+				 unsigned long action, void *arg)
+{
+	struct of_overlay_notify_data *nd = arg;
+	struct fpga_region *region;
+	int ret;
+
+	/* Only pre-apply and post-remove need any work from us. */
+	switch (action) {
+	case OF_OVERLAY_PRE_APPLY:
+		pr_debug("%s OF_OVERLAY_PRE_APPLY\n", __func__);
+		break;
+	case OF_OVERLAY_POST_APPLY:
+		pr_debug("%s OF_OVERLAY_POST_APPLY\n", __func__);
+		return NOTIFY_OK;       /* not for us */
+	case OF_OVERLAY_PRE_REMOVE:
+		pr_debug("%s OF_OVERLAY_PRE_REMOVE\n", __func__);
+		return NOTIFY_OK;       /* not for us */
+	case OF_OVERLAY_POST_REMOVE:
+		pr_debug("%s OF_OVERLAY_POST_REMOVE\n", __func__);
+		break;
+	default:			/* should not happen */
+		return NOTIFY_OK;
+	}
+
+	/* Overlays whose target is not one of our regions are ignored. */
+	region = fpga_region_find(nd->target);
+	if (!region)
+		return NOTIFY_OK;
+
+	ret = 0;
+	switch (action) {
+	case OF_OVERLAY_PRE_APPLY:
+		ret = fpga_region_notify_pre_apply(region, nd);
+		break;
+
+	case OF_OVERLAY_POST_REMOVE:
+		fpga_region_notify_post_remove(region, nd);
+		break;
+
+	default:		/* every other action returned above */
+		break;
+	}
+
+	/* Paired with the lookup above, which handed us the region. */
+	put_device(&region->dev);
+
+	if (ret)
+		return notifier_from_errno(ret);
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block fpga_region_of_nb = {
+	.notifier_call = of_fpga_region_notify,
+};
+
+/*
+ * Allocate a region, give it an ID and register it as a child device of the
+ * platform device, then populate the child FPGA regions found under its node.
+ */
+static int fpga_region_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct device_node *np = dev->of_node;
+	struct fpga_region *region;
+	int id, ret = 0;
+
+	region = kzalloc(sizeof(*region), GFP_KERNEL);
+	if (!region)
+		return -ENOMEM;
+
+	id = ida_simple_get(&fpga_region_ida, 0, 0, GFP_KERNEL);
+	if (id < 0) {
+		ret = id;
+		goto err_kfree;
+	}
+
+	mutex_init(&region->mutex);
+	INIT_LIST_HEAD(&region->bridge_list);
+
+	device_initialize(&region->dev);
+	region->dev.class = fpga_region_class;
+	region->dev.parent = dev;
+	region->dev.of_node = np;
+	region->dev.id = id;
+	dev_set_drvdata(dev, region);
+
+	ret = dev_set_name(&region->dev, "region%d", id);
+	if (ret)
+		goto err_put_device;
+
+	ret = device_add(&region->dev);
+	if (ret)
+		goto err_put_device;
+
+	of_platform_populate(np, fpga_region_of_match, NULL, &region->dev);
+
+	dev_info(dev, "FPGA Region probed\n");
+
+	return 0;
+
+err_put_device:
+	/*
+	 * Once device_initialize() has run the structure is reference
+	 * counted and must be dropped with put_device(), never freed by
+	 * hand.  The final put frees the device name and invokes the class
+	 * release callback, fpga_region_dev_release(), which returns the ID
+	 * and frees the region.
+	 */
+	put_device(&region->dev);
+	return ret;
+
+err_kfree:
+	/* Nothing but the allocation exists yet. */
+	kfree(region);
+
+	return ret;
+}
+
+/* Unregister the region device; its memory goes away in the release hook. */
+static int fpga_region_remove(struct platform_device *pdev)
+{
+	struct fpga_region *region = platform_get_drvdata(pdev);
+
+	device_unregister(&region->dev);
+
+	return 0;
+}
+
+static struct platform_driver fpga_region_driver = {
+	.probe = fpga_region_probe,
+	.remove = fpga_region_remove,
+	.driver = {
+		.name	= "fpga-region",
+		.of_match_table = of_match_ptr(fpga_region_of_match),
+	},
+};
+
+/* Class release callback: give the ID back and free the region. */
+static void fpga_region_dev_release(struct device *dev)
+{
+	struct fpga_region *region = to_fpga_region(dev);
+
+	ida_simple_remove(&fpga_region_ida, region->dev.id);
+	kfree(region);
+}
+
+/**
+ * fpga_region_init - init function for fpga_region class
+ * Creates the fpga_region class and registers a reconfig notifier.
+ */
+static int __init fpga_region_init(void)
+{
+	int err;
+
+	fpga_region_class = class_create(THIS_MODULE, "fpga_region");
+	if (IS_ERR(fpga_region_class))
+		return PTR_ERR(fpga_region_class);
+
+	/* Region devices are freed through the class release hook. */
+	fpga_region_class->dev_release = fpga_region_dev_release;
+
+	err = of_overlay_notifier_register(&fpga_region_of_nb);
+	if (err)
+		goto destroy_class;
+
+	err = platform_driver_register(&fpga_region_driver);
+	if (!err)
+		return 0;
+
+	/* Driver registration failed: unwind in reverse order. */
+	of_overlay_notifier_unregister(&fpga_region_of_nb);
+destroy_class:
+	class_destroy(fpga_region_class);
+	ida_destroy(&fpga_region_ida);
+	return err;
+}
+
+/* Module teardown: the reverse of fpga_region_init(). */
+static void __exit fpga_region_exit(void)
+{
+	platform_driver_unregister(&fpga_region_driver);
+	of_overlay_notifier_unregister(&fpga_region_of_nb);
+	class_destroy(fpga_region_class);
+	ida_destroy(&fpga_region_ida);
+}
+
+subsys_initcall(fpga_region_init);
+module_exit(fpga_region_exit);
+
+MODULE_DESCRIPTION("FPGA Region");
+MODULE_AUTHOR("Alan Tull ");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/fpga/fpga-mgr.h b/include/linux/fpga/fpga-mgr.h index 55803186e0ea..96a1a3311649 100644 --- a/include/linux/fpga/fpga-mgr.h +++ b/include/linux/fpga/fpga-mgr.h @@ -65,8 +65,10 @@ enum fpga_mgr_states { /* * FPGA Manager flags * FPGA_MGR_PARTIAL_RECONFIG: do partial reconfiguration if supported + * FPGA_MGR_EXTERNAL_CONFIG: FPGA has been configured prior to Linux booting */ #define FPGA_MGR_PARTIAL_RECONFIG BIT(0) +#define FPGA_MGR_EXTERNAL_CONFIG BIT(1) /** * struct fpga_image_info - information specific to a FPGA image -- cgit v1.2.3 From fac51482577d5e05bbb0efa8d602a3c2111098bf Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Fri, 28 Oct 2016 09:45:28 +0100 Subject: drivers: base: cacheinfo: fix x86 with CONFIG_OF enabled With CONFIG_OF enabled on x86, we get the following error on boot: " Failed to find cpu0 device node Unable to detect cache hierarchy from DT for CPU 0 " and the cacheinfo fails to get populated in the corresponding sysfs entries.
This is because cache_setup_of_node looks for of_node for setting up the shared cpu_map without checking that it's already populated in the architecture specific callback. In order to indicate that the shared cpu_map is already populated, this patch introduces a boolean `cpu_map_populated` in struct cpu_cacheinfo that can be used by the generic code to skip cache_shared_cpu_map_setup. This patch also sets that boolean for x86. Cc: Greg Kroah-Hartman Signed-off-by: Sudeep Holla Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/intel_cacheinfo.c | 2 ++ drivers/base/cacheinfo.c | 3 +++ include/linux/cacheinfo.h | 1 + 3 files changed, 6 insertions(+) (limited to 'include/linux') diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index de6626c18e42..be6337156502 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -934,6 +934,8 @@ static int __populate_cache_leaves(unsigned int cpu) ci_leaf_init(this_leaf++, &id4_regs); __cache_cpumap_setup(cpu, idx, &id4_regs); } + this_cpu_ci->cpu_map_populated = true; + return 0; } diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c index e9fd32e91668..ecde8957835a 100644 --- a/drivers/base/cacheinfo.c +++ b/drivers/base/cacheinfo.c @@ -106,6 +106,9 @@ static int cache_shared_cpu_map_setup(unsigned int cpu) unsigned int index; int ret; + if (this_cpu_ci->cpu_map_populated) + return 0; + ret = cache_setup_of_node(cpu); if (ret) return ret; diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h index 2189935075b4..a951fd10aaaa 100644 --- a/include/linux/cacheinfo.h +++ b/include/linux/cacheinfo.h @@ -71,6 +71,7 @@ struct cpu_cacheinfo { struct cacheinfo *info_list; unsigned int num_levels; unsigned int num_leaves; + bool cpu_map_populated; }; /* -- cgit v1.2.3 From d49187e97e94e2eb613cb6fed810356972077cc3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 10 Nov 2016 07:32:33 -0800 Subject: nvme: introduce 
struct nvme_request This adds a shared per-request structure for all NVMe I/O. This structure is embedded as the first member in all NVMe transport drivers request private data and allows implementing common functionality between the drivers. The first use is to replace the current abuse of the SCSI command passthrough fields in struct request for the NVMe command passthrough, but it will grow a few more fields to allow implementing things like common abort handlers in the future. The passthrough commands are handled by having a pointer to the SQE (struct nvme_command) in struct nvme_request, and the union of the possible result fields, which had to be turned from an anonymous into a named union for that purpose. This avoids having to pass a reference to a full CQE around and thus makes checking the result a lot more lightweight. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 28 +++++++++++++--------------- drivers/nvme/host/fabrics.c | 26 +++++++++++++------------- drivers/nvme/host/lightnvm.c | 31 +++++++------------------------ drivers/nvme/host/nvme.h | 16 +++++++++++++++- drivers/nvme/host/pci.c | 4 ++-- drivers/nvme/host/rdma.c | 11 +++-------- drivers/nvme/target/core.c | 8 ++++---- drivers/nvme/target/fabrics-cmd.c | 14 +++++++------- drivers/nvme/target/loop.c | 12 ++++++------ drivers/nvme/target/nvmet.h | 2 +- include/linux/nvme.h | 10 +++++----- 11 files changed, 76 insertions(+), 86 deletions(-) (limited to 'include/linux') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index ef34f2f3566a..2fd632bcd975 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -221,8 +221,7 @@ struct request *nvme_alloc_request(struct request_queue *q, req->cmd_type = REQ_TYPE_DRV_PRIV; req->cmd_flags |= REQ_FAILFAST_DRIVER; - req->cmd = (unsigned char *)cmd; - req->cmd_len = sizeof(struct nvme_command); + nvme_req(req)->cmd = cmd; return req; } @@ -321,7 +320,7 @@ int 
nvme_setup_cmd(struct nvme_ns *ns, struct request *req, int ret = 0; if (req->cmd_type == REQ_TYPE_DRV_PRIV) - memcpy(cmd, req->cmd, sizeof(*cmd)); + memcpy(cmd, nvme_req(req)->cmd, sizeof(*cmd)); else if (req_op(req) == REQ_OP_FLUSH) nvme_setup_flush(ns, cmd); else if (req_op(req) == REQ_OP_DISCARD) @@ -338,7 +337,7 @@ EXPORT_SYMBOL_GPL(nvme_setup_cmd); * if the result is positive, it's an NVM Express status code */ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, - struct nvme_completion *cqe, void *buffer, unsigned bufflen, + union nvme_result *result, void *buffer, unsigned bufflen, unsigned timeout, int qid, int at_head, int flags) { struct request *req; @@ -349,7 +348,6 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, return PTR_ERR(req); req->timeout = timeout ? timeout : ADMIN_TIMEOUT; - req->special = cqe; if (buffer && bufflen) { ret = blk_rq_map_kern(q, req, buffer, bufflen, GFP_KERNEL); @@ -358,6 +356,8 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, } blk_execute_rq(req->q, NULL, req, at_head); + if (result) + *result = nvme_req(req)->result; ret = req->errors; out: blk_mq_free_request(req); @@ -379,7 +379,6 @@ int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd, u32 *result, unsigned timeout) { bool write = nvme_is_write(cmd); - struct nvme_completion cqe; struct nvme_ns *ns = q->queuedata; struct gendisk *disk = ns ? ns->disk : NULL; struct request *req; @@ -392,7 +391,6 @@ int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd, return PTR_ERR(req); req->timeout = timeout ? 
timeout : ADMIN_TIMEOUT; - req->special = &cqe; if (ubuffer && bufflen) { ret = blk_rq_map_user(q, req, NULL, ubuffer, bufflen, @@ -447,7 +445,7 @@ int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd, blk_execute_rq(req->q, disk, req, 0); ret = req->errors; if (result) - *result = le32_to_cpu(cqe.result); + *result = le32_to_cpu(nvme_req(req)->result.u32); if (meta && !ret && !write) { if (copy_to_user(meta_buffer, meta, meta_len)) ret = -EFAULT; @@ -596,7 +594,7 @@ int nvme_get_features(struct nvme_ctrl *dev, unsigned fid, unsigned nsid, void *buffer, size_t buflen, u32 *result) { struct nvme_command c; - struct nvme_completion cqe; + union nvme_result res; int ret; memset(&c, 0, sizeof(c)); @@ -604,10 +602,10 @@ int nvme_get_features(struct nvme_ctrl *dev, unsigned fid, unsigned nsid, c.features.nsid = cpu_to_le32(nsid); c.features.fid = cpu_to_le32(fid); - ret = __nvme_submit_sync_cmd(dev->admin_q, &c, &cqe, buffer, buflen, 0, + ret = __nvme_submit_sync_cmd(dev->admin_q, &c, &res, buffer, buflen, 0, NVME_QID_ANY, 0, 0); if (ret >= 0 && result) - *result = le32_to_cpu(cqe.result); + *result = le32_to_cpu(res.u32); return ret; } @@ -615,7 +613,7 @@ int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11, void *buffer, size_t buflen, u32 *result) { struct nvme_command c; - struct nvme_completion cqe; + union nvme_result res; int ret; memset(&c, 0, sizeof(c)); @@ -623,10 +621,10 @@ int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11, c.features.fid = cpu_to_le32(fid); c.features.dword11 = cpu_to_le32(dword11); - ret = __nvme_submit_sync_cmd(dev->admin_q, &c, &cqe, + ret = __nvme_submit_sync_cmd(dev->admin_q, &c, &res, buffer, buflen, 0, NVME_QID_ANY, 0, 0); if (ret >= 0 && result) - *result = le32_to_cpu(cqe.result); + *result = le32_to_cpu(res.u32); return ret; } @@ -1901,7 +1899,7 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, struct nvme_completion *cqe) { u16 status = 
le16_to_cpu(cqe->status) >> 1; - u32 result = le32_to_cpu(cqe->result); + u32 result = le32_to_cpu(cqe->result.u32); if (status == NVME_SC_SUCCESS || status == NVME_SC_ABORT_REQ) { ++ctrl->event_limit; diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 5a3f008d3480..68fb26b3bfb9 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -161,7 +161,7 @@ EXPORT_SYMBOL_GPL(nvmf_get_subsysnqn); int nvmf_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val) { struct nvme_command cmd; - struct nvme_completion cqe; + union nvme_result res; int ret; memset(&cmd, 0, sizeof(cmd)); @@ -169,11 +169,11 @@ int nvmf_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val) cmd.prop_get.fctype = nvme_fabrics_type_property_get; cmd.prop_get.offset = cpu_to_le32(off); - ret = __nvme_submit_sync_cmd(ctrl->admin_q, &cmd, &cqe, NULL, 0, 0, + ret = __nvme_submit_sync_cmd(ctrl->admin_q, &cmd, &res, NULL, 0, 0, NVME_QID_ANY, 0, 0); if (ret >= 0) - *val = le64_to_cpu(cqe.result64); + *val = le64_to_cpu(res.u64); if (unlikely(ret != 0)) dev_err(ctrl->device, "Property Get error: %d, offset %#x\n", @@ -207,7 +207,7 @@ EXPORT_SYMBOL_GPL(nvmf_reg_read32); int nvmf_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val) { struct nvme_command cmd; - struct nvme_completion cqe; + union nvme_result res; int ret; memset(&cmd, 0, sizeof(cmd)); @@ -216,11 +216,11 @@ int nvmf_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val) cmd.prop_get.attrib = 1; cmd.prop_get.offset = cpu_to_le32(off); - ret = __nvme_submit_sync_cmd(ctrl->admin_q, &cmd, &cqe, NULL, 0, 0, + ret = __nvme_submit_sync_cmd(ctrl->admin_q, &cmd, &res, NULL, 0, 0, NVME_QID_ANY, 0, 0); if (ret >= 0) - *val = le64_to_cpu(cqe.result64); + *val = le64_to_cpu(res.u64); if (unlikely(ret != 0)) dev_err(ctrl->device, "Property Get error: %d, offset %#x\n", @@ -368,7 +368,7 @@ static void nvmf_log_connect_error(struct nvme_ctrl *ctrl, int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl) { struct 
nvme_command cmd; - struct nvme_completion cqe; + union nvme_result res; struct nvmf_connect_data *data; int ret; @@ -400,16 +400,16 @@ int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl) strncpy(data->subsysnqn, ctrl->opts->subsysnqn, NVMF_NQN_SIZE); strncpy(data->hostnqn, ctrl->opts->host->nqn, NVMF_NQN_SIZE); - ret = __nvme_submit_sync_cmd(ctrl->admin_q, &cmd, &cqe, + ret = __nvme_submit_sync_cmd(ctrl->admin_q, &cmd, &res, data, sizeof(*data), 0, NVME_QID_ANY, 1, BLK_MQ_REQ_RESERVED | BLK_MQ_REQ_NOWAIT); if (ret) { - nvmf_log_connect_error(ctrl, ret, le32_to_cpu(cqe.result), + nvmf_log_connect_error(ctrl, ret, le32_to_cpu(res.u32), &cmd, data); goto out_free_data; } - ctrl->cntlid = le16_to_cpu(cqe.result16); + ctrl->cntlid = le16_to_cpu(res.u16); out_free_data: kfree(data); @@ -441,7 +441,7 @@ int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid) { struct nvme_command cmd; struct nvmf_connect_data *data; - struct nvme_completion cqe; + union nvme_result res; int ret; memset(&cmd, 0, sizeof(cmd)); @@ -459,11 +459,11 @@ int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid) strncpy(data->subsysnqn, ctrl->opts->subsysnqn, NVMF_NQN_SIZE); strncpy(data->hostnqn, ctrl->opts->host->nqn, NVMF_NQN_SIZE); - ret = __nvme_submit_sync_cmd(ctrl->connect_q, &cmd, &cqe, + ret = __nvme_submit_sync_cmd(ctrl->connect_q, &cmd, &res, data, sizeof(*data), 0, qid, 1, BLK_MQ_REQ_RESERVED | BLK_MQ_REQ_NOWAIT); if (ret) { - nvmf_log_connect_error(ctrl, ret, le32_to_cpu(cqe.result), + nvmf_log_connect_error(ctrl, ret, le32_to_cpu(res.u32), &cmd, data); } kfree(data); diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c index f5e3011e31fc..442f67774ea9 100644 --- a/drivers/nvme/host/lightnvm.c +++ b/drivers/nvme/host/lightnvm.c @@ -146,14 +146,6 @@ struct nvme_nvm_command { }; }; -struct nvme_nvm_completion { - __le64 result; /* Used by LightNVM to return ppa completions */ - __le16 sq_head; /* how much of this queue may be reclaimed */ - __le16 sq_id; /* 
submission queue that generated this entry */ - __u16 command_id; /* of the command which completed */ - __le16 status; /* did the command fail, and if so, why? */ -}; - #define NVME_NVM_LP_MLC_PAIRS 886 struct nvme_nvm_lp_mlc { __le16 num_pairs; @@ -481,11 +473,8 @@ static inline void nvme_nvm_rqtocmd(struct request *rq, struct nvm_rq *rqd, static void nvme_nvm_end_io(struct request *rq, int error) { struct nvm_rq *rqd = rq->end_io_data; - struct nvme_nvm_completion *cqe = rq->special; - - if (cqe) - rqd->ppa_status = le64_to_cpu(cqe->result); + rqd->ppa_status = nvme_req(rq)->result.u64; nvm_end_io(rqd, error); kfree(rq->cmd); @@ -500,20 +489,18 @@ static int nvme_nvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd) struct bio *bio = rqd->bio; struct nvme_nvm_command *cmd; - rq = blk_mq_alloc_request(q, bio_data_dir(bio), 0); - if (IS_ERR(rq)) + cmd = kzalloc(sizeof(struct nvme_nvm_command), GFP_KERNEL); + if (!cmd) return -ENOMEM; - cmd = kzalloc(sizeof(struct nvme_nvm_command) + - sizeof(struct nvme_nvm_completion), GFP_KERNEL); - if (!cmd) { - blk_mq_free_request(rq); + rq = nvme_alloc_request(q, (struct nvme_command *)cmd, 0, NVME_QID_ANY); + if (IS_ERR(rq)) { + kfree(cmd); return -ENOMEM; } + rq->cmd_flags &= ~REQ_FAILFAST_DRIVER; - rq->cmd_type = REQ_TYPE_DRV_PRIV; rq->ioprio = bio_prio(bio); - if (bio_has_data(bio)) rq->nr_phys_segments = bio_phys_segments(q, bio); @@ -522,10 +509,6 @@ static int nvme_nvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd) nvme_nvm_rqtocmd(rq, rqd, ns, cmd); - rq->cmd = (unsigned char *)cmd; - rq->cmd_len = sizeof(struct nvme_nvm_command); - rq->special = cmd + 1; - rq->end_io_data = rqd; blk_execute_rq_nowait(q, NULL, rq, 0, nvme_nvm_end_io); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index d47f5a5d18c7..5e64957a9b96 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -79,6 +79,20 @@ enum nvme_quirks { NVME_QUIRK_DELAY_BEFORE_CHK_RDY = (1 << 3), }; +/* + * Common request 
structure for NVMe passthrough. All drivers must have + * this structure as the first member of their request-private data. + */ +struct nvme_request { + struct nvme_command *cmd; + union nvme_result result; +}; + +static inline struct nvme_request *nvme_req(struct request *req) +{ + return blk_mq_rq_to_pdu(req); +} + /* The below value is the specific amount of delay needed before checking * readiness in case of the PCI_DEVICE(0x1c58, 0x0003), which needs the * NVME_QUIRK_DELAY_BEFORE_CHK_RDY quirk enabled. The value (in ms) was @@ -278,7 +292,7 @@ int nvme_setup_cmd(struct nvme_ns *ns, struct request *req, int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, void *buf, unsigned bufflen); int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, - struct nvme_completion *cqe, void *buffer, unsigned bufflen, + union nvme_result *result, void *buffer, unsigned bufflen, unsigned timeout, int qid, int at_head, int flags); int nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd, void __user *ubuffer, unsigned bufflen, u32 *result, diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 0955e9d22020..de8e0505d979 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -140,6 +140,7 @@ struct nvme_queue { * allocated to store the PRP list. */ struct nvme_iod { + struct nvme_request req; struct nvme_queue *nvmeq; int aborted; int npages; /* In the PRP list. 
0 means small pool in use */ @@ -707,8 +708,7 @@ static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag) } req = blk_mq_tag_to_rq(*nvmeq->tags, cqe.command_id); - if (req->cmd_type == REQ_TYPE_DRV_PRIV && req->special) - memcpy(req->special, &cqe, sizeof(cqe)); + nvme_req(req)->result = cqe.result; blk_mq_complete_request(req, le16_to_cpu(cqe.status) >> 1); } diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 5a8388177959..0b8a161cf881 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -66,6 +66,7 @@ struct nvme_rdma_qe { struct nvme_rdma_queue; struct nvme_rdma_request { + struct nvme_request req; struct ib_mr *mr; struct nvme_rdma_qe sqe; struct ib_sge sge[1 + NVME_RDMA_MAX_INLINE_SEGMENTS]; @@ -1117,13 +1118,10 @@ static void nvme_rdma_submit_async_event(struct nvme_ctrl *arg, int aer_idx) static int nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue, struct nvme_completion *cqe, struct ib_wc *wc, int tag) { - u16 status = le16_to_cpu(cqe->status); struct request *rq; struct nvme_rdma_request *req; int ret = 0; - status >>= 1; - rq = blk_mq_tag_to_rq(nvme_rdma_tagset(queue), cqe->command_id); if (!rq) { dev_err(queue->ctrl->ctrl.device, @@ -1134,9 +1132,6 @@ static int nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue, } req = blk_mq_rq_to_pdu(rq); - if (rq->cmd_type == REQ_TYPE_DRV_PRIV && rq->special) - memcpy(rq->special, cqe, sizeof(*cqe)); - if (rq->tag == tag) ret = 1; @@ -1144,8 +1139,8 @@ static int nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue, wc->ex.invalidate_rkey == req->mr->rkey) req->mr->need_inval = false; - blk_mq_complete_request(rq, status); - + req->req.result = cqe->result; + blk_mq_complete_request(rq, le16_to_cpu(cqe->status) >> 1); return ret; } diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 6559d5afa7bf..c232552be2d8 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -617,7 +617,7 @@ u16 
nvmet_ctrl_find_get(const char *subsysnqn, const char *hostnqn, u16 cntlid, if (!subsys) { pr_warn("connect request for invalid subsystem %s!\n", subsysnqn); - req->rsp->result = IPO_IATTR_CONNECT_DATA(subsysnqn); + req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(subsysnqn); return NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR; } @@ -638,7 +638,7 @@ u16 nvmet_ctrl_find_get(const char *subsysnqn, const char *hostnqn, u16 cntlid, pr_warn("could not find controller %d for subsys %s / host %s\n", cntlid, subsysnqn, hostnqn); - req->rsp->result = IPO_IATTR_CONNECT_DATA(cntlid); + req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(cntlid); status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR; out: @@ -700,7 +700,7 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn, if (!subsys) { pr_warn("connect request for invalid subsystem %s!\n", subsysnqn); - req->rsp->result = IPO_IATTR_CONNECT_DATA(subsysnqn); + req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(subsysnqn); goto out; } @@ -709,7 +709,7 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn, if (!nvmet_host_allowed(req, subsys, hostnqn)) { pr_info("connect by host %s for subsystem %s not allowed\n", hostnqn, subsysnqn); - req->rsp->result = IPO_IATTR_CONNECT_DATA(hostnqn); + req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(hostnqn); up_read(&nvmet_config_sem); goto out_put_subsystem; } diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c index 9a97ae67e656..f4088198cd0d 100644 --- a/drivers/nvme/target/fabrics-cmd.c +++ b/drivers/nvme/target/fabrics-cmd.c @@ -69,7 +69,7 @@ static void nvmet_execute_prop_get(struct nvmet_req *req) } } - req->rsp->result64 = cpu_to_le64(val); + req->rsp->result.u64 = cpu_to_le64(val); nvmet_req_complete(req, status); } @@ -125,7 +125,7 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req) d = kmap(sg_page(req->sg)) + req->sg->offset; /* zero out initial completion result, assign values as needed */ - req->rsp->result = 0; + 
req->rsp->result.u32 = 0; if (c->recfmt != 0) { pr_warn("invalid connect version (%d).\n", @@ -138,7 +138,7 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req) pr_warn("connect attempt for invalid controller ID %#x\n", d->cntlid); status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR; - req->rsp->result = IPO_IATTR_CONNECT_DATA(cntlid); + req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(cntlid); goto out; } @@ -155,7 +155,7 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req) pr_info("creating controller %d for NQN %s.\n", ctrl->cntlid, ctrl->hostnqn); - req->rsp->result16 = cpu_to_le16(ctrl->cntlid); + req->rsp->result.u16 = cpu_to_le16(ctrl->cntlid); out: kunmap(sg_page(req->sg)); @@ -173,7 +173,7 @@ static void nvmet_execute_io_connect(struct nvmet_req *req) d = kmap(sg_page(req->sg)) + req->sg->offset; /* zero out initial completion result, assign values as needed */ - req->rsp->result = 0; + req->rsp->result.u32 = 0; if (c->recfmt != 0) { pr_warn("invalid connect version (%d).\n", @@ -191,14 +191,14 @@ static void nvmet_execute_io_connect(struct nvmet_req *req) if (unlikely(qid > ctrl->subsys->max_qid)) { pr_warn("invalid queue id (%d)\n", qid); status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR; - req->rsp->result = IPO_IATTR_CONNECT_SQE(qid); + req->rsp->result.u32 = IPO_IATTR_CONNECT_SQE(qid); goto out_ctrl_put; } status = nvmet_install_queue(ctrl, req); if (status) { /* pass back cntlid that had the issue of installing queue */ - req->rsp->result16 = cpu_to_le16(ctrl->cntlid); + req->rsp->result.u16 = cpu_to_le16(ctrl->cntlid); goto out_ctrl_put; } diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index d5df77d686b2..757e21a31128 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -36,6 +36,7 @@ (NVME_LOOP_AQ_DEPTH - NVME_LOOP_NR_AEN_COMMANDS) struct nvme_loop_iod { + struct nvme_request nvme_req; struct nvme_command cmd; struct nvme_completion rsp; struct nvmet_req req; @@ -112,10 +113,10 
@@ static void nvme_loop_complete_rq(struct request *req) blk_mq_end_request(req, error); } -static void nvme_loop_queue_response(struct nvmet_req *nvme_req) +static void nvme_loop_queue_response(struct nvmet_req *req) { struct nvme_loop_iod *iod = - container_of(nvme_req, struct nvme_loop_iod, req); + container_of(req, struct nvme_loop_iod, req); struct nvme_completion *cqe = &iod->rsp; /* @@ -128,11 +129,10 @@ static void nvme_loop_queue_response(struct nvmet_req *nvme_req) cqe->command_id >= NVME_LOOP_AQ_BLKMQ_DEPTH)) { nvme_complete_async_event(&iod->queue->ctrl->ctrl, cqe); } else { - struct request *req = blk_mq_rq_from_pdu(iod); + struct request *rq = blk_mq_rq_from_pdu(iod); - if (req->cmd_type == REQ_TYPE_DRV_PRIV && req->special) - memcpy(req->special, cqe, sizeof(*cqe)); - blk_mq_complete_request(req, le16_to_cpu(cqe->status) >> 1); + iod->nvme_req.result = cqe->result; + blk_mq_complete_request(rq, le16_to_cpu(cqe->status) >> 1); } } diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 76b6eedccaf9..f9c76441e8c9 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -238,7 +238,7 @@ static inline void nvmet_set_status(struct nvmet_req *req, u16 status) static inline void nvmet_set_result(struct nvmet_req *req, u32 result) { - req->rsp->result = cpu_to_le32(result); + req->rsp->result.u32 = cpu_to_le32(result); } /* diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 7676557ce357..18ce9f7cc881 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -949,11 +949,11 @@ struct nvme_completion { /* * Used by Admin and Fabrics commands to return data: */ - union { - __le16 result16; - __le32 result; - __le64 result64; - }; + union nvme_result { + __le16 u16; + __le32 u32; + __le64 u64; + } result; __le16 sq_head; /* how much of this queue may be reclaimed */ __le16 sq_id; /* submission queue that generated this entry */ __u16 command_id; /* of the command which completed */ -- cgit v1.2.3 From 
cf43e6be865a582ba66ee4747ae27a0513f6bba1 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 7 Nov 2016 21:32:37 -0700 Subject: block: add scalable completion tracking of requests For legacy block, we simply track them in the request queue. For blk-mq, we track them on a per-sw queue basis, which we can then sum up through the hardware queues and finally to a per device state. The stats are tracked in, roughly, 0.1s interval windows. Add sysfs files to display the stats. The feature is off by default, to avoid any extra overhead. In-kernel users of it can turn it on by setting QUEUE_FLAG_STATS in the queue flags. We currently don't turn it on if someone just reads any of the stats files, that is something we could add as well. Signed-off-by: Jens Axboe --- block/Makefile | 2 +- block/blk-core.c | 14 ++- block/blk-mq-sysfs.c | 47 +++++++++ block/blk-mq.c | 25 +++++ block/blk-mq.h | 3 + block/blk-stat.c | 248 ++++++++++++++++++++++++++++++++++++++++++++++ block/blk-stat.h | 42 ++++++++ block/blk-sysfs.c | 26 +++++ include/linux/blk_types.h | 16 +++ include/linux/blkdev.h | 7 ++ 10 files changed, 427 insertions(+), 3 deletions(-) create mode 100644 block/blk-stat.c create mode 100644 block/blk-stat.h (limited to 'include/linux') diff --git a/block/Makefile b/block/Makefile index 934dac73fb37..2528c596f7ec 100644 --- a/block/Makefile +++ b/block/Makefile @@ -5,7 +5,7 @@ obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-tag.o blk-sysfs.o \ blk-flush.o blk-settings.o blk-ioc.o blk-map.o \ blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \ - blk-lib.o blk-mq.o blk-mq-tag.o \ + blk-lib.o blk-mq.o blk-mq-tag.o blk-stat.o \ blk-mq-sysfs.o blk-mq-cpumap.o ioctl.o \ genhd.o scsi_ioctl.o partition-generic.o ioprio.o \ badblocks.o partitions/ diff --git a/block/blk-core.c b/block/blk-core.c index 2deca48a4a05..216372b01624 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -2464,6 +2464,11 @@ void blk_start_request(struct request *req) { blk_dequeue_request(req); + 
if (test_bit(QUEUE_FLAG_STATS, &req->q->queue_flags)) { + blk_stat_set_issue_time(&req->issue_stat); + req->rq_flags |= RQF_STATS; + } + /* * We are now handing the request to the hardware, initialize * resid_len to full count and add the timeout handler. @@ -2683,8 +2688,13 @@ EXPORT_SYMBOL_GPL(blk_unprep_request); */ void blk_finish_request(struct request *req, int error) { + struct request_queue *q = req->q; + + if (req->rq_flags & RQF_STATS) + blk_stat_add(&q->rq_stats[rq_data_dir(req)], req); + if (req->rq_flags & RQF_QUEUED) - blk_queue_end_tag(req->q, req); + blk_queue_end_tag(q, req); BUG_ON(blk_queued_rq(req)); @@ -2704,7 +2714,7 @@ void blk_finish_request(struct request *req, int error) if (blk_bidi_rq(req)) __blk_put_request(req->next_rq->q, req->next_rq); - __blk_put_request(req->q, req); + __blk_put_request(q, req); } } EXPORT_SYMBOL(blk_finish_request); diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c index 01fb455d3377..eacd3af72099 100644 --- a/block/blk-mq-sysfs.c +++ b/block/blk-mq-sysfs.c @@ -259,6 +259,47 @@ static ssize_t blk_mq_hw_sysfs_cpus_show(struct blk_mq_hw_ctx *hctx, char *page) return ret; } +static void blk_mq_stat_clear(struct blk_mq_hw_ctx *hctx) +{ + struct blk_mq_ctx *ctx; + unsigned int i; + + hctx_for_each_ctx(hctx, ctx, i) { + blk_stat_init(&ctx->stat[BLK_STAT_READ]); + blk_stat_init(&ctx->stat[BLK_STAT_WRITE]); + } +} + +static ssize_t blk_mq_hw_sysfs_stat_store(struct blk_mq_hw_ctx *hctx, + const char *page, size_t count) +{ + blk_mq_stat_clear(hctx); + return count; +} + +static ssize_t print_stat(char *page, struct blk_rq_stat *stat, const char *pre) +{ + return sprintf(page, "%s samples=%llu, mean=%lld, min=%lld, max=%lld\n", + pre, (long long) stat->nr_samples, + (long long) stat->mean, (long long) stat->min, + (long long) stat->max); +} + +static ssize_t blk_mq_hw_sysfs_stat_show(struct blk_mq_hw_ctx *hctx, char *page) +{ + struct blk_rq_stat stat[2]; + ssize_t ret; + + blk_stat_init(&stat[BLK_STAT_READ]); + 
blk_stat_init(&stat[BLK_STAT_WRITE]); + + blk_hctx_stat_get(hctx, stat); + + ret = print_stat(page, &stat[BLK_STAT_READ], "read :"); + ret += print_stat(page + ret, &stat[BLK_STAT_WRITE], "write:"); + return ret; +} + static struct blk_mq_ctx_sysfs_entry blk_mq_sysfs_dispatched = { .attr = {.name = "dispatched", .mode = S_IRUGO }, .show = blk_mq_sysfs_dispatched_show, @@ -317,6 +358,11 @@ static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_poll = { .show = blk_mq_hw_sysfs_poll_show, .store = blk_mq_hw_sysfs_poll_store, }; +static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_stat = { + .attr = {.name = "stats", .mode = S_IRUGO | S_IWUSR }, + .show = blk_mq_hw_sysfs_stat_show, + .store = blk_mq_hw_sysfs_stat_store, +}; static struct attribute *default_hw_ctx_attrs[] = { &blk_mq_hw_sysfs_queued.attr, @@ -327,6 +373,7 @@ static struct attribute *default_hw_ctx_attrs[] = { &blk_mq_hw_sysfs_cpus.attr, &blk_mq_hw_sysfs_active.attr, &blk_mq_hw_sysfs_poll.attr, + &blk_mq_hw_sysfs_stat.attr, NULL, }; diff --git a/block/blk-mq.c b/block/blk-mq.c index 6f5cb3f3dcac..19795886d46e 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -30,6 +30,7 @@ #include "blk.h" #include "blk-mq.h" #include "blk-mq-tag.h" +#include "blk-stat.h" static DEFINE_MUTEX(all_q_mutex); static LIST_HEAD(all_q_list); @@ -403,10 +404,27 @@ static void blk_mq_ipi_complete_request(struct request *rq) put_cpu(); } +static void blk_mq_stat_add(struct request *rq) +{ + if (rq->rq_flags & RQF_STATS) { + /* + * We could rq->mq_ctx here, but there's less of a risk + * of races if we have the completion event add the stats + * to the local software queue. 
+ */ + struct blk_mq_ctx *ctx; + + ctx = __blk_mq_get_ctx(rq->q, raw_smp_processor_id()); + blk_stat_add(&ctx->stat[rq_data_dir(rq)], rq); + } +} + static void __blk_mq_complete_request(struct request *rq) { struct request_queue *q = rq->q; + blk_mq_stat_add(rq); + if (!q->softirq_done_fn) blk_mq_end_request(rq, rq->errors); else @@ -450,6 +468,11 @@ void blk_mq_start_request(struct request *rq) if (unlikely(blk_bidi_rq(rq))) rq->next_rq->resid_len = blk_rq_bytes(rq->next_rq); + if (test_bit(QUEUE_FLAG_STATS, &q->queue_flags)) { + blk_stat_set_issue_time(&rq->issue_stat); + rq->rq_flags |= RQF_STATS; + } + blk_add_timer(rq); /* @@ -1784,6 +1807,8 @@ static void blk_mq_init_cpu_queues(struct request_queue *q, spin_lock_init(&__ctx->lock); INIT_LIST_HEAD(&__ctx->rq_list); __ctx->queue = q; + blk_stat_init(&__ctx->stat[BLK_STAT_READ]); + blk_stat_init(&__ctx->stat[BLK_STAT_WRITE]); /* If the cpu isn't online, the cpu is mapped to first hctx */ if (!cpu_online(i)) diff --git a/block/blk-mq.h b/block/blk-mq.h index ac772dac7ce8..b444370ae05b 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -1,6 +1,8 @@ #ifndef INT_BLK_MQ_H #define INT_BLK_MQ_H +#include "blk-stat.h" + struct blk_mq_tag_set; struct blk_mq_ctx { @@ -18,6 +20,7 @@ struct blk_mq_ctx { /* incremented at completion time */ unsigned long ____cacheline_aligned_in_smp rq_completed[2]; + struct blk_rq_stat stat[2]; struct request_queue *queue; struct kobject kobj; diff --git a/block/blk-stat.c b/block/blk-stat.c new file mode 100644 index 000000000000..688c958367ee --- /dev/null +++ b/block/blk-stat.c @@ -0,0 +1,248 @@ +/* + * Block stat tracking code + * + * Copyright (C) 2016 Jens Axboe + */ +#include +#include + +#include "blk-stat.h" +#include "blk-mq.h" + +static void blk_stat_flush_batch(struct blk_rq_stat *stat) +{ + const s32 nr_batch = READ_ONCE(stat->nr_batch); + const s32 nr_samples = READ_ONCE(stat->nr_batch); + + if (!nr_batch) + return; + if (!nr_samples) + stat->mean = div64_s64(stat->batch, 
nr_batch); + else { + stat->mean = div64_s64((stat->mean * nr_samples) + + stat->batch, + nr_batch + nr_samples); + } + + stat->nr_samples += nr_batch; + stat->nr_batch = stat->batch = 0; +} + +static void blk_stat_sum(struct blk_rq_stat *dst, struct blk_rq_stat *src) +{ + if (!src->nr_samples) + return; + + blk_stat_flush_batch(src); + + dst->min = min(dst->min, src->min); + dst->max = max(dst->max, src->max); + + if (!dst->nr_samples) + dst->mean = src->mean; + else { + dst->mean = div64_s64((src->mean * src->nr_samples) + + (dst->mean * dst->nr_samples), + dst->nr_samples + src->nr_samples); + } + dst->nr_samples += src->nr_samples; +} + +static void blk_mq_stat_get(struct request_queue *q, struct blk_rq_stat *dst) +{ + struct blk_mq_hw_ctx *hctx; + struct blk_mq_ctx *ctx; + uint64_t latest = 0; + int i, j, nr; + + blk_stat_init(&dst[BLK_STAT_READ]); + blk_stat_init(&dst[BLK_STAT_WRITE]); + + nr = 0; + do { + uint64_t newest = 0; + + queue_for_each_hw_ctx(q, hctx, i) { + hctx_for_each_ctx(hctx, ctx, j) { + if (!ctx->stat[BLK_STAT_READ].nr_samples && + !ctx->stat[BLK_STAT_WRITE].nr_samples) + continue; + if (ctx->stat[BLK_STAT_READ].time > newest) + newest = ctx->stat[BLK_STAT_READ].time; + if (ctx->stat[BLK_STAT_WRITE].time > newest) + newest = ctx->stat[BLK_STAT_WRITE].time; + } + } + + /* + * No samples + */ + if (!newest) + break; + + if (newest > latest) + latest = newest; + + queue_for_each_hw_ctx(q, hctx, i) { + hctx_for_each_ctx(hctx, ctx, j) { + if (ctx->stat[BLK_STAT_READ].time == newest) { + blk_stat_sum(&dst[BLK_STAT_READ], + &ctx->stat[BLK_STAT_READ]); + nr++; + } + if (ctx->stat[BLK_STAT_WRITE].time == newest) { + blk_stat_sum(&dst[BLK_STAT_WRITE], + &ctx->stat[BLK_STAT_WRITE]); + nr++; + } + } + } + /* + * If we race on finding an entry, just loop back again. + * Should be very rare. 
+ */ + } while (!nr); + + dst[BLK_STAT_READ].time = dst[BLK_STAT_WRITE].time = latest; +} + +void blk_queue_stat_get(struct request_queue *q, struct blk_rq_stat *dst) +{ + if (q->mq_ops) + blk_mq_stat_get(q, dst); + else { + memcpy(&dst[BLK_STAT_READ], &q->rq_stats[BLK_STAT_READ], + sizeof(struct blk_rq_stat)); + memcpy(&dst[BLK_STAT_WRITE], &q->rq_stats[BLK_STAT_WRITE], + sizeof(struct blk_rq_stat)); + } +} + +void blk_hctx_stat_get(struct blk_mq_hw_ctx *hctx, struct blk_rq_stat *dst) +{ + struct blk_mq_ctx *ctx; + unsigned int i, nr; + + nr = 0; + do { + uint64_t newest = 0; + + hctx_for_each_ctx(hctx, ctx, i) { + if (!ctx->stat[BLK_STAT_READ].nr_samples && + !ctx->stat[BLK_STAT_WRITE].nr_samples) + continue; + + if (ctx->stat[BLK_STAT_READ].time > newest) + newest = ctx->stat[BLK_STAT_READ].time; + if (ctx->stat[BLK_STAT_WRITE].time > newest) + newest = ctx->stat[BLK_STAT_WRITE].time; + } + + if (!newest) + break; + + hctx_for_each_ctx(hctx, ctx, i) { + if (ctx->stat[BLK_STAT_READ].time == newest) { + blk_stat_sum(&dst[BLK_STAT_READ], + &ctx->stat[BLK_STAT_READ]); + nr++; + } + if (ctx->stat[BLK_STAT_WRITE].time == newest) { + blk_stat_sum(&dst[BLK_STAT_WRITE], + &ctx->stat[BLK_STAT_WRITE]); + nr++; + } + } + /* + * If we race on finding an entry, just loop back again. 
+ * Should be very rare, as the window is only updated + * occasionally + */ + } while (!nr); +} + +static void __blk_stat_init(struct blk_rq_stat *stat, s64 time_now) +{ + stat->min = -1ULL; + stat->max = stat->nr_samples = stat->mean = 0; + stat->batch = stat->nr_batch = 0; + stat->time = time_now & BLK_STAT_NSEC_MASK; +} + +void blk_stat_init(struct blk_rq_stat *stat) +{ + __blk_stat_init(stat, ktime_to_ns(ktime_get())); +} + +static bool __blk_stat_is_current(struct blk_rq_stat *stat, s64 now) +{ + return (now & BLK_STAT_NSEC_MASK) == (stat->time & BLK_STAT_NSEC_MASK); +} + +bool blk_stat_is_current(struct blk_rq_stat *stat) +{ + return __blk_stat_is_current(stat, ktime_to_ns(ktime_get())); +} + +void blk_stat_add(struct blk_rq_stat *stat, struct request *rq) +{ + s64 now, value; + + now = __blk_stat_time(ktime_to_ns(ktime_get())); + if (now < blk_stat_time(&rq->issue_stat)) + return; + + if (!__blk_stat_is_current(stat, now)) + __blk_stat_init(stat, now); + + value = now - blk_stat_time(&rq->issue_stat); + if (value > stat->max) + stat->max = value; + if (value < stat->min) + stat->min = value; + + if (stat->batch + value < stat->batch || + stat->nr_batch + 1 == BLK_RQ_STAT_BATCH) + blk_stat_flush_batch(stat); + + stat->batch += value; + stat->nr_batch++; +} + +void blk_stat_clear(struct request_queue *q) +{ + if (q->mq_ops) { + struct blk_mq_hw_ctx *hctx; + struct blk_mq_ctx *ctx; + int i, j; + + queue_for_each_hw_ctx(q, hctx, i) { + hctx_for_each_ctx(hctx, ctx, j) { + blk_stat_init(&ctx->stat[BLK_STAT_READ]); + blk_stat_init(&ctx->stat[BLK_STAT_WRITE]); + } + } + } else { + blk_stat_init(&q->rq_stats[BLK_STAT_READ]); + blk_stat_init(&q->rq_stats[BLK_STAT_WRITE]); + } +} + +void blk_stat_set_issue_time(struct blk_issue_stat *stat) +{ + stat->time = (stat->time & BLK_STAT_MASK) | + (ktime_to_ns(ktime_get()) & BLK_STAT_TIME_MASK); +} + +/* + * Enable stat tracking, return whether it was enabled + */ +bool blk_stat_enable(struct request_queue *q) +{ + if 
(!test_bit(QUEUE_FLAG_STATS, &q->queue_flags)) { + set_bit(QUEUE_FLAG_STATS, &q->queue_flags); + return false; + } + + return true; +} diff --git a/block/blk-stat.h b/block/blk-stat.h new file mode 100644 index 000000000000..a2050a0a5314 --- /dev/null +++ b/block/blk-stat.h @@ -0,0 +1,42 @@ +#ifndef BLK_STAT_H +#define BLK_STAT_H + +/* + * ~0.13s window as a power-of-2 (2^27 nsecs) + */ +#define BLK_STAT_NSEC 134217728ULL +#define BLK_STAT_NSEC_MASK ~(BLK_STAT_NSEC - 1) + +/* + * Upper 3 bits can be used elsewhere + */ +#define BLK_STAT_RES_BITS 3 +#define BLK_STAT_SHIFT (64 - BLK_STAT_RES_BITS) +#define BLK_STAT_TIME_MASK ((1ULL << BLK_STAT_SHIFT) - 1) +#define BLK_STAT_MASK ~BLK_STAT_TIME_MASK + +enum { + BLK_STAT_READ = 0, + BLK_STAT_WRITE, +}; + +void blk_stat_add(struct blk_rq_stat *, struct request *); +void blk_hctx_stat_get(struct blk_mq_hw_ctx *, struct blk_rq_stat *); +void blk_queue_stat_get(struct request_queue *, struct blk_rq_stat *); +void blk_stat_clear(struct request_queue *); +void blk_stat_init(struct blk_rq_stat *); +bool blk_stat_is_current(struct blk_rq_stat *); +void blk_stat_set_issue_time(struct blk_issue_stat *); +bool blk_stat_enable(struct request_queue *); + +static inline u64 __blk_stat_time(u64 time) +{ + return time & BLK_STAT_TIME_MASK; +} + +static inline u64 blk_stat_time(struct blk_issue_stat *stat) +{ + return __blk_stat_time(stat->time); +} + +#endif diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 488c2e28feb8..9cdb7247727a 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -401,6 +401,26 @@ static ssize_t queue_dax_show(struct request_queue *q, char *page) return queue_var_show(blk_queue_dax(q), page); } +static ssize_t print_stat(char *page, struct blk_rq_stat *stat, const char *pre) +{ + return sprintf(page, "%s samples=%llu, mean=%lld, min=%lld, max=%lld\n", + pre, (long long) stat->nr_samples, + (long long) stat->mean, (long long) stat->min, + (long long) stat->max); +} + +static ssize_t 
queue_stats_show(struct request_queue *q, char *page) +{ + struct blk_rq_stat stat[2]; + ssize_t ret; + + blk_queue_stat_get(q, stat); + + ret = print_stat(page, &stat[BLK_STAT_READ], "read :"); + ret += print_stat(page + ret, &stat[BLK_STAT_WRITE], "write:"); + return ret; +} + static struct queue_sysfs_entry queue_requests_entry = { .attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR }, .show = queue_requests_show, @@ -553,6 +573,11 @@ static struct queue_sysfs_entry queue_dax_entry = { .show = queue_dax_show, }; +static struct queue_sysfs_entry queue_stats_entry = { + .attr = {.name = "stats", .mode = S_IRUGO }, + .show = queue_stats_show, +}; + static struct attribute *default_attrs[] = { &queue_requests_entry.attr, &queue_ra_entry.attr, @@ -582,6 +607,7 @@ static struct attribute *default_attrs[] = { &queue_poll_entry.attr, &queue_wc_entry.attr, &queue_dax_entry.attr, + &queue_stats_entry.attr, NULL, }; diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 562ac46cb790..4d0044d09984 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -250,4 +250,20 @@ static inline unsigned int blk_qc_t_to_tag(blk_qc_t cookie) return cookie & ((1u << BLK_QC_T_SHIFT) - 1); } +struct blk_issue_stat { + u64 time; +}; + +#define BLK_RQ_STAT_BATCH 64 + +struct blk_rq_stat { + s64 mean; + u64 min; + u64 max; + s32 nr_samples; + s32 nr_batch; + u64 batch; + s64 time; +}; + #endif /* __LINUX_BLK_TYPES_H */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index d364be6e6959..303723a2e5b8 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -117,6 +117,8 @@ typedef __u32 __bitwise req_flags_t; #define RQF_PM ((__force req_flags_t)(1 << 15)) /* on IO scheduler merge hash */ #define RQF_HASHED ((__force req_flags_t)(1 << 16)) +/* IO stats tracking on */ +#define RQF_STATS ((__force req_flags_t)(1 << 17)) /* flags that prevent us from merging requests: */ #define RQF_NOMERGE_FLAGS \ @@ -197,6 +199,7 @@ struct request 
{ struct gendisk *rq_disk; struct hd_struct *part; unsigned long start_time; + struct blk_issue_stat issue_stat; #ifdef CONFIG_BLK_CGROUP struct request_list *rl; /* rl this rq is alloced from */ unsigned long long start_time_ns; @@ -492,6 +495,9 @@ struct request_queue { unsigned int nr_sorted; unsigned int in_flight[2]; + + struct blk_rq_stat rq_stats[2]; + /* * Number of active block driver functions for which blk_drain_queue() * must wait. Must be incremented around functions that unlock the @@ -585,6 +591,7 @@ struct request_queue { #define QUEUE_FLAG_FUA 24 /* device supports FUA writes */ #define QUEUE_FLAG_FLUSH_NQ 25 /* flush not queueuable */ #define QUEUE_FLAG_DAX 26 /* device supports DAX */ +#define QUEUE_FLAG_STATS 27 /* track rq completion times */ #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ (1 << QUEUE_FLAG_STACKABLE) | \ -- cgit v1.2.3 From 87760e5eef359788047d6fd54fc12eec74ce0d27 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 9 Nov 2016 12:38:14 -0700 Subject: block: hook up writeback throttling Enable throttling of buffered writeback to make it a lot more smooth, and have way less impact on other system activity. Background writeback should be, by definition, background activity. The fact that we flush huge bundles of it at a time means that it potentially has heavy impacts on foreground workloads, which isn't ideal. We can't easily limit the sizes of writes that we do, since that would impact file system layout in the presence of delayed allocation. So just throttle back buffered writeback, unless someone is waiting for it. The algorithm for when to throttle takes its inspiration from the CoDel networking scheduling algorithm. Like CoDel, blk-wb monitors the minimum latencies of requests over a window of time. In that window of time, if the minimum latency of any request exceeds a given target, then a scale count is incremented and the queue depth is shrunk. The next monitoring window is shrunk accordingly.
Unlike CoDel, if we hit a window that exhibits good behavior, then we simply increment the scale count and re-calculate the limits for that scale value. This prevents us from oscillating between a close-to-ideal value and max all the time, instead remaining in the windows where we get good behavior. Unlike CoDel, blk-wb allows the scale count to go negative. This happens if we primarily have writes going on. Unlike positive scale counts, this doesn't change the size of the monitoring window. When the heavy writers finish, blk-wb quickly snaps back to its stable state of a zero scale count. The patch registers a sysfs entry, 'wbt_lat_usec'. This sets the latency target to be met. It defaults to 2 msec for non-rotational storage, and 75 msec for rotational storage. Setting this value to '0' disables blk-wb. Generally, a user would not have to touch this setting. We don't enable WBT on devices that are managed with CFQ, and have a non-root block cgroup attached. If we have a proportional share setup on this particular disk, then the wbt throttling will interfere with that. We don't have a strong need for wbt for that case, since we will rely on CFQ doing that for us. Signed-off-by: Jens Axboe --- Documentation/block/queue-sysfs.txt | 7 +++ block/Kconfig | 26 +++++++++++ block/blk-core.c | 17 ++++++- block/blk-mq.c | 26 ++++++++++- block/blk-settings.c | 4 ++ block/blk-sysfs.c | 88 +++++++++++++++++++++++++++++++++++++ block/cfq-iosched.c | 14 ++++++ include/linux/blkdev.h | 3 ++ 8 files changed, 181 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/Documentation/block/queue-sysfs.txt b/Documentation/block/queue-sysfs.txt index 2a3904030dea..87abf1ac2939 100644 --- a/Documentation/block/queue-sysfs.txt +++ b/Documentation/block/queue-sysfs.txt @@ -169,5 +169,12 @@ This is the number of bytes the device can write in a single write-same command. A value of '0' means write-same is not supported by this device.
+wb_lat_usec (RW) +---------------- +If the device is registered for writeback throttling, then this file shows +the target minimum read latency. If this latency is exceeded in a given +window of time (see wb_window_usec), then the writeback throttling will start +scaling back writes. + Jens Axboe , February 2009 diff --git a/block/Kconfig b/block/Kconfig index 3a024440a669..8bf114a3858a 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -121,6 +121,32 @@ config BLK_CMDLINE_PARSER See Documentation/block/cmdline-partition.txt for more information. +config BLK_WBT + bool "Enable support for block device writeback throttling" + default n + ---help--- + Enabling this option enables the block layer to throttle buffered + background writeback from the VM, making it more smooth and having + less impact on foreground operations. The throttling is done + dynamically on an algorithm loosely based on CoDel, factoring in + the realtime performance of the disk. + +config BLK_WBT_SQ + bool "Single queue writeback throttling" + default n + depends on BLK_WBT + ---help--- + Enable writeback throttling by default on legacy single queue devices + +config BLK_WBT_MQ + bool "Multiqueue writeback throttling" + default y + depends on BLK_WBT + ---help--- + Enable writeback throttling by default on multiqueue devices. + Multiqueue currently doesn't have support for IO scheduling, + enabling this option is recommended. 
+ menu "Partition Types" source "block/partitions/Kconfig" diff --git a/block/blk-core.c b/block/blk-core.c index 216372b01624..59f8129a4295 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -39,6 +39,7 @@ #include "blk.h" #include "blk-mq.h" +#include "blk-wbt.h" EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap); EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap); @@ -882,6 +883,7 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn, fail: blk_free_flush_queue(q->fq); + wbt_exit(q); return NULL; } EXPORT_SYMBOL(blk_init_allocated_queue); @@ -1344,6 +1346,7 @@ void blk_requeue_request(struct request_queue *q, struct request *rq) blk_delete_timer(rq); blk_clear_rq_complete(rq); trace_block_rq_requeue(q, rq); + wbt_requeue(q->rq_wb, &rq->issue_stat); if (rq->rq_flags & RQF_QUEUED) blk_queue_end_tag(q, rq); @@ -1436,6 +1439,8 @@ void __blk_put_request(struct request_queue *q, struct request *req) /* this is a bio leak */ WARN_ON(req->bio != NULL); + wbt_done(q->rq_wb, &req->issue_stat); + /* * Request may not have originated from ll_rw_blk. if not, * it didn't come out of our reserved rq pools @@ -1663,6 +1668,7 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio) int el_ret, where = ELEVATOR_INSERT_SORT; struct request *req; unsigned int request_count = 0; + unsigned int wb_acct; /* * low level driver can indicate that it wants pages above a @@ -1715,17 +1721,22 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio) } get_rq: + wb_acct = wbt_wait(q->rq_wb, bio, q->queue_lock); + /* * Grab a free request. This is might sleep but can not fail. * Returns with the queue unlocked. 
*/ req = get_request(q, bio->bi_opf, bio, GFP_NOIO); if (IS_ERR(req)) { + __wbt_done(q->rq_wb, wb_acct); bio->bi_error = PTR_ERR(req); bio_endio(bio); goto out_unlock; } + wbt_track(&req->issue_stat, wb_acct); + /* * After dropping the lock and possibly sleeping here, our request * may now be mergeable after it had proven unmergeable (above). @@ -2467,6 +2478,7 @@ void blk_start_request(struct request *req) if (test_bit(QUEUE_FLAG_STATS, &req->q->queue_flags)) { blk_stat_set_issue_time(&req->issue_stat); req->rq_flags |= RQF_STATS; + wbt_issue(req->q->rq_wb, &req->issue_stat); } /* @@ -2708,9 +2720,10 @@ void blk_finish_request(struct request *req, int error) blk_account_io_done(req); - if (req->end_io) + if (req->end_io) { + wbt_done(req->q->rq_wb, &req->issue_stat); req->end_io(req, error); - else { + } else { if (blk_bidi_rq(req)) __blk_put_request(req->next_rq->q, req->next_rq); diff --git a/block/blk-mq.c b/block/blk-mq.c index 19795886d46e..d180c989a0e5 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -31,6 +31,7 @@ #include "blk-mq.h" #include "blk-mq-tag.h" #include "blk-stat.h" +#include "blk-wbt.h" static DEFINE_MUTEX(all_q_mutex); static LIST_HEAD(all_q_list); @@ -326,6 +327,8 @@ static void __blk_mq_free_request(struct blk_mq_hw_ctx *hctx, if (rq->rq_flags & RQF_MQ_INFLIGHT) atomic_dec(&hctx->nr_active); + + wbt_done(q->rq_wb, &rq->issue_stat); rq->rq_flags = 0; clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags); @@ -354,6 +357,7 @@ inline void __blk_mq_end_request(struct request *rq, int error) blk_account_io_done(rq); if (rq->end_io) { + wbt_done(rq->q->rq_wb, &rq->issue_stat); rq->end_io(rq, error); } else { if (unlikely(blk_bidi_rq(rq))) @@ -471,6 +475,7 @@ void blk_mq_start_request(struct request *rq) if (test_bit(QUEUE_FLAG_STATS, &q->queue_flags)) { blk_stat_set_issue_time(&rq->issue_stat); rq->rq_flags |= RQF_STATS; + wbt_issue(q->rq_wb, &rq->issue_stat); } blk_add_timer(rq); @@ -508,6 +513,7 @@ static void __blk_mq_requeue_request(struct request 
*rq) struct request_queue *q = rq->q; trace_block_rq_requeue(q, rq); + wbt_requeue(q->rq_wb, &rq->issue_stat); if (test_and_clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags)) { if (q->dma_drain_size && blk_rq_bytes(rq)) @@ -1339,6 +1345,7 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) struct blk_plug *plug; struct request *same_queue_rq = NULL; blk_qc_t cookie; + unsigned int wb_acct; blk_queue_bounce(q, &bio); @@ -1353,9 +1360,15 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) blk_attempt_plug_merge(q, bio, &request_count, &same_queue_rq)) return BLK_QC_T_NONE; + wb_acct = wbt_wait(q->rq_wb, bio, NULL); + rq = blk_mq_map_request(q, bio, &data); - if (unlikely(!rq)) + if (unlikely(!rq)) { + __wbt_done(q->rq_wb, wb_acct); return BLK_QC_T_NONE; + } + + wbt_track(&rq->issue_stat, wb_acct); cookie = blk_tag_to_qc_t(rq->tag, data.hctx->queue_num); @@ -1439,6 +1452,7 @@ static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio) struct blk_mq_alloc_data data; struct request *rq; blk_qc_t cookie; + unsigned int wb_acct; blk_queue_bounce(q, &bio); @@ -1455,9 +1469,15 @@ static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio) } else request_count = blk_plug_queued_count(q); + wb_acct = wbt_wait(q->rq_wb, bio, NULL); + rq = blk_mq_map_request(q, bio, &data); - if (unlikely(!rq)) + if (unlikely(!rq)) { + __wbt_done(q->rq_wb, wb_acct); return BLK_QC_T_NONE; + } + + wbt_track(&rq->issue_stat, wb_acct); cookie = blk_tag_to_qc_t(rq->tag, data.hctx->queue_num); @@ -2139,6 +2159,8 @@ void blk_mq_free_queue(struct request_queue *q) list_del_init(&q->all_q_node); mutex_unlock(&all_q_mutex); + wbt_exit(q); + blk_mq_del_queue_tag_set(q); blk_mq_exit_hw_queues(q, set, set->nr_hw_queues); diff --git a/block/blk-settings.c b/block/blk-settings.c index 9cf053759363..c7ccabc0ec3e 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -13,6 +13,7 @@ #include #include "blk.h" 
+#include "blk-wbt.h" unsigned long blk_max_low_pfn; EXPORT_SYMBOL(blk_max_low_pfn); @@ -845,6 +846,7 @@ EXPORT_SYMBOL_GPL(blk_queue_flush_queueable); void blk_set_queue_depth(struct request_queue *q, unsigned int depth) { q->queue_depth = depth; + wbt_set_queue_depth(q->rq_wb, depth); } EXPORT_SYMBOL(blk_set_queue_depth); @@ -868,6 +870,8 @@ void blk_queue_write_cache(struct request_queue *q, bool wc, bool fua) else queue_flag_clear(QUEUE_FLAG_FUA, q); spin_unlock_irq(q->queue_lock); + + wbt_set_write_cache(q->rq_wb, test_bit(QUEUE_FLAG_WC, &q->queue_flags)); } EXPORT_SYMBOL_GPL(blk_queue_write_cache); diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 9cdb7247727a..9262d2d60a09 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -13,6 +13,7 @@ #include "blk.h" #include "blk-mq.h" +#include "blk-wbt.h" struct queue_sysfs_entry { struct attribute attr; @@ -41,6 +42,19 @@ queue_var_store(unsigned long *var, const char *page, size_t count) return count; } +static ssize_t queue_var_store64(u64 *var, const char *page) +{ + int err; + u64 v; + + err = kstrtou64(page, 10, &v); + if (err < 0) + return err; + + *var = v; + return 0; +} + static ssize_t queue_requests_show(struct request_queue *q, char *page) { return queue_var_show(q->nr_requests, (page)); @@ -364,6 +378,32 @@ static ssize_t queue_poll_store(struct request_queue *q, const char *page, return ret; } +static ssize_t queue_wb_lat_show(struct request_queue *q, char *page) +{ + if (!q->rq_wb) + return -EINVAL; + + return sprintf(page, "%llu\n", div_u64(q->rq_wb->min_lat_nsec, 1000)); +} + +static ssize_t queue_wb_lat_store(struct request_queue *q, const char *page, + size_t count) +{ + ssize_t ret; + u64 val; + + if (!q->rq_wb) + return -EINVAL; + + ret = queue_var_store64(&val, page); + if (ret < 0) + return ret; + + q->rq_wb->min_lat_nsec = val * 1000ULL; + wbt_update_limits(q->rq_wb); + return count; +} + static ssize_t queue_wc_show(struct request_queue *q, char *page) { if 
(test_bit(QUEUE_FLAG_WC, &q->queue_flags)) @@ -578,6 +618,12 @@ static struct queue_sysfs_entry queue_stats_entry = { .show = queue_stats_show, }; +static struct queue_sysfs_entry queue_wb_lat_entry = { + .attr = {.name = "wbt_lat_usec", .mode = S_IRUGO | S_IWUSR }, + .show = queue_wb_lat_show, + .store = queue_wb_lat_store, +}; + static struct attribute *default_attrs[] = { &queue_requests_entry.attr, &queue_ra_entry.attr, @@ -608,6 +654,7 @@ static struct attribute *default_attrs[] = { &queue_wc_entry.attr, &queue_dax_entry.attr, &queue_stats_entry.attr, + &queue_wb_lat_entry.attr, NULL, }; @@ -682,6 +729,7 @@ static void blk_release_queue(struct kobject *kobj) struct request_queue *q = container_of(kobj, struct request_queue, kobj); + wbt_exit(q); bdi_exit(&q->backing_dev_info); blkcg_exit_queue(q); @@ -722,6 +770,44 @@ struct kobj_type blk_queue_ktype = { .release = blk_release_queue, }; +static void blk_wb_stat_get(void *data, struct blk_rq_stat *stat) +{ + blk_queue_stat_get(data, stat); +} + +static void blk_wb_stat_clear(void *data) +{ + blk_stat_clear(data); +} + +static bool blk_wb_stat_is_current(struct blk_rq_stat *stat) +{ + return blk_stat_is_current(stat); +} + +static struct wb_stat_ops wb_stat_ops = { + .get = blk_wb_stat_get, + .is_current = blk_wb_stat_is_current, + .clear = blk_wb_stat_clear, +}; + +static void blk_wb_init(struct request_queue *q) +{ +#ifndef CONFIG_BLK_WBT_MQ + if (q->mq_ops) + return; +#endif +#ifndef CONFIG_BLK_WBT_SQ + if (q->request_fn) + return; +#endif + + /* + * If this fails, we don't get throttling + */ + wbt_init(q, &wb_stat_ops); +} + int blk_register_queue(struct gendisk *disk) { int ret; @@ -761,6 +847,8 @@ int blk_register_queue(struct gendisk *disk) if (q->mq_ops) blk_mq_register_dev(dev, q); + blk_wb_init(q); + if (!q->request_fn) return 0; diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 61010511c5a0..e280d08ef6d7 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -16,6 +16,7 @@ #include 
#include #include "blk.h" +#include "blk-wbt.h" /* * tunables @@ -3762,9 +3763,11 @@ static void check_blkcg_changed(struct cfq_io_cq *cic, struct bio *bio) struct cfq_data *cfqd = cic_to_cfqd(cic); struct cfq_queue *cfqq; uint64_t serial_nr; + bool nonroot_cg; rcu_read_lock(); serial_nr = bio_blkcg(bio)->css.serial_nr; + nonroot_cg = bio_blkcg(bio) != &blkcg_root; rcu_read_unlock(); /* @@ -3774,6 +3777,17 @@ static void check_blkcg_changed(struct cfq_io_cq *cic, struct bio *bio) if (unlikely(!cfqd) || likely(cic->blkcg_serial_nr == serial_nr)) return; + /* + * If we have a non-root cgroup, we can depend on that to + * do proper throttling of writes. Turn off wbt for that + * case. + */ + if (nonroot_cg) { + struct request_queue *q = cfqd->queue; + + wbt_disable(q->rq_wb); + } + /* * Drop reference to queues. New queues will be assigned in new * group upon arrival of fresh requests. diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 303723a2e5b8..15da9e430f90 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -38,6 +38,7 @@ struct bsg_job; struct blkcg_gq; struct blk_flush_queue; struct pr_ops; +struct rq_wb; #define BLKDEV_MIN_RQ 4 #define BLKDEV_MAX_RQ 128 /* Default maximum */ @@ -383,6 +384,8 @@ struct request_queue { int nr_rqs[2]; /* # allocated [a]sync rqs */ int nr_rqs_elvpriv; /* # allocated rqs w/ elvpriv */ + struct rq_wb *rq_wb; + /* * If blkcg is not used, @q->root_rl serves all requests. If blkcg * is used, root blkg allocates from @q->root_rl and all other -- cgit v1.2.3 From ee7930ee27fe5240398cc302fa8eb4454725f188 Mon Sep 17 00:00:00 2001 From: Markus Mayer Date: Mon, 7 Nov 2016 10:02:23 -0800 Subject: cpufreq: stats: New sysfs attribute for clearing statistics Allow CPUfreq statistics to be cleared by writing anything to /sys/.../cpufreq/stats/reset. Signed-off-by: Markus Mayer Acked-by: Viresh Kumar Signed-off-by: Rafael J. 
Wysocki --- Documentation/cpu-freq/cpufreq-stats.txt | 6 ++++++ drivers/cpufreq/cpufreq_stats.c | 22 ++++++++++++++++++++++ include/linux/cpufreq.h | 4 ++++ 3 files changed, 32 insertions(+) (limited to 'include/linux') diff --git a/Documentation/cpu-freq/cpufreq-stats.txt b/Documentation/cpu-freq/cpufreq-stats.txt index 8d9773f23550..3c355f6ad834 100644 --- a/Documentation/cpu-freq/cpufreq-stats.txt +++ b/Documentation/cpu-freq/cpufreq-stats.txt @@ -44,11 +44,17 @@ the stats driver insertion. total 0 drwxr-xr-x 2 root root 0 May 14 16:06 . drwxr-xr-x 3 root root 0 May 14 15:58 .. +--w------- 1 root root 4096 May 14 16:06 reset -r--r--r-- 1 root root 4096 May 14 16:06 time_in_state -r--r--r-- 1 root root 4096 May 14 16:06 total_trans -r--r--r-- 1 root root 4096 May 14 16:06 trans_table -------------------------------------------------------------------------------- +- reset +Write-only attribute that can be used to reset the stat counters. This can be +useful for evaluating system behaviour under different governors without the +need for a reboot. + - time_in_state This gives the amount of time spent in each of the frequencies supported by this CPU. The cat output will have "