From fdc0074c5fc8c7adb8186cbb123fe2082d9bd05f Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Mon, 9 Feb 2015 18:23:20 +0800 Subject: ARM: sunxi: Have ARCH_SUNXI select RESET_CONTROLLER for clock driver usage As the sunxi usb clocks all contain a reset controller, it is not possible to build the sunxi clock driver without RESET_CONTROLLER enabled. Doing so results in an undefined symbol error: drivers/built-in.o: In function `sunxi_gates_clk_setup': linux/drivers/clk/sunxi/clk-sunxi.c:1071: undefined reference to `reset_controller_register' This is possible if building a minimal kernel without PHY_SUN4I_USB. The dependency issue is made visible at compile time instead of link time by the new A80 mmc clocks, which also use a reset control itself. This patch makes ARCH_SUNXI select ARCH_HAS_RESET_CONTROLLER and RESET_CONTROLLER. Fixes: 559482d1f950 ARM: sunxi: Split the various SoCs support in Kconfig Cc: # 3.16+ Reported-by: Lourens Rozema Acked-by: Arnd Bergmann Signed-off-by: Chen-Yu Tsai Signed-off-by: Maxime Ripard --- arch/arm/mach-sunxi/Kconfig | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/arch/arm/mach-sunxi/Kconfig b/arch/arm/mach-sunxi/Kconfig index a77604fbaf25..81502b90dd91 100644 --- a/arch/arm/mach-sunxi/Kconfig +++ b/arch/arm/mach-sunxi/Kconfig @@ -1,10 +1,12 @@ menuconfig ARCH_SUNXI bool "Allwinner SoCs" if ARCH_MULTI_V7 select ARCH_REQUIRE_GPIOLIB + select ARCH_HAS_RESET_CONTROLLER select CLKSRC_MMIO select GENERIC_IRQ_CHIP select PINCTRL select SUN4I_TIMER + select RESET_CONTROLLER if ARCH_SUNXI @@ -20,10 +22,8 @@ config MACH_SUN5I config MACH_SUN6I bool "Allwinner A31 (sun6i) SoCs support" default ARCH_SUNXI - select ARCH_HAS_RESET_CONTROLLER select ARM_GIC select MFD_SUN6I_PRCM - select RESET_CONTROLLER select SUN5I_HSTIMER config MACH_SUN7I @@ -37,16 +37,12 @@ config MACH_SUN7I config MACH_SUN8I bool "Allwinner A23 (sun8i) SoCs support" default ARCH_SUNXI - select ARCH_HAS_RESET_CONTROLLER select ARM_GIC select MFD_SUN6I_PRCM - select RESET_CONTROLLER config MACH_SUN9I bool "Allwinner (sun9i) SoCs support" default ARCH_SUNXI - select ARCH_HAS_RESET_CONTROLLER select ARM_GIC - select RESET_CONTROLLER endif -- cgit v1.2.1 From 5c0c75d33d45a8dc7a2af815834812d41f5361e8 Mon Sep 17 00:00:00 2001 From: Masanari Iida Date: Sat, 31 Jan 2015 14:49:38 +0900 Subject: ARM: pxa: Fix typo in zeus.c This patch fix a typo in struct platform_device can_regulator_device. Signed-off-by: Masanari Iida Acked-by: Daniel Mack Signed-off-by: Robert Jarzmik --- arch/arm/mach-pxa/zeus.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-pxa/zeus.c b/arch/arm/mach-pxa/zeus.c index 205f9bf3821e..ac2ae5c71ab4 100644 --- a/arch/arm/mach-pxa/zeus.c +++ b/arch/arm/mach-pxa/zeus.c @@ -412,7 +412,7 @@ static struct fixed_voltage_config can_regulator_pdata = { }; static struct platform_device can_regulator_device = { - .name = "reg-fixed-volage", + .name = "reg-fixed-voltage", .id = 0, .dev = { .platform_data = &can_regulator_pdata, -- cgit v1.2.1 From d6cf30ca716b347587b35923eda400ad2d9e8832 Mon Sep 17 00:00:00 2001 From: Robert Jarzmik Date: Sat, 14 Feb 2015 22:41:56 +0100 Subject: ARM: pxa: fix pxa interrupts handling in DT The commit "ARM: pxa: arbitrarily set first interrupt number" changed the first pxa interrupt to 16. As a consequence, device-tree builds got broken, because : - pxa_mask_irq() and pxa_unmask_irq() are using IRQ_BIT() - IRQ_BIT(x) calculates the interrupts as : x - PXA_IRQ(0) Before the commit, the first interrupt shift, PXA_IRQ(0) was 0, therefore IRQ_BIT(x) was x. After the change, it is necessary that the same shift of 16 is applied between the virtual interrupt number and the hardware irq number. This situation comes from the common irq_chip shared between legacy platform builds and device-tree builds. Fix the broken interrupts in DT case by adding this shift in the DT case too. As a consequence of the IRQ_BIT() is removed alltogether from interrupts handling, even in the platform data types of platforms : - a legacy irq domain is used - the irq_chip handles hardware interrupts - the virtual to hardware interrupt conversion is fully handled by irq domain mechanics Signed-off-by: Robert Jarzmik --- arch/arm/Kconfig | 1 + arch/arm/mach-pxa/irq.c | 111 +++++++++++++++++++++--------------------------- 2 files changed, 49 insertions(+), 63 deletions(-) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 9f1f09a2bc9b..cf4c0c99aa25 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -619,6 +619,7 @@ config ARCH_PXA select GENERIC_CLOCKEVENTS select GPIO_PXA select HAVE_IDE + select IRQ_DOMAIN select MULTI_IRQ_HANDLER select PLAT_PXA select SPARSE_IRQ diff --git a/arch/arm/mach-pxa/irq.c b/arch/arm/mach-pxa/irq.c index 0eecd83c624e..89a7c06570d3 100644 --- a/arch/arm/mach-pxa/irq.c +++ b/arch/arm/mach-pxa/irq.c @@ -11,6 +11,7 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ +#include #include #include #include @@ -40,7 +41,6 @@ #define ICHP_VAL_IRQ (1 << 31) #define ICHP_IRQ(i) (((i) >> 16) & 0x7fff) #define IPR_VALID (1 << 31) -#define IRQ_BIT(n) (((n) - PXA_IRQ(0)) & 0x1f) #define MAX_INTERNAL_IRQS 128 @@ -51,6 +51,7 @@ static void __iomem *pxa_irq_base; static int pxa_internal_irq_nr; static bool cpu_has_ipr; +static struct irq_domain *pxa_irq_domain; static inline void __iomem *irq_base(int i) { @@ -66,18 +67,20 @@ static inline void __iomem *irq_base(int i) void pxa_mask_irq(struct irq_data *d) { void __iomem *base = irq_data_get_irq_chip_data(d); + irq_hw_number_t irq = irqd_to_hwirq(d); uint32_t icmr = __raw_readl(base + ICMR); - icmr &= ~(1 << IRQ_BIT(d->irq)); + icmr &= ~BIT(irq & 0x1f); __raw_writel(icmr, base + ICMR); } void pxa_unmask_irq(struct irq_data *d) { void __iomem *base = irq_data_get_irq_chip_data(d); + irq_hw_number_t irq = irqd_to_hwirq(d); uint32_t icmr = __raw_readl(base + ICMR); - icmr |= 1 << IRQ_BIT(d->irq); + icmr |= BIT(irq & 0x1f); __raw_writel(icmr, base + ICMR); } @@ -118,40 +121,63 @@ asmlinkage void __exception_irq_entry ichp_handle_irq(struct pt_regs *regs) } while (1); } -void __init pxa_init_irq(int irq_nr, int (*fn)(struct irq_data *, unsigned int)) +static int pxa_irq_map(struct irq_domain *h, unsigned int virq, + irq_hw_number_t hw) { - int irq, i, n; + void __iomem *base = irq_base(hw / 32); - BUG_ON(irq_nr > MAX_INTERNAL_IRQS); + /* initialize interrupt priority */ + if (cpu_has_ipr) + __raw_writel(hw | IPR_VALID, pxa_irq_base + IPR(hw)); + + irq_set_chip_and_handler(virq, &pxa_internal_irq_chip, + handle_level_irq); + irq_set_chip_data(virq, base); + set_irq_flags(virq, IRQF_VALID); + + return 0; +} + +static struct irq_domain_ops pxa_irq_ops = { + .map = pxa_irq_map, + .xlate = irq_domain_xlate_onecell, +}; + +static __init void +pxa_init_irq_common(struct device_node *node, int irq_nr, + int (*fn)(struct irq_data *, unsigned int)) +{ + int n; pxa_internal_irq_nr = irq_nr; - cpu_has_ipr = !cpu_is_pxa25x(); - pxa_irq_base = io_p2v(0x40d00000); + pxa_irq_domain = irq_domain_add_legacy(node, irq_nr, + PXA_IRQ(0), 0, + &pxa_irq_ops, NULL); + if (!pxa_irq_domain) + panic("Unable to add PXA IRQ domain\n"); + irq_set_default_host(pxa_irq_domain); for (n = 0; n < irq_nr; n += 32) { void __iomem *base = irq_base(n >> 5); __raw_writel(0, base + ICMR); /* disable all IRQs */ __raw_writel(0, base + ICLR); /* all IRQs are IRQ, not FIQ */ - for (i = n; (i < (n + 32)) && (i < irq_nr); i++) { - /* initialize interrupt priority */ - if (cpu_has_ipr) - __raw_writel(i | IPR_VALID, pxa_irq_base + IPR(i)); - - irq = PXA_IRQ(i); - irq_set_chip_and_handler(irq, &pxa_internal_irq_chip, - handle_level_irq); - irq_set_chip_data(irq, base); - set_irq_flags(irq, IRQF_VALID); - } } - /* only unmasked interrupts kick us out of idle */ __raw_writel(1, irq_base(0) + ICCR); pxa_internal_irq_chip.irq_set_wake = fn; } +void __init pxa_init_irq(int irq_nr, int (*fn)(struct irq_data *, unsigned int)) +{ + BUG_ON(irq_nr > MAX_INTERNAL_IRQS); + + pxa_irq_base = io_p2v(0x40d00000); + cpu_has_ipr = !cpu_is_pxa25x(); + pxa_init_irq_common(NULL, irq_nr, fn); +} + #ifdef CONFIG_PM static unsigned long saved_icmr[MAX_INTERNAL_IRQS/32]; static unsigned long saved_ipr[MAX_INTERNAL_IRQS]; @@ -203,30 +229,6 @@ struct syscore_ops pxa_irq_syscore_ops = { }; #ifdef CONFIG_OF -static struct irq_domain *pxa_irq_domain; - -static int pxa_irq_map(struct irq_domain *h, unsigned int virq, - irq_hw_number_t hw) -{ - void __iomem *base = irq_base(hw / 32); - - /* initialize interrupt priority */ - if (cpu_has_ipr) - __raw_writel(hw | IPR_VALID, pxa_irq_base + IPR(hw)); - - irq_set_chip_and_handler(hw, &pxa_internal_irq_chip, - handle_level_irq); - irq_set_chip_data(hw, base); - set_irq_flags(hw, IRQF_VALID); - - return 0; -} - -static struct irq_domain_ops pxa_irq_ops = { - .map = pxa_irq_map, - .xlate = irq_domain_xlate_onecell, -}; - static const struct of_device_id intc_ids[] __initconst = { { .compatible = "marvell,pxa-intc", }, {} @@ -236,7 +238,7 @@ void __init pxa_dt_irq_init(int (*fn)(struct irq_data *, unsigned int)) { struct device_node *node; struct resource res; - int n, ret; + int ret; node = of_find_matching_node(NULL, intc_ids); if (!node) { @@ -267,23 +269,6 @@ void __init pxa_dt_irq_init(int (*fn)(struct irq_data *, unsigned int)) return; } - pxa_irq_domain = irq_domain_add_legacy(node, pxa_internal_irq_nr, 0, 0, - &pxa_irq_ops, NULL); - if (!pxa_irq_domain) - panic("Unable to add PXA IRQ domain\n"); - - irq_set_default_host(pxa_irq_domain); - - for (n = 0; n < pxa_internal_irq_nr; n += 32) { - void __iomem *base = irq_base(n >> 5); - - __raw_writel(0, base + ICMR); /* disable all IRQs */ - __raw_writel(0, base + ICLR); /* all IRQs are IRQ, not FIQ */ - } - - /* only unmasked interrupts kick us out of idle */ - __raw_writel(1, irq_base(0) + ICCR); - - pxa_internal_irq_chip.irq_set_wake = fn; + pxa_init_irq_common(node, pxa_internal_irq_nr, fn); } #endif /* CONFIG_OF */ -- cgit v1.2.1 From 7d70e15480c0450d2bfafaad338a32e884fc215e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 4 Mar 2015 10:37:43 -0500 Subject: writeback: add missing INITIAL_JIFFIES init in global_update_bandwidth() global_update_bandwidth() uses static variable update_time as the timestamp for the last update but forgets to initialize it to INITIALIZE_JIFFIES. This means that global_dirty_limit will be 5 mins into the future on 32bit and some large amount jiffies into the past on 64bit. This isn't critical as the only effect is that global_dirty_limit won't be updated for the first 5 mins after booting on 32bit machines, especially given the auxiliary nature of global_dirty_limit's role - protecting against global dirty threshold's sudden dips; however, it does lead to unintended suboptimal behavior. Fix it. Fixes: c42843f2f0bb ("writeback: introduce smoothed global dirty limit") Signed-off-by: Tejun Heo Acked-by: Jan Kara Cc: Wu Fengguang Cc: Jens Axboe Cc: stable@vger.kernel.org Signed-off-by: Jens Axboe --- mm/page-writeback.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 45e187b2d971..b4fd980a93eb 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -922,7 +922,7 @@ static void global_update_bandwidth(unsigned long thresh, unsigned long now) { static DEFINE_SPINLOCK(dirty_lock); - static unsigned long update_time; + static unsigned long update_time = INITIAL_JIFFIES; /* * check locklessly first to optimize away locking for the most time -- cgit v1.2.1 From ff6b8090e26ef7649ef0cc6b42389141ef48b0cf Mon Sep 17 00:00:00 2001 From: Sudip Mukherjee Date: Tue, 27 Jan 2015 18:08:22 +0530 Subject: nbd: fix possible memory leak we have already allocated memory for nbd_dev, but we were not releasing that memory and just returning the error value. Signed-off-by: Sudip Mukherjee Acked-by: Paul Clements Cc: Signed-off-by: Markus Pargmann --- drivers/block/nbd.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 4bc2a5cb9935..a98c41f72c63 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -803,10 +803,6 @@ static int __init nbd_init(void) return -EINVAL; } - nbd_dev = kcalloc(nbds_max, sizeof(*nbd_dev), GFP_KERNEL); - if (!nbd_dev) - return -ENOMEM; - part_shift = 0; if (max_part > 0) { part_shift = fls(max_part); @@ -828,6 +824,10 @@ static int __init nbd_init(void) if (nbds_max > 1UL << (MINORBITS - part_shift)) return -EINVAL; + nbd_dev = kcalloc(nbds_max, sizeof(*nbd_dev), GFP_KERNEL); + if (!nbd_dev) + return -ENOMEM; + for (i = 0; i < nbds_max; i++) { struct gendisk *disk = alloc_disk(1 << part_shift); if (!disk) -- cgit v1.2.1 From 12cb89e37a0c25fae7a0f1d2e4985558db9d0b13 Mon Sep 17 00:00:00 2001 From: "Ivan T. Ivanov" Date: Fri, 6 Mar 2015 17:26:17 +0200 Subject: spi: qup: Fix cs-num DT property parsing num-cs is 32 bit property, don't read just upper 16 bits. Fixes: 4a8573abe965 (spi: qup: Remove chip select function) Signed-off-by: Ivan T. Ivanov Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- drivers/spi/spi-qup.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/spi/spi-qup.c b/drivers/spi/spi-qup.c index ff9cdbdb6672..2b2c359f5a50 100644 --- a/drivers/spi/spi-qup.c +++ b/drivers/spi/spi-qup.c @@ -498,7 +498,7 @@ static int spi_qup_probe(struct platform_device *pdev) struct resource *res; struct device *dev; void __iomem *base; - u32 max_freq, iomode; + u32 max_freq, iomode, num_cs; int ret, irq, size; dev = &pdev->dev; @@ -550,10 +550,11 @@ static int spi_qup_probe(struct platform_device *pdev) } /* use num-cs unless not present or out of range */ - if (of_property_read_u16(dev->of_node, "num-cs", - &master->num_chipselect) || - (master->num_chipselect > SPI_NUM_CHIPSELECTS)) + if (of_property_read_u32(dev->of_node, "num-cs", &num_cs) || + num_cs > SPI_NUM_CHIPSELECTS) master->num_chipselect = SPI_NUM_CHIPSELECTS; + else + master->num_chipselect = num_cs; master->bus_num = pdev->id; master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH | SPI_LOOP; -- cgit v1.2.1 From 854d2f241d71f6ca08ccde30e6c7c2e403363e52 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 6 Mar 2015 14:42:01 +0200 Subject: spi: dw-mid: clear BUSY flag fist and test other one The logic of DMA completion is broken now since test_and_clear_bit() never returns the other bit is set. It means condition are always false and we have spi_finalize_current_transfer() called per each DMA completion which is wrong. The patch fixes logic by clearing BUSY bit first and then check for the other one. Fixes: 30c8eb52cc4a (spi: dw-mid: split rx and tx callbacks when DMA) Signed-off-by: Andy Shevchenko Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- drivers/spi/spi-dw-mid.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-dw-mid.c b/drivers/spi/spi-dw-mid.c index 3ce39d10fafb..4f8c798e0633 100644 --- a/drivers/spi/spi-dw-mid.c +++ b/drivers/spi/spi-dw-mid.c @@ -108,7 +108,8 @@ static void dw_spi_dma_tx_done(void *arg) { struct dw_spi *dws = arg; - if (test_and_clear_bit(TX_BUSY, &dws->dma_chan_busy) & BIT(RX_BUSY)) + clear_bit(TX_BUSY, &dws->dma_chan_busy); + if (test_bit(RX_BUSY, &dws->dma_chan_busy)) return; dw_spi_xfer_done(dws); } @@ -156,7 +157,8 @@ static void dw_spi_dma_rx_done(void *arg) { struct dw_spi *dws = arg; - if (test_and_clear_bit(RX_BUSY, &dws->dma_chan_busy) & BIT(TX_BUSY)) + clear_bit(RX_BUSY, &dws->dma_chan_busy); + if (test_bit(TX_BUSY, &dws->dma_chan_busy)) return; dw_spi_xfer_done(dws); } -- cgit v1.2.1 From 3a43477fa36eee2997313380cb52f486e2bff316 Mon Sep 17 00:00:00 2001 From: Roger Tseng Date: Fri, 6 Mar 2015 11:36:06 +0800 Subject: mfd: rtsx_usb: Prevent DMA from stack Functions rtsx_usb_ep0_read_register() and rtsx_usb_get_card_status() both use arbitrary buffer addresses from arguments directly for DMA and the buffers could be located in stack. This was caught by DMA-API debug check. Fixes this by using double-buffers via kzalloc in both functions to guarantee the validity of DMA buffer. WARNING: CPU: 1 PID: 25 at lib/dma-debug.c:1166 check_for_stack+0x96/0xe0() ehci-pci 0000:00:1a.0: DMA-API: device driver maps memory from stack [addr=ffff8801199e3cef] Modules linked in: rtsx_usb_ms arc4 memstick intel_rapl iosf_mbi rtl8192ce snd_hda_codec_hdmi snd_hda_codec_realtek snd_hda_codec_generic snd_hda_intel rtl_pci rtl8192c_common snd_hda_controller x86_pkg_temp_thermal snd_hda_codec rtlwifi mac80211 coretemp kvm_intel kvm iTCO_wdt snd_hwdep snd_seq snd_seq_device crct10dif_pclmul iTCO_vendor_support sparse_keymap cfg80211 crc32_pclmul snd_pcm crc32c_intel ghash_clmulni_intel rfkill i2c_i801 snd_timer shpchp snd serio_raw mei_me lpc_ich soundcore mei tpm_tis tpm wmi nfsd auth_rpcgss nfs_acl lockd grace sunrpc i915 rtsx_usb_sdmmc mmc_core 8021q uas garp stp i2c_algo_bit llc mrp drm_kms_helper usb_storage drm rtsx_usb mfd_core r8169 mii video CPU: 1 PID: 25 Comm: kworker/1:2 Not tainted 3.20.0-0.rc0.git7.3.fc22.x86_64 #1 Hardware name: WB WB-B06211/WB-B0621, BIOS EB062IWB V1.0 12/12/2013 Workqueue: events rtsx_usb_ms_handle_req [rtsx_usb_ms] 0000000000000000 000000003d188e66 ffff8801199e3808 ffffffff8187642b 0000000000000000 ffff8801199e3860 ffff8801199e3848 ffffffff810ab39a ffff8801199e3864 ffff8801199e3cef ffff880119b57098 ffff880119b37320 Call Trace: [] dump_stack+0x4c/0x65 [] warn_slowpath_common+0x8a/0xc0 [] warn_slowpath_fmt+0x55/0x70 [] ? _raw_spin_unlock_irqrestore+0x36/0x70 [] check_for_stack+0x96/0xe0 [] debug_dma_map_page+0x104/0x150 [] usb_hcd_map_urb_for_dma+0x646/0x790 [] usb_hcd_submit_urb+0x1d5/0xa90 [] ? mark_held_locks+0x7f/0xc0 [] ? mark_held_locks+0x7f/0xc0 [] ? lockdep_init_map+0x65/0x5d0 [] usb_submit_urb+0x42e/0x5f0 [] usb_start_wait_urb+0x77/0x190 [] ? __kmalloc+0x205/0x2d0 [] usb_control_msg+0xdc/0x130 [] rtsx_usb_ep0_read_register+0x59/0x70 [rtsx_usb] [] ? rtsx_usb_get_rsp+0x41/0x50 [rtsx_usb] [] rtsx_usb_ms_handle_req+0x7ce/0x9c5 [rtsx_usb_ms] Reported-by: Josh Boyer Signed-off-by: Roger Tseng Signed-off-by: Lee Jones --- drivers/mfd/rtsx_usb.c | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/drivers/mfd/rtsx_usb.c b/drivers/mfd/rtsx_usb.c index ede50244f265..dbd907d7170e 100644 --- a/drivers/mfd/rtsx_usb.c +++ b/drivers/mfd/rtsx_usb.c @@ -196,18 +196,27 @@ EXPORT_SYMBOL_GPL(rtsx_usb_ep0_write_register); int rtsx_usb_ep0_read_register(struct rtsx_ucr *ucr, u16 addr, u8 *data) { u16 value; + u8 *buf; + int ret; if (!data) return -EINVAL; - *data = 0; + + buf = kzalloc(sizeof(u8), GFP_KERNEL); + if (!buf) + return -ENOMEM; addr |= EP0_READ_REG_CMD << EP0_OP_SHIFT; value = swab16(addr); - return usb_control_msg(ucr->pusb_dev, + ret = usb_control_msg(ucr->pusb_dev, usb_rcvctrlpipe(ucr->pusb_dev, 0), RTSX_USB_REQ_REG_OP, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, - value, 0, data, 1, 100); + value, 0, buf, 1, 100); + *data = *buf; + + kfree(buf); + return ret; } EXPORT_SYMBOL_GPL(rtsx_usb_ep0_read_register); @@ -288,18 +297,27 @@ static int rtsx_usb_get_status_with_bulk(struct rtsx_ucr *ucr, u16 *status) int rtsx_usb_get_card_status(struct rtsx_ucr *ucr, u16 *status) { int ret; + u16 *buf; if (!status) return -EINVAL; - if (polling_pipe == 0) + if (polling_pipe == 0) { + buf = kzalloc(sizeof(u16), GFP_KERNEL); + if (!buf) + return -ENOMEM; + ret = usb_control_msg(ucr->pusb_dev, usb_rcvctrlpipe(ucr->pusb_dev, 0), RTSX_USB_REQ_POLL, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, - 0, 0, status, 2, 100); - else + 0, 0, buf, 2, 100); + *status = *buf; + + kfree(buf); + } else { ret = rtsx_usb_get_status_with_bulk(ucr, status); + } /* usb_control_msg may return positive when success */ if (ret < 0) -- cgit v1.2.1 From c8648508ebfc597058d2cd00b6c539110264a167 Mon Sep 17 00:00:00 2001 From: Ameya Palande <2ameya@gmail.com> Date: Thu, 26 Feb 2015 12:05:51 -0800 Subject: mfd: kempld-core: Fix callback return value check On success, callback function returns 0. So invert the if condition check so that we can break out of loop. Cc: stable@vger.kernel.org Signed-off-by: Ameya Palande <2ameya@gmail.com> Signed-off-by: Lee Jones --- drivers/mfd/kempld-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mfd/kempld-core.c b/drivers/mfd/kempld-core.c index f38ec424872e..5615522f8d62 100644 --- a/drivers/mfd/kempld-core.c +++ b/drivers/mfd/kempld-core.c @@ -739,7 +739,7 @@ static int __init kempld_init(void) for (id = kempld_dmi_table; id->matches[0].slot != DMI_NONE; id++) if (strstr(id->ident, force_device_id)) - if (id->callback && id->callback(id)) + if (id->callback && !id->callback(id)) break; if (id->matches[0].slot == DMI_NONE) return -ENODEV; -- cgit v1.2.1 From 78146572b9cd20452da47951812f35b1ad4906be Mon Sep 17 00:00:00 2001 From: Ian Wilson Date: Thu, 12 Mar 2015 09:37:58 +0000 Subject: netfilter: Zero the tuple in nfnl_cthelper_parse_tuple() nfnl_cthelper_parse_tuple() is called from nfnl_cthelper_new(), nfnl_cthelper_get() and nfnl_cthelper_del(). In each case they pass a pointer to an nf_conntrack_tuple data structure local variable: struct nf_conntrack_tuple tuple; ... ret = nfnl_cthelper_parse_tuple(&tuple, tb[NFCTH_TUPLE]); The problem is that this local variable is not initialized, and nfnl_cthelper_parse_tuple() only initializes two fields: src.l3num and dst.protonum. This leaves all other fields with undefined values based on whatever is on the stack: tuple->src.l3num = ntohs(nla_get_be16(tb[NFCTH_TUPLE_L3PROTONUM])); tuple->dst.protonum = nla_get_u8(tb[NFCTH_TUPLE_L4PROTONUM]); The symptom observed was that when the rpc and tns helpers were added then traffic to port 1536 was being sent to user-space. Signed-off-by: Ian Wilson Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_cthelper.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index a5599fc51a6f..54330fb5efaf 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -77,6 +77,9 @@ nfnl_cthelper_parse_tuple(struct nf_conntrack_tuple *tuple, if (!tb[NFCTH_TUPLE_L3PROTONUM] || !tb[NFCTH_TUPLE_L4PROTONUM]) return -EINVAL; + /* Not all fields are initialized so first zero the tuple */ + memset(tuple, 0, sizeof(struct nf_conntrack_tuple)); + tuple->src.l3num = ntohs(nla_get_be16(tb[NFCTH_TUPLE_L3PROTONUM])); tuple->dst.protonum = nla_get_u8(tb[NFCTH_TUPLE_L4PROTONUM]); -- cgit v1.2.1 From d8bdff59cea141d2e5f7e98c1b11d3e0271640bd Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 13 Mar 2015 10:52:14 +1100 Subject: netfilter: Fix potential crash in nft_hash walker When we get back an EAGAIN from rhashtable_walk_next we were treating it as a valid object which obviously doesn't work too well. Luckily this is hard to trigger so it seems nobody has run into it yet. This patch fixes it by redoing the next call when we get an EAGAIN. Signed-off-by: Herbert Xu Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_hash.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index c82df0a48fcd..37c15e674884 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -153,6 +153,8 @@ static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set, iter->err = err; goto out; } + + continue; } if (iter->count < iter->skip) -- cgit v1.2.1 From 0a64815091bd0ad6c6cdfaac2fae55b0f3ecf974 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Mon, 9 Mar 2015 17:34:18 +0100 Subject: s390/cpum_sf: add diagnostic sampling event only if it is authorized The SF_CYCLES_BASIC_DIAG is always registered even if it is turned of in the current hardware configuration. Because diagnostic-sampling is typically not turned on in the hardware configuration, do not register this perf event by default. Enable it only if the diagnostic-sampling function is authorized. Signed-off-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/perf_cpum_sf.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index c3f8d157cb0d..e6a1578fc000 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -1415,7 +1415,7 @@ CPUMF_EVENT_ATTR(SF, SF_CYCLES_BASIC_DIAG, PERF_EVENT_CPUM_SF_DIAG); static struct attribute *cpumsf_pmu_events_attr[] = { CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC), - CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC_DIAG), + NULL, NULL, }; @@ -1606,8 +1606,11 @@ static int __init init_cpum_sampling_pmu(void) return -EINVAL; } - if (si.ad) + if (si.ad) { sfb_set_limits(CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB); + cpumsf_pmu_events_attr[1] = + CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC_DIAG); + } sfdbg = debug_register(KMSG_COMPONENT, 2, 1, 80); if (!sfdbg) -- cgit v1.2.1 From 20e76ee184a7cea155268377c4e414eea1dab6fd Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 13 Mar 2015 10:31:42 +0100 Subject: s390/ftrace: fix compile error if CONFIG_KPROBES is disabled Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/ftrace.c | 61 ++++++++++++++++++++++++++++++++++------------- 1 file changed, 45 insertions(+), 16 deletions(-) diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index 82c19899574f..6c79f1b44fe7 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -57,6 +57,44 @@ unsigned long ftrace_plt; +static inline void ftrace_generate_orig_insn(struct ftrace_insn *insn) +{ +#ifdef CC_USING_HOTPATCH + /* brcl 0,0 */ + insn->opc = 0xc004; + insn->disp = 0; +#else + /* stg r14,8(r15) */ + insn->opc = 0xe3e0; + insn->disp = 0xf0080024; +#endif +} + +static inline int is_kprobe_on_ftrace(struct ftrace_insn *insn) +{ +#ifdef CONFIG_KPROBES + if (insn->opc == BREAKPOINT_INSTRUCTION) + return 1; +#endif + return 0; +} + +static inline void ftrace_generate_kprobe_nop_insn(struct ftrace_insn *insn) +{ +#ifdef CONFIG_KPROBES + insn->opc = BREAKPOINT_INSTRUCTION; + insn->disp = KPROBE_ON_FTRACE_NOP; +#endif +} + +static inline void ftrace_generate_kprobe_call_insn(struct ftrace_insn *insn) +{ +#ifdef CONFIG_KPROBES + insn->opc = BREAKPOINT_INSTRUCTION; + insn->disp = KPROBE_ON_FTRACE_CALL; +#endif +} + int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr) { @@ -72,16 +110,9 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, return -EFAULT; if (addr == MCOUNT_ADDR) { /* Initial code replacement */ -#ifdef CC_USING_HOTPATCH - /* We expect to see brcl 0,0 */ - ftrace_generate_nop_insn(&orig); -#else - /* We expect to see stg r14,8(r15) */ - orig.opc = 0xe3e0; - orig.disp = 0xf0080024; -#endif + ftrace_generate_orig_insn(&orig); ftrace_generate_nop_insn(&new); - } else if (old.opc == BREAKPOINT_INSTRUCTION) { + } else if (is_kprobe_on_ftrace(&old)) { /* * If we find a breakpoint instruction, a kprobe has been * placed at the beginning of the function. We write the @@ -89,9 +120,8 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, * bytes of the original instruction so that the kprobes * handler can execute a nop, if it reaches this breakpoint. */ - new.opc = orig.opc = BREAKPOINT_INSTRUCTION; - orig.disp = KPROBE_ON_FTRACE_CALL; - new.disp = KPROBE_ON_FTRACE_NOP; + ftrace_generate_kprobe_call_insn(&orig); + ftrace_generate_kprobe_nop_insn(&new); } else { /* Replace ftrace call with a nop. */ ftrace_generate_call_insn(&orig, rec->ip); @@ -111,7 +141,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) if (probe_kernel_read(&old, (void *) rec->ip, sizeof(old))) return -EFAULT; - if (old.opc == BREAKPOINT_INSTRUCTION) { + if (is_kprobe_on_ftrace(&old)) { /* * If we find a breakpoint instruction, a kprobe has been * placed at the beginning of the function. We write the @@ -119,9 +149,8 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) * bytes of the original instruction so that the kprobes * handler can execute a brasl if it reaches this breakpoint. */ - new.opc = orig.opc = BREAKPOINT_INSTRUCTION; - orig.disp = KPROBE_ON_FTRACE_NOP; - new.disp = KPROBE_ON_FTRACE_CALL; + ftrace_generate_kprobe_nop_insn(&orig); + ftrace_generate_kprobe_call_insn(&new); } else { /* Replace nop with an ftrace call. */ ftrace_generate_nop_insn(&orig); -- cgit v1.2.1 From e143fa93c28669a7f0851e6850b1da5b1945fd53 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 13 Mar 2015 11:19:09 +0100 Subject: s390/mm: limit STACK_RND_MASK for compat tasks For compat tasks the mmap randomization does not use the maximum randomization value from mmap_rnd_mask but the fixed value of 0x7ff. This needs to be respected in the definition of STACK_RND_MASK as well. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/elf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h index c9df40b5c0ac..c9c875d9ed31 100644 --- a/arch/s390/include/asm/elf.h +++ b/arch/s390/include/asm/elf.h @@ -211,7 +211,7 @@ do { \ extern unsigned long mmap_rnd_mask; -#define STACK_RND_MASK (mmap_rnd_mask) +#define STACK_RND_MASK (test_thread_flag(TIF_31BIT) ? 0x7ff : mmap_rnd_mask) #define ARCH_DLINFO \ do { \ -- cgit v1.2.1 From 9a30b096b543932de218dd3501b5562e00a8792d Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Thu, 12 Mar 2015 23:53:26 -0400 Subject: blk-mq: fix use of incorrect goto label in blk_mq_init_queue error path If percpu_ref_init() fails the allocated q and hctxs must get cleaned up; using 'err_map' doesn't allow that to happen. Signed-off-by: Mike Snitzer Reviewed-by: Ming Lei Cc: stable@kernel.org Signed-off-by: Jens Axboe --- block/blk-mq.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 4f4bea21052e..b7b8933ec241 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1938,7 +1938,7 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set) */ if (percpu_ref_init(&q->mq_usage_counter, blk_mq_usage_counter_release, PERCPU_REF_INIT_ATOMIC, GFP_KERNEL)) - goto err_map; + goto err_mq_usage; setup_timer(&q->timeout, blk_mq_rq_timer, (unsigned long) q); blk_queue_rq_timeout(q, 30000); @@ -1981,7 +1981,7 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set) blk_mq_init_cpu_queues(q, set->nr_hw_queues); if (blk_mq_init_hw_queues(q, set)) - goto err_hw; + goto err_mq_usage; mutex_lock(&all_q_mutex); list_add_tail(&q->all_q_node, &all_q_list); @@ -1993,7 +1993,7 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set) return q; -err_hw: +err_mq_usage: blk_cleanup_queue(q); err_hctxs: kfree(map); -- cgit v1.2.1 From b4331b433a2860389c9e35a523f0127788248500 Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Sat, 14 Mar 2015 19:32:06 +0100 Subject: MAINTAINERS: add rockchip regexp to the ARM/Rockchip entry The regexp option is a nice way to catch even weirder paths like the current drivers/gpu/drm/rockchip/* or others in the future. Signed-off-by: Heiko Stuebner --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index ddc5a8cf9a8a..0d273bd264ac 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1351,6 +1351,7 @@ F: drivers/i2c/busses/i2c-rk3x.c F: drivers/*/*rockchip* F: drivers/*/*/*rockchip* F: sound/soc/rockchip/ +N: rockchip ARM/SAMSUNG EXYNOS ARM ARCHITECTURES M: Kukjin Kim -- cgit v1.2.1 From 54b0bc602541fcc2dd9f2480623c00552e0b220e Mon Sep 17 00:00:00 2001 From: Alexandru M Stan Date: Fri, 13 Mar 2015 17:55:32 -0700 Subject: ARM: dts: rockchip: disable gmac by default in rk3288.dtsi This block should not be enabled by default or else if the kconfig is set, it will try to load/probe even if there's no phy connected. Signed-off-by: Alexandru M Stan Signed-off-by: Heiko Stuebner --- arch/arm/boot/dts/rk3288.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/rk3288.dtsi b/arch/arm/boot/dts/rk3288.dtsi index d771f687a13b..eccc78d3220b 100644 --- a/arch/arm/boot/dts/rk3288.dtsi +++ b/arch/arm/boot/dts/rk3288.dtsi @@ -411,6 +411,7 @@ "mac_clk_rx", "mac_clk_tx", "clk_mac_ref", "clk_mac_refout", "aclk_mac", "pclk_mac"; + status = "disabled"; }; usb_host0_ehci: usb@ff500000 { -- cgit v1.2.1 From d6e5b7cc9819f9a108294f256dd80939e91a0a1f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Thu, 26 Feb 2015 14:49:56 +0100 Subject: ARM: dts: omap3: Add missing dmas for crypto MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds missing dma DTS definitions for omap aes and sham drivers. Without it kernel drivers do not work for device tree based booting while it works for legacy booting on general purpose SoCs. Note that further changes are still needed for high secure SoCs. But since that never worked in legacy boot mode either, those will be sent separately. Signed-off-by: Pali Rohár Acked-by: Pavel Machek [tony@atomide.com: updated comments] Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/omap3.dtsi | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm/boot/dts/omap3.dtsi b/arch/arm/boot/dts/omap3.dtsi index f4f78c40b564..3fdc84fddb70 100644 --- a/arch/arm/boot/dts/omap3.dtsi +++ b/arch/arm/boot/dts/omap3.dtsi @@ -92,6 +92,8 @@ ti,hwmods = "aes"; reg = <0x480c5000 0x50>; interrupts = <0>; + dmas = <&sdma 65 &sdma 66>; + dma-names = "tx", "rx"; }; prm: prm@48306000 { @@ -550,6 +552,8 @@ ti,hwmods = "sham"; reg = <0x480c3000 0x64>; interrupts = <49>; + dmas = <&sdma 69>; + dma-names = "rx"; }; smartreflex_core: smartreflex@480cb000 { -- cgit v1.2.1 From e5ed5b60272871786b1c5434079925bc60d771b7 Mon Sep 17 00:00:00 2001 From: Suman Anna Date: Wed, 11 Mar 2015 18:38:38 -0500 Subject: ARM: OMAP2+: Fix socbus family info for AM33xx devices The family information in the soc-bus data is currently not classified properly for AM33xx devices, and a read of /sys/bus/soc/devices/soc0/family currently shows "Unknown". Fix the same. Signed-off-by: Suman Anna Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/id.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/mach-omap2/id.c b/arch/arm/mach-omap2/id.c index 2a2f4d56e4c8..25f1beea453e 100644 --- a/arch/arm/mach-omap2/id.c +++ b/arch/arm/mach-omap2/id.c @@ -720,6 +720,8 @@ static const char * __init omap_get_family(void) return kasprintf(GFP_KERNEL, "OMAP4"); else if (soc_is_omap54xx()) return kasprintf(GFP_KERNEL, "OMAP5"); + else if (soc_is_am33xx() || soc_is_am335x()) + return kasprintf(GFP_KERNEL, "AM33xx"); else if (soc_is_am43xx()) return kasprintf(GFP_KERNEL, "AM43xx"); else if (soc_is_dra7xx()) -- cgit v1.2.1 From e03826d5045e81a66a4fad7be9a8ecdaeb7911cf Mon Sep 17 00:00:00 2001 From: Keerthy Date: Tue, 17 Mar 2015 15:56:04 +0530 Subject: regulator: palmas: Correct TPS659038 register definition for REGEN2 The register offset for REGEN2_CTRL in different for TPS659038 chip as when compared with other Palmas family PMICs. In the case of TPS659038 the wrong offset pointed to PLLEN_CTRL and was causing a hang. Correcting the same. Signed-off-by: Keerthy Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- drivers/regulator/palmas-regulator.c | 4 ++++ include/linux/mfd/palmas.h | 3 +++ 2 files changed, 7 insertions(+) diff --git a/drivers/regulator/palmas-regulator.c b/drivers/regulator/palmas-regulator.c index 9205f433573c..18198316b6cf 100644 --- a/drivers/regulator/palmas-regulator.c +++ b/drivers/regulator/palmas-regulator.c @@ -1572,6 +1572,10 @@ static int palmas_regulators_probe(struct platform_device *pdev) if (!pmic) return -ENOMEM; + if (of_device_is_compatible(node, "ti,tps659038-pmic")) + palmas_generic_regs_info[PALMAS_REG_REGEN2].ctrl_addr = + TPS659038_REGEN2_CTRL; + pmic->dev = &pdev->dev; pmic->palmas = palmas; palmas->pmic = pmic; diff --git a/include/linux/mfd/palmas.h b/include/linux/mfd/palmas.h index fb0390a1a498..ee7b1ce7a6f8 100644 --- a/include/linux/mfd/palmas.h +++ b/include/linux/mfd/palmas.h @@ -2999,6 +2999,9 @@ enum usb_irq_events { #define PALMAS_GPADC_TRIM15 0x0E #define PALMAS_GPADC_TRIM16 0x0F +/* TPS659038 regen2_ctrl offset iss different from palmas */ +#define TPS659038_REGEN2_CTRL 0x12 + /* TPS65917 Interrupt registers */ /* Registers for function INTERRUPT */ -- cgit v1.2.1 From d6b6cb1d3e6f78d55c2d4043d77d0d8def3f3b99 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 17 Mar 2015 13:21:42 +0100 Subject: netfilter: nf_tables: allow to change chain policy without hook if it exists If there's an existing base chain, we have to allow to change the default policy without indicating the hook information. However, if the chain doesn't exists, we have to enforce the presence of the hook attribute. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 6ab777912237..ac1a9528dbf2 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1225,7 +1225,10 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, if (nla[NFTA_CHAIN_POLICY]) { if ((chain != NULL && - !(chain->flags & NFT_BASE_CHAIN)) || + !(chain->flags & NFT_BASE_CHAIN))) + return -EOPNOTSUPP; + + if (chain == NULL && nla[NFTA_CHAIN_HOOK] == NULL) return -EOPNOTSUPP; -- cgit v1.2.1 From a76fc9dda87b51010e4bc60b5e0065a70180b465 Mon Sep 17 00:00:00 2001 From: Suman Anna Date: Mon, 16 Mar 2015 20:14:02 -0500 Subject: ARM: OMAP: dmtimer: check for pm_runtime_get_sync() failure The current OMAP dmtimer probe does not check for the return status of pm_runtime_get_sync() before initializing the timer registers. Any timer with missing hwmod data would return a failure here, and the access of registers without enabling the clocks for the timer would trigger a l3_noc interrupt and a kernel boot hang. Add proper checking so that the probe would return a failure graciously without hanging the kernel boot. Signed-off-by: Suman Anna Signed-off-by: Tony Lindgren --- arch/arm/plat-omap/dmtimer.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/arch/arm/plat-omap/dmtimer.c b/arch/arm/plat-omap/dmtimer.c index db10169a08de..f32c74c0e1de 100644 --- a/arch/arm/plat-omap/dmtimer.c +++ b/arch/arm/plat-omap/dmtimer.c @@ -799,6 +799,7 @@ static int omap_dm_timer_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; const struct of_device_id *match; const struct dmtimer_platform_data *pdata; + int ret; match = of_match_device(of_match_ptr(omap_timer_match), dev); pdata = match ? match->data : dev->platform_data; @@ -860,7 +861,12 @@ static int omap_dm_timer_probe(struct platform_device *pdev) } if (!timer->reserved) { - pm_runtime_get_sync(dev); + ret = pm_runtime_get_sync(dev); + if (ret < 0) { + dev_err(dev, "%s: pm_runtime_get_sync failed!\n", + __func__); + goto err_get_sync; + } __omap_dm_timer_init_regs(timer); pm_runtime_put(dev); } @@ -873,6 +879,11 @@ static int omap_dm_timer_probe(struct platform_device *pdev) dev_dbg(dev, "Device Probed.\n"); return 0; + +err_get_sync: + pm_runtime_put_noidle(dev); + pm_runtime_disable(dev); + return ret; } /** -- cgit v1.2.1 From 51b7e5728ebcded3f2ced9cd3ff71076c91e85de Mon Sep 17 00:00:00 2001 From: Suman Anna Date: Mon, 16 Mar 2015 20:14:03 -0500 Subject: ARM: OMAP: dmtimer: disable pm runtime on remove Disable the pm_runtime of the device upon remove. This is added to balance the pm_runtime_enable() invoked in the probe. Signed-off-by: Suman Anna Signed-off-by: Tony Lindgren --- arch/arm/plat-omap/dmtimer.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/plat-omap/dmtimer.c b/arch/arm/plat-omap/dmtimer.c index f32c74c0e1de..8ca94d379bc3 100644 --- a/arch/arm/plat-omap/dmtimer.c +++ b/arch/arm/plat-omap/dmtimer.c @@ -910,6 +910,8 @@ static int omap_dm_timer_remove(struct platform_device *pdev) } spin_unlock_irqrestore(&dm_timer_lock, flags); + pm_runtime_disable(&pdev->dev); + return ret; } -- cgit v1.2.1 From 0790ec172de1bd2e23f1dbd4925426b6cc3c1b72 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Tue, 17 Mar 2015 14:02:32 +0100 Subject: KVM: nVMX: mask unrestricted_guest if disabled on L0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If EPT was enabled, unrestricted_guest was allowed in L1 regardless of L0. L1 triple faulted when running L2 guest that required emulation. Another side effect was 'WARN_ON_ONCE(vmx->nested.nested_run_pending)' in L0's dmesg: WARNING: CPU: 0 PID: 0 at arch/x86/kvm/vmx.c:9190 nested_vmx_vmexit+0x96e/0xb00 [kvm_intel] () Prevent this scenario by masking SECONDARY_EXEC_UNRESTRICTED_GUEST when the host doesn't have it enabled. Fixes: 78051e3b7e35 ("KVM: nVMX: Disable unrestricted mode if ept=0") Cc: stable@vger.kernel.org Tested-By: Kashyap Chamarthy Signed-off-by: Radim Krčmář Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/vmx.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 10a481b7674d..ae4f6d35d19c 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2479,8 +2479,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) if (enable_ept) { /* nested EPT: emulate EPT also to L1 */ vmx->nested.nested_vmx_secondary_ctls_high |= - SECONDARY_EXEC_ENABLE_EPT | - SECONDARY_EXEC_UNRESTRICTED_GUEST; + SECONDARY_EXEC_ENABLE_EPT; vmx->nested.nested_vmx_ept_caps = VMX_EPT_PAGE_WALK_4_BIT | VMX_EPTP_WB_BIT | VMX_EPT_2MB_PAGE_BIT | VMX_EPT_INVEPT_BIT; @@ -2494,6 +2493,10 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) } else vmx->nested.nested_vmx_ept_caps = 0; + if (enable_unrestricted_guest) + vmx->nested.nested_vmx_secondary_ctls_high |= + SECONDARY_EXEC_UNRESTRICTED_GUEST; + /* miscellaneous data */ rdmsr(MSR_IA32_VMX_MISC, vmx->nested.nested_vmx_misc_low, -- cgit v1.2.1 From 391949b6f02121371e3d7d9082c6d17fd9853034 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Wed, 18 Mar 2015 11:27:28 +0100 Subject: spi: trigger trace event for message-done before mesg->complete MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With spidev the mesg->complete callback points to spidev_complete. Calling this unblocks spidev_sync and so spidev_sync_write finishes. As the struct spi_message just read is a local variable in spidev_sync_write and recording the trace event accesses this message the recording is better done first. The same can happen for spidev_sync_read. This fixes an oops observed on a 3.14-rt system with spidev activity after echo 1 > /sys/kernel/debug/tracing/events/spi/enable . Signed-off-by: Uwe Kleine-König Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- drivers/spi/spi.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index c64a3e59fce3..57a195041dc7 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -1105,13 +1105,14 @@ void spi_finalize_current_message(struct spi_master *master) "failed to unprepare message: %d\n", ret); } } + + trace_spi_message_done(mesg); + master->cur_msg_prepared = false; mesg->state = NULL; if (mesg->complete) mesg->complete(mesg->context); - - trace_spi_message_done(mesg); } EXPORT_SYMBOL_GPL(spi_finalize_current_message); -- cgit v1.2.1 From cf39284d41f67964cf42b21bb386c012cf5b7f65 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Wed, 18 Mar 2015 08:57:41 +0800 Subject: regulator: Fix documentation for regmap in the config dev_get_regulator() does not exist, fix the typo. Signed-off-by: Axel Lin Signed-off-by: Mark Brown --- include/linux/regulator/driver.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index d4ad5b5a02bb..045f709cb89b 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -316,7 +316,7 @@ struct regulator_desc { * @driver_data: private regulator data * @of_node: OpenFirmware node to parse for device tree bindings (may be * NULL). - * @regmap: regmap to use for core regmap helpers if dev_get_regulator() is + * @regmap: regmap to use for core regmap helpers if dev_get_regmap() is * insufficient. * @ena_gpio_initialized: GPIO controlling regulator enable was properly * initialized, meaning that >= 0 is a valid gpio -- cgit v1.2.1 From 077155332265f1d64c57bd6c49748a8b7e72a3f9 Mon Sep 17 00:00:00 2001 From: Kishon Vijay Abraham I Date: Fri, 20 Feb 2015 14:21:14 +0530 Subject: ARM: dts: dra7: remove ti,hwmod property from pcie phy Now that we don't have hwmod entry for pcie PHY remove the ti,hwmod property from PCIE PHY's. Otherwise we will get: platform 4a094000.pciephy: Cannot lookup hwmod 'pcie1-phy' Signed-off-by: Kishon Vijay Abraham I [tony@atomide.com: updated comments] Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/dra7.dtsi | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi index 127608d79033..c4659a979c41 100644 --- a/arch/arm/boot/dts/dra7.dtsi +++ b/arch/arm/boot/dts/dra7.dtsi @@ -1111,7 +1111,6 @@ "wkupclk", "refclk", "div-clk", "phy-div"; #phy-cells = <0>; - ti,hwmods = "pcie1-phy"; }; pcie2_phy: pciephy@4a095000 { @@ -1130,7 +1129,6 @@ "wkupclk", "refclk", "div-clk", "phy-div"; #phy-cells = <0>; - ti,hwmods = "pcie2-phy"; status = "disabled"; }; }; -- cgit v1.2.1 From 599c376c49323127c9bdbb0fa61a3d4743819bc2 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Wed, 18 Mar 2015 13:41:34 -0700 Subject: ARM: dts: Fix gpio interrupts for dm816x Commit 7800064ba507 ("ARM: dts: Add basic dm816x device tree configuration") added basic devices for dm816x, but I was not able to test the GPIO interrupts earlier until I found some suitable pins to test with. We can mux the MMC card detect and write protect pins from SD_SDCD and SD_SDWP mode to use a normal GPIO interrupts that are also suitable for the MMC subsystem. This turned out several issues that need to be fixed: - I set the GPIO type wrong to be compatible with omap3 instead of omap4. The GPIO controller on dm816x has EOI interrupt register like omap4 and am335x. - I got the GPIO interrupt numbers wrong as each bank has two and we only use one. They need to be set up the same way as on am335x. - The gpio banks are missing interrupt controller related properties. With these changes the GPIO interrupts can be used with the MMC card detect pin, so let's wire that up. Let's also mux all the MMC lines for completeness while at it. For the first GPIO bank I tested using GPMC lines temporarily muxed to GPIOs on the dip switch 10. Cc: Brian Hutchinson Cc: Matthijs van Duin Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/dm8168-evm.dts | 19 +++++++++++++++++++ arch/arm/boot/dts/dm816x.dtsi | 18 ++++++++++++++---- 2 files changed, 33 insertions(+), 4 deletions(-) diff --git a/arch/arm/boot/dts/dm8168-evm.dts b/arch/arm/boot/dts/dm8168-evm.dts index d3a29c1b8417..afe678f6d2e9 100644 --- a/arch/arm/boot/dts/dm8168-evm.dts +++ b/arch/arm/boot/dts/dm8168-evm.dts @@ -36,6 +36,20 @@ >; }; + mmc_pins: pinmux_mmc_pins { + pinctrl-single,pins = < + DM816X_IOPAD(0x0a70, MUX_MODE0) /* SD_POW */ + DM816X_IOPAD(0x0a74, MUX_MODE0) /* SD_CLK */ + DM816X_IOPAD(0x0a78, MUX_MODE0) /* SD_CMD */ + DM816X_IOPAD(0x0a7C, MUX_MODE0) /* SD_DAT0 */ + DM816X_IOPAD(0x0a80, MUX_MODE0) /* SD_DAT1 */ + DM816X_IOPAD(0x0a84, MUX_MODE0) /* SD_DAT2 */ + DM816X_IOPAD(0x0a88, MUX_MODE0) /* SD_DAT2 */ + DM816X_IOPAD(0x0a8c, MUX_MODE2) /* GP1[7] */ + DM816X_IOPAD(0x0a90, MUX_MODE2) /* GP1[8] */ + >; + }; + usb0_pins: pinmux_usb0_pins { pinctrl-single,pins = < DM816X_IOPAD(0x0d00, MUX_MODE0) /* USB0_DRVVBUS */ @@ -137,7 +151,12 @@ }; &mmc1 { + pinctrl-names = "default"; + pinctrl-0 = <&mmc_pins>; vmmc-supply = <&vmmcsd_fixed>; + bus-width = <4>; + cd-gpios = <&gpio2 7 GPIO_ACTIVE_LOW>; + wp-gpios = <&gpio2 8 GPIO_ACTIVE_LOW>; }; /* At least dm8168-evm rev c won't support multipoint, later may */ diff --git a/arch/arm/boot/dts/dm816x.dtsi b/arch/arm/boot/dts/dm816x.dtsi index 3c97b5f2addc..f35715bc6992 100644 --- a/arch/arm/boot/dts/dm816x.dtsi +++ b/arch/arm/boot/dts/dm816x.dtsi @@ -150,17 +150,27 @@ }; gpio1: gpio@48032000 { - compatible = "ti,omap3-gpio"; + compatible = "ti,omap4-gpio"; ti,hwmods = "gpio1"; + ti,gpio-always-on; reg = <0x48032000 0x1000>; - interrupts = <97>; + interrupts = <96>; + gpio-controller; + #gpio-cells = <2>; + interrupt-controller; + #interrupt-cells = <2>; }; gpio2: gpio@4804c000 { - compatible = "ti,omap3-gpio"; + compatible = "ti,omap4-gpio"; ti,hwmods = "gpio2"; + ti,gpio-always-on; reg = <0x4804c000 0x1000>; - interrupts = <99>; + interrupts = <98>; + gpio-controller; + #gpio-cells = <2>; + interrupt-controller; + #interrupt-cells = <2>; }; gpmc: gpmc@50000000 { -- cgit v1.2.1 From bc188d818edf325ae38cfa43254a0b10a4defd65 Mon Sep 17 00:00:00 2001 From: Sam Bradshaw Date: Wed, 18 Mar 2015 17:06:18 -0600 Subject: blkmq: Fix NULL pointer deref when all reserved tags in When allocating from the reserved tags pool, bt_get() is called with a NULL hctx. If all tags are in use, the hw queue is kicked to push out any pending IO, potentially freeing tags, and tag allocation is retried. The problem is that blk_mq_run_hw_queue() doesn't check for a NULL hctx. So we avoid it with a simple NULL hctx test. Tested by hammering mtip32xx with concurrent smartctl/hdparm. Signed-off-by: Sam Bradshaw Signed-off-by: Selvan Mani Fixes: b32232073e80 ("blk-mq: fix hang in bt_get()") Cc: stable@kernel.org Added appropriate comment. Signed-off-by: Jens Axboe --- block/blk-mq-tag.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index d53a764b05ea..be3290cc0644 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -278,9 +278,11 @@ static int bt_get(struct blk_mq_alloc_data *data, /* * We're out of tags on this hardware queue, kick any * pending IO submits before going to sleep waiting for - * some to complete. + * some to complete. Note that hctx can be NULL here for + * reserved tag allocation. */ - blk_mq_run_hw_queue(hctx, false); + if (hctx) + blk_mq_run_hw_queue(hctx, false); /* * Retry tag allocation after running the hardware queue, -- cgit v1.2.1 From 4017a7ee693d1cae6735c0dac21594a7c6416c4c Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 2 Mar 2015 01:10:28 +0100 Subject: netfilter: restore rule tracing via nfnetlink_log MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since fab4085 ("netfilter: log: nf_log_packet() as real unified interface"), the loginfo structure that is passed to nf_log_packet() is used to explicitly indicate the logger type you want to use. This is a problem for people tracing rules through nfnetlink_log since packets are always routed to the NF_LOG_TYPE logger after the aforementioned patch. We can fix this by removing the trace loginfo structures, but that still changes the log level from 4 to 5 for tracing messages and there may be someone relying on this outthere. So let's just introduce a new nf_log_trace() function that restores the former behaviour. Reported-by: Markus Kötter Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_log.h | 10 ++++++++++ net/ipv4/netfilter/ip_tables.c | 6 +++--- net/ipv6/netfilter/ip6_tables.c | 6 +++--- net/netfilter/nf_log.c | 24 ++++++++++++++++++++++++ net/netfilter/nf_tables_core.c | 8 ++++---- 5 files changed, 44 insertions(+), 10 deletions(-) diff --git a/include/net/netfilter/nf_log.h b/include/net/netfilter/nf_log.h index 534e1f2ac4fc..57639fca223a 100644 --- a/include/net/netfilter/nf_log.h +++ b/include/net/netfilter/nf_log.h @@ -79,6 +79,16 @@ void nf_log_packet(struct net *net, const struct nf_loginfo *li, const char *fmt, ...); +__printf(8, 9) +void nf_log_trace(struct net *net, + u_int8_t pf, + unsigned int hooknum, + const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct nf_loginfo *li, + const char *fmt, ...); + struct nf_log_buf; struct nf_log_buf *nf_log_buf_open(void); diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 99e810f84671..cf5e82f39d3b 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -272,9 +272,9 @@ static void trace_packet(const struct sk_buff *skb, &chainname, &comment, &rulenum) != 0) break; - nf_log_packet(net, AF_INET, hook, skb, in, out, &trace_loginfo, - "TRACE: %s:%s:%s:%u ", - tablename, chainname, comment, rulenum); + nf_log_trace(net, AF_INET, hook, skb, in, out, &trace_loginfo, + "TRACE: %s:%s:%s:%u ", + tablename, chainname, comment, rulenum); } #endif diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index e080fbbbc0e5..bb00c6f2a885 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -298,9 +298,9 @@ static void trace_packet(const struct sk_buff *skb, &chainname, &comment, &rulenum) != 0) break; - nf_log_packet(net, AF_INET6, hook, skb, in, out, &trace_loginfo, - "TRACE: %s:%s:%s:%u ", - tablename, chainname, comment, rulenum); + nf_log_trace(net, AF_INET6, hook, skb, in, out, &trace_loginfo, + "TRACE: %s:%s:%s:%u ", + tablename, chainname, comment, rulenum); } #endif diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 0d8448f19dfe..675d12c69e32 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -212,6 +212,30 @@ void nf_log_packet(struct net *net, } EXPORT_SYMBOL(nf_log_packet); +void nf_log_trace(struct net *net, + u_int8_t pf, + unsigned int hooknum, + const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct nf_loginfo *loginfo, const char *fmt, ...) +{ + va_list args; + char prefix[NF_LOG_PREFIXLEN]; + const struct nf_logger *logger; + + rcu_read_lock(); + logger = rcu_dereference(net->nf.nf_loggers[pf]); + if (logger) { + va_start(args, fmt); + vsnprintf(prefix, sizeof(prefix), fmt, args); + va_end(args); + logger->logfn(net, pf, hooknum, skb, in, out, loginfo, prefix); + } + rcu_read_unlock(); +} +EXPORT_SYMBOL(nf_log_trace); + #define S_SIZE (1024 - (sizeof(unsigned int) + 1)) struct nf_log_buf { diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 3b90eb2b2c55..2d298dccb6dd 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -94,10 +94,10 @@ static void nft_trace_packet(const struct nft_pktinfo *pkt, { struct net *net = dev_net(pkt->in ? pkt->in : pkt->out); - nf_log_packet(net, pkt->xt.family, pkt->ops->hooknum, pkt->skb, pkt->in, - pkt->out, &trace_loginfo, "TRACE: %s:%s:%s:%u ", - chain->table->name, chain->name, comments[type], - rulenum); + nf_log_trace(net, pkt->xt.family, pkt->ops->hooknum, pkt->skb, pkt->in, + pkt->out, &trace_loginfo, "TRACE: %s:%s:%s:%u ", + chain->table->name, chain->name, comments[type], + rulenum); } unsigned int -- cgit v1.2.1 From 1ac31de744202a3a14601170a57f155b4a8d2c21 Mon Sep 17 00:00:00 2001 From: Mark James Date: Tue, 17 Mar 2015 21:35:23 +0000 Subject: ARM: socfpga: dts: fix spi1 interrupt The socfpga.dtsi currently has the wrong interrupt number set for SPI master 1 Trying to use the master without this change results in the kernel boot process waiting forever for an interrupt that will never occur while attempting to probe any slave devices configured in the device tree as being under SPI master 1. The change works for the Cyclone V, and according to the Arria 5 handbook should be good there too. Signed-off-by: Mark James Acked-by: Steffen Trumtrar Signed-off-by: Dinh Nguyen --- arch/arm/boot/dts/socfpga.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/socfpga.dtsi b/arch/arm/boot/dts/socfpga.dtsi index 9d8760956752..d9176e606173 100644 --- a/arch/arm/boot/dts/socfpga.dtsi +++ b/arch/arm/boot/dts/socfpga.dtsi @@ -660,7 +660,7 @@ #address-cells = <1>; #size-cells = <0>; reg = <0xfff01000 0x1000>; - interrupts = <0 156 4>; + interrupts = <0 155 4>; num-cs = <4>; clocks = <&spi_m_clk>; status = "disabled"; -- cgit v1.2.1 From 67d8712dcc70aa16d8e14a52eb73870e3cbddfc2 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Wed, 18 Mar 2015 11:57:39 -0600 Subject: selftests: Fix build failures when invoked from kselftest target Several tests that rely on implicit build rules fail to build, when invoked from the main Makefile kselftest target. These failures are due to --no-builtin-rules and --no-builtin-variables options set in the inherited MAKEFLAGS. --no-builtin-rules eliminates the use of built-in implicit rules and --no-builtin-variables is for not defining built-in variables. These two options override the use of implicit rules resulting in build failures. In addition, inherited LDFLAGS result in build failures and there is no need to define LDFLAGS. Clear LDFLAGS and MAKEFLAG when make is invoked from the main Makefile kselftest target. Fixing this at selftests Makefile avoids changing the main Makefile and keeps this change self contained at selftests level. Signed-off-by: Shuah Khan Acked-by: Michael Ellerman --- tools/testing/selftests/Makefile | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 4e511221a0c1..0db571340edb 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -22,6 +22,14 @@ TARGETS += vm TARGETS_HOTPLUG = cpu-hotplug TARGETS_HOTPLUG += memory-hotplug +# Clear LDFLAGS and MAKEFLAGS if called from main +# Makefile to avoid test build failures when test +# Makefile doesn't have explicit build rules. +ifeq (1,$(MAKELEVEL)) +undefine LDFLAGS +override MAKEFLAGS = +endif + all: for TARGET in $(TARGETS); do \ make -C $$TARGET; \ -- cgit v1.2.1 From 5067c0469c643512f24786990e315f9c15cc7d24 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Thu, 12 Mar 2015 10:32:18 -0700 Subject: ata: Add a new flag to destinguish sas controller SAS controller has its own tag allocation, which doesn't directly match to ATA tag, so SAS and SATA have different code path for ata tags. Originally we use port->scsi_host (98bd4be1) to destinguish SAS controller, but libsas set ->scsi_host too, so we can't use it for the destinguish, we add a new flag for this purpose. Without this patch, the following oops can happen because scsi-mq uses a host-wide tag map shared among all devices with some integer tag values >= ATA_MAX_QUEUE. These unexpectedly high tag values cause __ata_qc_from_tag() to return NULL, which is then dereferenced in ata_qc_new_init(). BUG: unable to handle kernel NULL pointer dereference at 0000000000000058 IP: [] ata_qc_new_init+0x3e/0x120 PGD 32adf0067 PUD 32adf1067 PMD 0 Oops: 0002 [#1] SMP DEBUG_PAGEALLOC Modules linked in: iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi igb i2c_algo_bit ptp pps_core pm80xx libsas scsi_transport_sas sg coretemp eeprom w83795 i2c_i801 CPU: 4 PID: 1450 Comm: cydiskbench Not tainted 4.0.0-rc3 #1 Hardware name: Supermicro X8DTH-i/6/iF/6F/X8DTH, BIOS 2.1b 05/04/12 task: ffff8800ba86d500 ti: ffff88032a064000 task.ti: ffff88032a064000 RIP: 0010:[] [] ata_qc_new_init+0x3e/0x120 RSP: 0018:ffff88032a067858 EFLAGS: 00010046 RAX: 0000000000000000 RBX: ffff8800ba0d2230 RCX: 000000000000002a RDX: ffffffff80505ae0 RSI: 0000000000000020 RDI: ffff8800ba0d2230 RBP: ffff88032a067868 R08: 0000000000000201 R09: 0000000000000001 R10: 0000000000000000 R11: 0000000000000000 R12: ffff8800ba0d0000 R13: ffff8800ba0d2230 R14: ffffffff80505ae0 R15: ffff8800ba0d0000 FS: 0000000041223950(0063) GS:ffff88033e480000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 0000000000000058 CR3: 000000032a0a3000 CR4: 00000000000006e0 Stack: ffff880329eee758 ffff880329eee758 ffff88032a0678a8 ffffffff80502dad ffff8800ba167978 ffff880329eee758 ffff88032bf9c520 ffff8800ba167978 ffff88032bf9c520 ffff88032bf9a290 ffff88032a0678b8 ffffffff80506909 Call Trace: [] ata_scsi_translate+0x3d/0x1b0 [] ata_sas_queuecmd+0x149/0x2a0 [] sas_queuecommand+0xa0/0x1f0 [libsas] [] scsi_dispatch_cmd+0xd4/0x1a0 [] scsi_queue_rq+0x66f/0x7f0 [] __blk_mq_run_hw_queue+0x208/0x3f0 [] blk_mq_run_hw_queue+0x88/0xc0 [] blk_mq_insert_request+0xc4/0x130 [] blk_execute_rq_nowait+0x73/0x160 [] sg_common_write+0x3da/0x720 [sg] [] sg_new_write+0x250/0x360 [sg] [] sg_write+0x13b/0x450 [sg] [] vfs_write+0xd1/0x1b0 [] SyS_write+0x54/0xc0 [] system_call_fastpath+0x12/0x17 tj: updated description. Fixes: 12cb5ce101ab ("libata: use blk taging") Reported-and-tested-by: Tony Battersby Signed-off-by: Shaohua Li Signed-off-by: Tejun Heo --- drivers/ata/libata-core.c | 4 ++-- drivers/scsi/ipr.c | 3 ++- drivers/scsi/libsas/sas_ata.c | 3 ++- include/linux/libata.h | 1 + 4 files changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 4c35f0822d06..ef150ebb4c30 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4737,7 +4737,7 @@ struct ata_queued_cmd *ata_qc_new_init(struct ata_device *dev, int tag) return NULL; /* libsas case */ - if (!ap->scsi_host) { + if (ap->flags & ATA_FLAG_SAS_HOST) { tag = ata_sas_allocate_tag(ap); if (tag < 0) return NULL; @@ -4776,7 +4776,7 @@ void ata_qc_free(struct ata_queued_cmd *qc) tag = qc->tag; if (likely(ata_tag_valid(tag))) { qc->tag = ATA_TAG_POISON; - if (!ap->scsi_host) + if (ap->flags & ATA_FLAG_SAS_HOST) ata_sas_free_tag(tag, ap); } } diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index 9219953ee949..d9afc51af7d3 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -6815,7 +6815,8 @@ static struct ata_port_operations ipr_sata_ops = { }; static struct ata_port_info sata_port_info = { - .flags = ATA_FLAG_SATA | ATA_FLAG_PIO_DMA, + .flags = ATA_FLAG_SATA | ATA_FLAG_PIO_DMA | + ATA_FLAG_SAS_HOST, .pio_mask = ATA_PIO4_ONLY, .mwdma_mask = ATA_MWDMA2, .udma_mask = ATA_UDMA6, diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c index 932d9cc98d2f..9c706d8c1441 100644 --- a/drivers/scsi/libsas/sas_ata.c +++ b/drivers/scsi/libsas/sas_ata.c @@ -547,7 +547,8 @@ static struct ata_port_operations sas_sata_ops = { }; static struct ata_port_info sata_port_info = { - .flags = ATA_FLAG_SATA | ATA_FLAG_PIO_DMA | ATA_FLAG_NCQ, + .flags = ATA_FLAG_SATA | ATA_FLAG_PIO_DMA | ATA_FLAG_NCQ | + ATA_FLAG_SAS_HOST, .pio_mask = ATA_PIO4, .mwdma_mask = ATA_MWDMA2, .udma_mask = ATA_UDMA6, diff --git a/include/linux/libata.h b/include/linux/libata.h index fc03efa64ffe..6b08cc106c21 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -232,6 +232,7 @@ enum { * led */ ATA_FLAG_NO_DIPM = (1 << 23), /* host not happy with DIPM */ ATA_FLAG_LOWTAG = (1 << 24), /* host wants lowest available tag */ + ATA_FLAG_SAS_HOST = (1 << 25), /* SAS host */ /* bits 24:31 of ap->flags are reserved for LLD specific flags */ -- cgit v1.2.1 From c6b570d97c0e77f570bb6b2ed30d372b2b1e9aae Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Mon, 9 Mar 2015 12:20:13 +0100 Subject: regmap: introduce regmap_name to fix syscon regmap trace events This patch fixes a NULL pointer dereference when enabling regmap event tracing in the presence of a syscon regmap, introduced by commit bdb0066df96e ("mfd: syscon: Decouple syscon interface from platform devices"). That patch introduced syscon regmaps that have their dev field set to NULL. The regmap trace events expect it to point to a valid struct device and feed it to dev_name(): $ echo 1 > /sys/kernel/debug/tracing/events/regmap/enable Unable to handle kernel NULL pointer dereference at virtual address 0000002c pgd = 80004000 [0000002c] *pgd=00000000 Internal error: Oops: 17 [#1] SMP ARM Modules linked in: coda videobuf2_vmalloc CPU: 0 PID: 304 Comm: kworker/0:2 Not tainted 4.0.0-rc2+ #9197 Hardware name: Freescale i.MX6 Quad/DualLite (Device Tree) Workqueue: events_freezable thermal_zone_device_check task: 9f25a200 ti: 9f1ee000 task.ti: 9f1ee000 PC is at ftrace_raw_event_regmap_block+0x3c/0xe4 LR is at _regmap_raw_read+0x1bc/0x1cc pc : [<803636e8>] lr : [<80365f2c>] psr: 600f0093 sp : 9f1efd78 ip : 9f1efdb8 fp : 9f1efdb4 r10: 00000004 r9 : 00000001 r8 : 00000001 r7 : 00000180 r6 : 00000000 r5 : 9f00e3c0 r4 : 00000003 r3 : 00000001 r2 : 00000180 r1 : 00000000 r0 : 9f00e3c0 Flags: nZCv IRQs off FIQs on Mode SVC_32 ISA ARM Segment kernel Control: 10c5387d Table: 2d91004a DAC: 00000015 Process kworker/0:2 (pid: 304, stack limit = 0x9f1ee210) Stack: (0x9f1efd78 to 0x9f1f0000) fd60: 9f1efda4 9f1efd88 fd80: 800708c0 805f9510 80927140 800f0013 9f1fc800 9eb2f490 00000000 00000180 fda0: 808e3840 00000001 9f1efdfc 9f1efdb8 80365f2c 803636b8 805f8958 800708e0 fdc0: a00f0013 803636ac 9f16de00 00000180 80927140 9f1fc800 9f1fc800 9f1efe6c fde0: 9f1efe6c 9f732400 00000000 00000000 9f1efe1c 9f1efe00 80365f70 80365d7c fe00: 80365f3c 9f1fc800 9f1fc800 00000180 9f1efe44 9f1efe20 803656a4 80365f48 fe20: 9f1fc800 00000180 9f1efe6c 9f1efe6c 9f732400 00000000 9f1efe64 9f1efe48 fe40: 803657bc 80365634 00000001 9e95f910 9f1fc800 9f1efeb4 9f1efe8c 9f1efe68 fe60: 80452ac0 80365778 9f1efe8c 9f1efe78 9e93d400 9e93d5e8 9f1efeb4 9f72ef40 fe80: 9f1efeac 9f1efe90 8044e11c 80452998 8045298c 9e93d608 9e93d400 808e1978 fea0: 9f1efecc 9f1efeb0 8044fd14 8044e0d0 ffffffff 9f25a200 9e93d608 9e481380 fec0: 9f1efedc 9f1efed0 8044fde8 8044fcec 9f1eff1c 9f1efee0 80038d50 8044fdd8 fee0: 9f1ee020 9f72ef40 9e481398 00000000 00000008 9f72ef54 9f1ee020 9f72ef40 ff00: 9e481398 9e481380 00000008 9f72ef40 9f1eff5c 9f1eff20 80039754 80038bfc ff20: 00000000 9e481380 80894100 808e1662 00000000 9e4f2ec0 00000000 9e481380 ff40: 800396f8 00000000 00000000 00000000 9f1effac 9f1eff60 8003e020 80039704 ff60: ffffffff 00000000 ffffffff 9e481380 00000000 00000000 9f1eff78 9f1eff78 ff80: 00000000 00000000 9f1eff88 9f1eff88 9e4f2ec0 8003df30 00000000 00000000 ffa0: 00000000 9f1effb0 8000eb60 8003df3c 00000000 00000000 00000000 00000000 ffc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 ffe0: 00000000 00000000 00000000 00000000 00000013 00000000 ffffffff ffffffff Backtrace: [<803636ac>] (ftrace_raw_event_regmap_block) from [<80365f2c>] (_regmap_raw_read+0x1bc/0x1cc) r9:00000001 r8:808e3840 r7:00000180 r6:00000000 r5:9eb2f490 r4:9f1fc800 [<80365d70>] (_regmap_raw_read) from [<80365f70>] (_regmap_bus_read+0x34/0x6c) r10:00000000 r9:00000000 r8:9f732400 r7:9f1efe6c r6:9f1efe6c r5:9f1fc800 r4:9f1fc800 [<80365f3c>] (_regmap_bus_read) from [<803656a4>] (_regmap_read+0x7c/0x144) r6:00000180 r5:9f1fc800 r4:9f1fc800 r3:80365f3c [<80365628>] (_regmap_read) from [<803657bc>] (regmap_read+0x50/0x70) r9:00000000 r8:9f732400 r7:9f1efe6c r6:9f1efe6c r5:00000180 r4:9f1fc800 [<8036576c>] (regmap_read) from [<80452ac0>] (imx_get_temp+0x134/0x1a4) r6:9f1efeb4 r5:9f1fc800 r4:9e95f910 r3:00000001 [<8045298c>] (imx_get_temp) from [<8044e11c>] (thermal_zone_get_temp+0x58/0x74) r7:9f72ef40 r6:9f1efeb4 r5:9e93d5e8 r4:9e93d400 [<8044e0c4>] (thermal_zone_get_temp) from [<8044fd14>] (thermal_zone_device_update+0x34/0xec) r6:808e1978 r5:9e93d400 r4:9e93d608 r3:8045298c [<8044fce0>] (thermal_zone_device_update) from [<8044fde8>] (thermal_zone_device_check+0x1c/0x20) r5:9e481380 r4:9e93d608 [<8044fdcc>] (thermal_zone_device_check) from [<80038d50>] (process_one_work+0x160/0x3d4) [<80038bf0>] (process_one_work) from [<80039754>] (worker_thread+0x5c/0x4f4) r10:9f72ef40 r9:00000008 r8:9e481380 r7:9e481398 r6:9f72ef40 r5:9f1ee020 r4:9f72ef54 [<800396f8>] (worker_thread) from [<8003e020>] (kthread+0xf0/0x108) r10:00000000 r9:00000000 r8:00000000 r7:800396f8 r6:9e481380 r5:00000000 r4:9e4f2ec0 [<8003df30>] (kthread) from [<8000eb60>] (ret_from_fork+0x14/0x34) r7:00000000 r6:00000000 r5:8003df30 r4:9e4f2ec0 Code: e3140040 1a00001a e3140020 1a000016 (e596002c) ---[ end trace 193c15c2494ec960 ]--- Fixes: bdb0066df96e (mfd: syscon: Decouple syscon interface from platform devices) Signed-off-by: Philipp Zabel Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- drivers/base/regmap/internal.h | 8 +++ drivers/base/regmap/regcache.c | 16 +++--- drivers/base/regmap/regmap.c | 32 +++++------ include/trace/events/regmap.h | 123 ++++++++++++++++++++--------------------- 4 files changed, 91 insertions(+), 88 deletions(-) diff --git a/drivers/base/regmap/internal.h b/drivers/base/regmap/internal.h index beb8b27d4621..a13587b5c2be 100644 --- a/drivers/base/regmap/internal.h +++ b/drivers/base/regmap/internal.h @@ -243,4 +243,12 @@ extern struct regcache_ops regcache_rbtree_ops; extern struct regcache_ops regcache_lzo_ops; extern struct regcache_ops regcache_flat_ops; +static inline const char *regmap_name(const struct regmap *map) +{ + if (map->dev) + return dev_name(map->dev); + + return map->name; +} + #endif diff --git a/drivers/base/regmap/regcache.c b/drivers/base/regmap/regcache.c index f373c35f9e1d..f5db662e951e 100644 --- a/drivers/base/regmap/regcache.c +++ b/drivers/base/regmap/regcache.c @@ -218,7 +218,7 @@ int regcache_read(struct regmap *map, ret = map->cache_ops->read(map, reg, value); if (ret == 0) - trace_regmap_reg_read_cache(map->dev, reg, *value); + trace_regmap_reg_read_cache(map, reg, *value); return ret; } @@ -311,7 +311,7 @@ int regcache_sync(struct regmap *map) dev_dbg(map->dev, "Syncing %s cache\n", map->cache_ops->name); name = map->cache_ops->name; - trace_regcache_sync(map->dev, name, "start"); + trace_regcache_sync(map, name, "start"); if (!map->cache_dirty) goto out; @@ -346,7 +346,7 @@ out: regmap_async_complete(map); - trace_regcache_sync(map->dev, name, "stop"); + trace_regcache_sync(map, name, "stop"); return ret; } @@ -381,7 +381,7 @@ int regcache_sync_region(struct regmap *map, unsigned int min, name = map->cache_ops->name; dev_dbg(map->dev, "Syncing %s cache from %d-%d\n", name, min, max); - trace_regcache_sync(map->dev, name, "start region"); + trace_regcache_sync(map, name, "start region"); if (!map->cache_dirty) goto out; @@ -401,7 +401,7 @@ out: regmap_async_complete(map); - trace_regcache_sync(map->dev, name, "stop region"); + trace_regcache_sync(map, name, "stop region"); return ret; } @@ -428,7 +428,7 @@ int regcache_drop_region(struct regmap *map, unsigned int min, map->lock(map->lock_arg); - trace_regcache_drop_region(map->dev, min, max); + trace_regcache_drop_region(map, min, max); ret = map->cache_ops->drop(map, min, max); @@ -455,7 +455,7 @@ void regcache_cache_only(struct regmap *map, bool enable) map->lock(map->lock_arg); WARN_ON(map->cache_bypass && enable); map->cache_only = enable; - trace_regmap_cache_only(map->dev, enable); + trace_regmap_cache_only(map, enable); map->unlock(map->lock_arg); } EXPORT_SYMBOL_GPL(regcache_cache_only); @@ -493,7 +493,7 @@ void regcache_cache_bypass(struct regmap *map, bool enable) map->lock(map->lock_arg); WARN_ON(map->cache_only && enable); map->cache_bypass = enable; - trace_regmap_cache_bypass(map->dev, enable); + trace_regmap_cache_bypass(map, enable); map->unlock(map->lock_arg); } EXPORT_SYMBOL_GPL(regcache_cache_bypass); diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index f99b098ddabf..dbfe6a69c3da 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -1281,7 +1281,7 @@ int _regmap_raw_write(struct regmap *map, unsigned int reg, if (map->async && map->bus->async_write) { struct regmap_async *async; - trace_regmap_async_write_start(map->dev, reg, val_len); + trace_regmap_async_write_start(map, reg, val_len); spin_lock_irqsave(&map->async_lock, flags); async = list_first_entry_or_null(&map->async_free, @@ -1339,8 +1339,7 @@ int _regmap_raw_write(struct regmap *map, unsigned int reg, return ret; } - trace_regmap_hw_write_start(map->dev, reg, - val_len / map->format.val_bytes); + trace_regmap_hw_write_start(map, reg, val_len / map->format.val_bytes); /* If we're doing a single register write we can probably just * send the work_buf directly, otherwise try to do a gather @@ -1372,8 +1371,7 @@ int _regmap_raw_write(struct regmap *map, unsigned int reg, kfree(buf); } - trace_regmap_hw_write_done(map->dev, reg, - val_len / map->format.val_bytes); + trace_regmap_hw_write_done(map, reg, val_len / map->format.val_bytes); return ret; } @@ -1407,12 +1405,12 @@ static int _regmap_bus_formatted_write(void *context, unsigned int reg, map->format.format_write(map, reg, val); - trace_regmap_hw_write_start(map->dev, reg, 1); + trace_regmap_hw_write_start(map, reg, 1); ret = map->bus->write(map->bus_context, map->work_buf, map->format.buf_size); - trace_regmap_hw_write_done(map->dev, reg, 1); + trace_regmap_hw_write_done(map, reg, 1); return ret; } @@ -1470,7 +1468,7 @@ int _regmap_write(struct regmap *map, unsigned int reg, dev_info(map->dev, "%x <= %x\n", reg, val); #endif - trace_regmap_reg_write(map->dev, reg, val); + trace_regmap_reg_write(map, reg, val); return map->reg_write(context, reg, val); } @@ -1773,7 +1771,7 @@ static int _regmap_raw_multi_reg_write(struct regmap *map, for (i = 0; i < num_regs; i++) { int reg = regs[i].reg; int val = regs[i].def; - trace_regmap_hw_write_start(map->dev, reg, 1); + trace_regmap_hw_write_start(map, reg, 1); map->format.format_reg(u8, reg, map->reg_shift); u8 += reg_bytes + pad_bytes; map->format.format_val(u8, val, 0); @@ -1788,7 +1786,7 @@ static int _regmap_raw_multi_reg_write(struct regmap *map, for (i = 0; i < num_regs; i++) { int reg = regs[i].reg; - trace_regmap_hw_write_done(map->dev, reg, 1); + trace_regmap_hw_write_done(map, reg, 1); } return ret; } @@ -2059,15 +2057,13 @@ static int _regmap_raw_read(struct regmap *map, unsigned int reg, void *val, */ u8[0] |= map->read_flag_mask; - trace_regmap_hw_read_start(map->dev, reg, - val_len / map->format.val_bytes); + trace_regmap_hw_read_start(map, reg, val_len / map->format.val_bytes); ret = map->bus->read(map->bus_context, map->work_buf, map->format.reg_bytes + map->format.pad_bytes, val, val_len); - trace_regmap_hw_read_done(map->dev, reg, - val_len / map->format.val_bytes); + trace_regmap_hw_read_done(map, reg, val_len / map->format.val_bytes); return ret; } @@ -2123,7 +2119,7 @@ static int _regmap_read(struct regmap *map, unsigned int reg, dev_info(map->dev, "%x => %x\n", reg, *val); #endif - trace_regmap_reg_read(map->dev, reg, *val); + trace_regmap_reg_read(map, reg, *val); if (!map->cache_bypass) regcache_write(map, reg, *val); @@ -2480,7 +2476,7 @@ void regmap_async_complete_cb(struct regmap_async *async, int ret) struct regmap *map = async->map; bool wake; - trace_regmap_async_io_complete(map->dev); + trace_regmap_async_io_complete(map); spin_lock(&map->async_lock); list_move(&async->list, &map->async_free); @@ -2525,7 +2521,7 @@ int regmap_async_complete(struct regmap *map) if (!map->bus || !map->bus->async_write) return 0; - trace_regmap_async_complete_start(map->dev); + trace_regmap_async_complete_start(map); wait_event(map->async_waitq, regmap_async_is_done(map)); @@ -2534,7 +2530,7 @@ int regmap_async_complete(struct regmap *map) map->async_ret = 0; spin_unlock_irqrestore(&map->async_lock, flags); - trace_regmap_async_complete_done(map->dev); + trace_regmap_async_complete_done(map); return ret; } diff --git a/include/trace/events/regmap.h b/include/trace/events/regmap.h index 23d561512f64..22317d2b52ab 100644 --- a/include/trace/events/regmap.h +++ b/include/trace/events/regmap.h @@ -7,27 +7,26 @@ #include #include -struct device; -struct regmap; +#include "../../../drivers/base/regmap/internal.h" /* * Log register events */ DECLARE_EVENT_CLASS(regmap_reg, - TP_PROTO(struct device *dev, unsigned int reg, + TP_PROTO(struct regmap *map, unsigned int reg, unsigned int val), - TP_ARGS(dev, reg, val), + TP_ARGS(map, reg, val), TP_STRUCT__entry( - __string( name, dev_name(dev) ) - __field( unsigned int, reg ) - __field( unsigned int, val ) + __string( name, regmap_name(map) ) + __field( unsigned int, reg ) + __field( unsigned int, val ) ), TP_fast_assign( - __assign_str(name, dev_name(dev)); + __assign_str(name, regmap_name(map)); __entry->reg = reg; __entry->val = val; ), @@ -39,45 +38,45 @@ DECLARE_EVENT_CLASS(regmap_reg, DEFINE_EVENT(regmap_reg, regmap_reg_write, - TP_PROTO(struct device *dev, unsigned int reg, + TP_PROTO(struct regmap *map, unsigned int reg, unsigned int val), - TP_ARGS(dev, reg, val) + TP_ARGS(map, reg, val) ); DEFINE_EVENT(regmap_reg, regmap_reg_read, - TP_PROTO(struct device *dev, unsigned int reg, + TP_PROTO(struct regmap *map, unsigned int reg, unsigned int val), - TP_ARGS(dev, reg, val) + TP_ARGS(map, reg, val) ); DEFINE_EVENT(regmap_reg, regmap_reg_read_cache, - TP_PROTO(struct device *dev, unsigned int reg, + TP_PROTO(struct regmap *map, unsigned int reg, unsigned int val), - TP_ARGS(dev, reg, val) + TP_ARGS(map, reg, val) ); DECLARE_EVENT_CLASS(regmap_block, - TP_PROTO(struct device *dev, unsigned int reg, int count), + TP_PROTO(struct regmap *map, unsigned int reg, int count), - TP_ARGS(dev, reg, count), + TP_ARGS(map, reg, count), TP_STRUCT__entry( - __string( name, dev_name(dev) ) - __field( unsigned int, reg ) - __field( int, count ) + __string( name, regmap_name(map) ) + __field( unsigned int, reg ) + __field( int, count ) ), TP_fast_assign( - __assign_str(name, dev_name(dev)); + __assign_str(name, regmap_name(map)); __entry->reg = reg; __entry->count = count; ), @@ -89,48 +88,48 @@ DECLARE_EVENT_CLASS(regmap_block, DEFINE_EVENT(regmap_block, regmap_hw_read_start, - TP_PROTO(struct device *dev, unsigned int reg, int count), + TP_PROTO(struct regmap *map, unsigned int reg, int count), - TP_ARGS(dev, reg, count) + TP_ARGS(map, reg, count) ); DEFINE_EVENT(regmap_block, regmap_hw_read_done, - TP_PROTO(struct device *dev, unsigned int reg, int count), + TP_PROTO(struct regmap *map, unsigned int reg, int count), - TP_ARGS(dev, reg, count) + TP_ARGS(map, reg, count) ); DEFINE_EVENT(regmap_block, regmap_hw_write_start, - TP_PROTO(struct device *dev, unsigned int reg, int count), + TP_PROTO(struct regmap *map, unsigned int reg, int count), - TP_ARGS(dev, reg, count) + TP_ARGS(map, reg, count) ); DEFINE_EVENT(regmap_block, regmap_hw_write_done, - TP_PROTO(struct device *dev, unsigned int reg, int count), + TP_PROTO(struct regmap *map, unsigned int reg, int count), - TP_ARGS(dev, reg, count) + TP_ARGS(map, reg, count) ); TRACE_EVENT(regcache_sync, - TP_PROTO(struct device *dev, const char *type, + TP_PROTO(struct regmap *map, const char *type, const char *status), - TP_ARGS(dev, type, status), + TP_ARGS(map, type, status), TP_STRUCT__entry( - __string( name, dev_name(dev) ) - __string( status, status ) - __string( type, type ) - __field( int, type ) + __string( name, regmap_name(map) ) + __string( status, status ) + __string( type, type ) + __field( int, type ) ), TP_fast_assign( - __assign_str(name, dev_name(dev)); + __assign_str(name, regmap_name(map)); __assign_str(status, status); __assign_str(type, type); ), @@ -141,17 +140,17 @@ TRACE_EVENT(regcache_sync, DECLARE_EVENT_CLASS(regmap_bool, - TP_PROTO(struct device *dev, bool flag), + TP_PROTO(struct regmap *map, bool flag), - TP_ARGS(dev, flag), + TP_ARGS(map, flag), TP_STRUCT__entry( - __string( name, dev_name(dev) ) - __field( int, flag ) + __string( name, regmap_name(map) ) + __field( int, flag ) ), TP_fast_assign( - __assign_str(name, dev_name(dev)); + __assign_str(name, regmap_name(map)); __entry->flag = flag; ), @@ -161,32 +160,32 @@ DECLARE_EVENT_CLASS(regmap_bool, DEFINE_EVENT(regmap_bool, regmap_cache_only, - TP_PROTO(struct device *dev, bool flag), + TP_PROTO(struct regmap *map, bool flag), - TP_ARGS(dev, flag) + TP_ARGS(map, flag) ); DEFINE_EVENT(regmap_bool, regmap_cache_bypass, - TP_PROTO(struct device *dev, bool flag), + TP_PROTO(struct regmap *map, bool flag), - TP_ARGS(dev, flag) + TP_ARGS(map, flag) ); DECLARE_EVENT_CLASS(regmap_async, - TP_PROTO(struct device *dev), + TP_PROTO(struct regmap *map), - TP_ARGS(dev), + TP_ARGS(map), TP_STRUCT__entry( - __string( name, dev_name(dev) ) + __string( name, regmap_name(map) ) ), TP_fast_assign( - __assign_str(name, dev_name(dev)); + __assign_str(name, regmap_name(map)); ), TP_printk("%s", __get_str(name)) @@ -194,50 +193,50 @@ DECLARE_EVENT_CLASS(regmap_async, DEFINE_EVENT(regmap_block, regmap_async_write_start, - TP_PROTO(struct device *dev, unsigned int reg, int count), + TP_PROTO(struct regmap *map, unsigned int reg, int count), - TP_ARGS(dev, reg, count) + TP_ARGS(map, reg, count) ); DEFINE_EVENT(regmap_async, regmap_async_io_complete, - TP_PROTO(struct device *dev), + TP_PROTO(struct regmap *map), - TP_ARGS(dev) + TP_ARGS(map) ); DEFINE_EVENT(regmap_async, regmap_async_complete_start, - TP_PROTO(struct device *dev), + TP_PROTO(struct regmap *map), - TP_ARGS(dev) + TP_ARGS(map) ); DEFINE_EVENT(regmap_async, regmap_async_complete_done, - TP_PROTO(struct device *dev), + TP_PROTO(struct regmap *map), - TP_ARGS(dev) + TP_ARGS(map) ); TRACE_EVENT(regcache_drop_region, - TP_PROTO(struct device *dev, unsigned int from, + TP_PROTO(struct regmap *map, unsigned int from, unsigned int to), - TP_ARGS(dev, from, to), + TP_ARGS(map, from, to), TP_STRUCT__entry( - __string( name, dev_name(dev) ) - __field( unsigned int, from ) - __field( unsigned int, to ) + __string( name, regmap_name(map) ) + __field( unsigned int, from ) + __field( unsigned int, to ) ), TP_fast_assign( - __assign_str(name, dev_name(dev)); + __assign_str(name, regmap_name(map)); __entry->from = from; __entry->to = to; ), -- cgit v1.2.1 From 5b0d4b5514bbcce69b516d0742f2cfc84ebd6db3 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 19 Mar 2015 16:05:57 -0400 Subject: sparc: perf: Remove redundant perf_pmu_{en|dis}able calls perf_pmu_disable is called by core perf code before pmu->del and the enable function is called by core perf code afterwards. No need to call again within sparc_pmu_del. Ditto for pmu->add and sparc_pmu_add. Signed-off-by: David Ahern Acked-by: Bob Picco Signed-off-by: David S. Miller --- arch/sparc/kernel/perf_event.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 46a5e4508752..6dc4e793df4c 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -1101,7 +1101,6 @@ static void sparc_pmu_del(struct perf_event *event, int _flags) int i; local_irq_save(flags); - perf_pmu_disable(event->pmu); for (i = 0; i < cpuc->n_events; i++) { if (event == cpuc->event[i]) { @@ -1127,7 +1126,6 @@ static void sparc_pmu_del(struct perf_event *event, int _flags) } } - perf_pmu_enable(event->pmu); local_irq_restore(flags); } @@ -1361,7 +1359,6 @@ static int sparc_pmu_add(struct perf_event *event, int ef_flags) unsigned long flags; local_irq_save(flags); - perf_pmu_disable(event->pmu); n0 = cpuc->n_events; if (n0 >= sparc_pmu->max_hw_events) @@ -1394,7 +1391,6 @@ nocheck: ret = 0; out: - perf_pmu_enable(event->pmu); local_irq_restore(flags); return ret; } -- cgit v1.2.1 From d51291cb8f32bfae6b331e1838651f3ddefa73a5 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 19 Mar 2015 16:06:17 -0400 Subject: sparc: perf: Make counting mode actually work Currently perf-stat (aka, counting mode) does not work: $ perf stat ls ... Performance counter stats for 'ls': 1.585665 task-clock (msec) # 0.580 CPUs utilized 24 context-switches # 0.015 M/sec 0 cpu-migrations # 0.000 K/sec 86 page-faults # 0.054 M/sec cycles stalled-cycles-frontend stalled-cycles-backend instructions branches branch-misses 0.002735100 seconds time elapsed The reason is that state is never reset (stays with PERF_HES_UPTODATE set). Add a call to sparc_pmu_enable_event during the added_event handling. Clean up the encoding since pmu_start calls sparc_pmu_enable_event which does the same. Passing PERF_EF_RELOAD to sparc_pmu_start means the call to sparc_perf_event_set_period can be removed as well. With this patch: $ perf stat ls ... Performance counter stats for 'ls': 1.552890 task-clock (msec) # 0.552 CPUs utilized 24 context-switches # 0.015 M/sec 0 cpu-migrations # 0.000 K/sec 86 page-faults # 0.055 M/sec 5,748,997 cycles # 3.702 GHz stalled-cycles-frontend:HG stalled-cycles-backend:HG 1,684,362 instructions:HG # 0.29 insns per cycle 295,133 branches:HG # 190.054 M/sec 28,007 branch-misses:HG # 9.49% of all branches 0.002815665 seconds time elapsed Signed-off-by: David Ahern Acked-by: Bob Picco Signed-off-by: David S. Miller --- arch/sparc/kernel/perf_event.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 6dc4e793df4c..af53c25da2e7 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -960,6 +960,8 @@ out: cpuc->pcr[0] |= cpuc->event[0]->hw.config_base; } +static void sparc_pmu_start(struct perf_event *event, int flags); + /* On this PMU each PIC has it's own PCR control register. */ static void calculate_multiple_pcrs(struct cpu_hw_events *cpuc) { @@ -972,20 +974,13 @@ static void calculate_multiple_pcrs(struct cpu_hw_events *cpuc) struct perf_event *cp = cpuc->event[i]; struct hw_perf_event *hwc = &cp->hw; int idx = hwc->idx; - u64 enc; if (cpuc->current_idx[i] != PIC_NO_INDEX) continue; - sparc_perf_event_set_period(cp, hwc, idx); cpuc->current_idx[i] = idx; - enc = perf_event_get_enc(cpuc->events[i]); - cpuc->pcr[idx] &= ~mask_for_index(idx); - if (hwc->state & PERF_HES_STOPPED) - cpuc->pcr[idx] |= nop_for_index(idx); - else - cpuc->pcr[idx] |= event_encoding(enc, idx); + sparc_pmu_start(cp, PERF_EF_RELOAD); } out: for (i = 0; i < cpuc->n_events; i++) { -- cgit v1.2.1 From b5aff55d89c27aedcae9521155b81b6aebb6c5d8 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 19 Mar 2015 16:06:37 -0400 Subject: sparc: perf: Add support M7 processor The M7 processor has a different hypervisor group id and different PCR fast trap values. PIC read/write functions and PCR bit fields are the same as the T4 so those are reused. Signed-off-by: David Ahern Acked-by: Bob Picco Signed-off-by: David S. Miller --- arch/sparc/include/asm/hypervisor.h | 12 +++++++++++ arch/sparc/kernel/hvapi.c | 1 + arch/sparc/kernel/hvcalls.S | 16 +++++++++++++++ arch/sparc/kernel/pcr.c | 33 ++++++++++++++++++++++++++++++ arch/sparc/kernel/perf_event.c | 40 +++++++++++++++++++++++++++++++++++++ 5 files changed, 102 insertions(+) diff --git a/arch/sparc/include/asm/hypervisor.h b/arch/sparc/include/asm/hypervisor.h index 4f6725ff4c33..f5b6537306f0 100644 --- a/arch/sparc/include/asm/hypervisor.h +++ b/arch/sparc/include/asm/hypervisor.h @@ -2957,6 +2957,17 @@ unsigned long sun4v_t5_set_perfreg(unsigned long reg_num, unsigned long reg_val); #endif + +#define HV_FAST_M7_GET_PERFREG 0x43 +#define HV_FAST_M7_SET_PERFREG 0x44 + +#ifndef __ASSEMBLY__ +unsigned long sun4v_m7_get_perfreg(unsigned long reg_num, + unsigned long *reg_val); +unsigned long sun4v_m7_set_perfreg(unsigned long reg_num, + unsigned long reg_val); +#endif + /* Function numbers for HV_CORE_TRAP. */ #define HV_CORE_SET_VER 0x00 #define HV_CORE_PUTCHAR 0x01 @@ -2981,6 +2992,7 @@ unsigned long sun4v_t5_set_perfreg(unsigned long reg_num, #define HV_GRP_SDIO 0x0108 #define HV_GRP_SDIO_ERR 0x0109 #define HV_GRP_REBOOT_DATA 0x0110 +#define HV_GRP_M7_PERF 0x0114 #define HV_GRP_NIAG_PERF 0x0200 #define HV_GRP_FIRE_PERF 0x0201 #define HV_GRP_N2_CPU 0x0202 diff --git a/arch/sparc/kernel/hvapi.c b/arch/sparc/kernel/hvapi.c index 5c55145bfbf0..662500fa555f 100644 --- a/arch/sparc/kernel/hvapi.c +++ b/arch/sparc/kernel/hvapi.c @@ -48,6 +48,7 @@ static struct api_info api_table[] = { { .group = HV_GRP_VT_CPU, }, { .group = HV_GRP_T5_CPU, }, { .group = HV_GRP_DIAG, .flags = FLAG_PRE_API }, + { .group = HV_GRP_M7_PERF, }, }; static DEFINE_SPINLOCK(hvapi_lock); diff --git a/arch/sparc/kernel/hvcalls.S b/arch/sparc/kernel/hvcalls.S index caedf8320416..afbaba52d2f1 100644 --- a/arch/sparc/kernel/hvcalls.S +++ b/arch/sparc/kernel/hvcalls.S @@ -837,3 +837,19 @@ ENTRY(sun4v_t5_set_perfreg) retl nop ENDPROC(sun4v_t5_set_perfreg) + +ENTRY(sun4v_m7_get_perfreg) + mov %o1, %o4 + mov HV_FAST_M7_GET_PERFREG, %o5 + ta HV_FAST_TRAP + stx %o1, [%o4] + retl + nop +ENDPROC(sun4v_m7_get_perfreg) + +ENTRY(sun4v_m7_set_perfreg) + mov HV_FAST_M7_SET_PERFREG, %o5 + ta HV_FAST_TRAP + retl + nop +ENDPROC(sun4v_m7_set_perfreg) diff --git a/arch/sparc/kernel/pcr.c b/arch/sparc/kernel/pcr.c index 7e967c8018c8..eb978c77c76a 100644 --- a/arch/sparc/kernel/pcr.c +++ b/arch/sparc/kernel/pcr.c @@ -217,6 +217,31 @@ static const struct pcr_ops n5_pcr_ops = { .pcr_nmi_disable = PCR_N4_PICNPT, }; +static u64 m7_pcr_read(unsigned long reg_num) +{ + unsigned long val; + + (void) sun4v_m7_get_perfreg(reg_num, &val); + + return val; +} + +static void m7_pcr_write(unsigned long reg_num, u64 val) +{ + (void) sun4v_m7_set_perfreg(reg_num, val); +} + +static const struct pcr_ops m7_pcr_ops = { + .read_pcr = m7_pcr_read, + .write_pcr = m7_pcr_write, + .read_pic = n4_pic_read, + .write_pic = n4_pic_write, + .nmi_picl_value = n4_picl_value, + .pcr_nmi_enable = (PCR_N4_PICNPT | PCR_N4_STRACE | + PCR_N4_UTRACE | PCR_N4_TOE | + (26 << PCR_N4_SL_SHIFT)), + .pcr_nmi_disable = PCR_N4_PICNPT, +}; static unsigned long perf_hsvc_group; static unsigned long perf_hsvc_major; @@ -248,6 +273,10 @@ static int __init register_perf_hsvc(void) perf_hsvc_group = HV_GRP_T5_CPU; break; + case SUN4V_CHIP_SPARC_M7: + perf_hsvc_group = HV_GRP_M7_PERF; + break; + default: return -ENODEV; } @@ -293,6 +322,10 @@ static int __init setup_sun4v_pcr_ops(void) pcr_ops = &n5_pcr_ops; break; + case SUN4V_CHIP_SPARC_M7: + pcr_ops = &m7_pcr_ops; + break; + default: ret = -ENODEV; break; diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index af53c25da2e7..86eebfa3b158 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -792,6 +792,42 @@ static const struct sparc_pmu niagara4_pmu = { .num_pic_regs = 4, }; +static void sparc_m7_write_pmc(int idx, u64 val) +{ + u64 pcr; + + pcr = pcr_ops->read_pcr(idx); + /* ensure ov and ntc are reset */ + pcr &= ~(PCR_N4_OV | PCR_N4_NTC); + + pcr_ops->write_pic(idx, val & 0xffffffff); + + pcr_ops->write_pcr(idx, pcr); +} + +static const struct sparc_pmu sparc_m7_pmu = { + .event_map = niagara4_event_map, + .cache_map = &niagara4_cache_map, + .max_events = ARRAY_SIZE(niagara4_perfmon_event_map), + .read_pmc = sparc_vt_read_pmc, + .write_pmc = sparc_m7_write_pmc, + .upper_shift = 5, + .lower_shift = 5, + .event_mask = 0x7ff, + .user_bit = PCR_N4_UTRACE, + .priv_bit = PCR_N4_STRACE, + + /* We explicitly don't support hypervisor tracing. */ + .hv_bit = 0, + + .irq_bit = PCR_N4_TOE, + .upper_nop = 0, + .lower_nop = 0, + .flags = 0, + .max_hw_events = 4, + .num_pcrs = 4, + .num_pic_regs = 4, +}; static const struct sparc_pmu *sparc_pmu __read_mostly; static u64 event_encoding(u64 event_id, int idx) @@ -1658,6 +1694,10 @@ static bool __init supported_pmu(void) sparc_pmu = &niagara4_pmu; return true; } + if (!strcmp(sparc_pmu_type, "sparc-m7")) { + sparc_pmu = &sparc_m7_pmu; + return true; + } return false; } -- cgit v1.2.1 From 31aaa98c248da766ece922bbbe8cc78cfd0bc920 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 19 Mar 2015 16:06:53 -0400 Subject: sparc: Touch NMI watchdog when walking cpus and calling printk With the increase in number of CPUs calls to functions that dump output to console (e.g., arch_trigger_all_cpu_backtrace) can take a long time to complete. If IRQs are disabled eventually the NMI watchdog kicks in and creates more havoc. Avoid by telling the NMI watchdog everything is ok. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- arch/sparc/kernel/process_64.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c index 0be7bf978cb1..46a59643bb1c 100644 --- a/arch/sparc/kernel/process_64.c +++ b/arch/sparc/kernel/process_64.c @@ -287,6 +287,8 @@ void arch_trigger_all_cpu_backtrace(bool include_self) printk(" TPC[%lx] O7[%lx] I7[%lx] RPC[%lx]\n", gp->tpc, gp->o7, gp->i7, gp->rpc); } + + touch_nmi_watchdog(); } memset(global_cpu_snapshot, 0, sizeof(global_cpu_snapshot)); @@ -362,6 +364,8 @@ static void pmu_snapshot_all_cpus(void) (cpu == this_cpu ? '*' : ' '), cpu, pp->pcr[0], pp->pcr[1], pp->pcr[2], pp->pcr[3], pp->pic[0], pp->pic[1], pp->pic[2], pp->pic[3]); + + touch_nmi_watchdog(); } memset(global_cpu_snapshot, 0, sizeof(global_cpu_snapshot)); -- cgit v1.2.1 From 755563bc79c764c90b9f44db5e4fe6c556d3440c Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 19 Mar 2015 19:29:01 +1100 Subject: powerpc/powernv: Fixes for hypervisor doorbell handling Since we can now use hypervisor doorbells for host IPIs, this makes sure we clear the host IPI flag when taking a doorbell interrupt, and clears any pending doorbell IPI in pnv_smp_cpu_kill_self() (as we already do for IPIs sent via the XICS interrupt controller). Otherwise if there did happen to be a leftover pending doorbell interrupt for an offline CPU thread for any reason, it would prevent that thread from going into a power-saving mode; it would instead keep waking up because of the interrupt. Signed-off-by: Paul Mackerras Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/ppc-opcode.h | 3 +++ arch/powerpc/include/asm/reg.h | 3 +++ arch/powerpc/kernel/dbell.c | 2 ++ arch/powerpc/platforms/powernv/smp.c | 14 ++++++++++++-- 4 files changed, 20 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index 03cd858a401c..4cbe23af400a 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -153,6 +153,7 @@ #define PPC_INST_MFSPR_PVR_MASK 0xfc1fffff #define PPC_INST_MFTMR 0x7c0002dc #define PPC_INST_MSGSND 0x7c00019c +#define PPC_INST_MSGCLR 0x7c0001dc #define PPC_INST_MSGSNDP 0x7c00011c #define PPC_INST_MTTMR 0x7c0003dc #define PPC_INST_NOP 0x60000000 @@ -309,6 +310,8 @@ ___PPC_RB(b) | __PPC_EH(eh)) #define PPC_MSGSND(b) stringify_in_c(.long PPC_INST_MSGSND | \ ___PPC_RB(b)) +#define PPC_MSGCLR(b) stringify_in_c(.long PPC_INST_MSGCLR | \ + ___PPC_RB(b)) #define PPC_MSGSNDP(b) stringify_in_c(.long PPC_INST_MSGSNDP | \ ___PPC_RB(b)) #define PPC_POPCNTB(a, s) stringify_in_c(.long PPC_INST_POPCNTB | \ diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 1c874fb533bb..af56b5c6c81a 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -608,13 +608,16 @@ #define SRR1_ISI_N_OR_G 0x10000000 /* ISI: Access is no-exec or G */ #define SRR1_ISI_PROT 0x08000000 /* ISI: Other protection fault */ #define SRR1_WAKEMASK 0x00380000 /* reason for wakeup */ +#define SRR1_WAKEMASK_P8 0x003c0000 /* reason for wakeup on POWER8 */ #define SRR1_WAKESYSERR 0x00300000 /* System error */ #define SRR1_WAKEEE 0x00200000 /* External interrupt */ #define SRR1_WAKEMT 0x00280000 /* mtctrl */ #define SRR1_WAKEHMI 0x00280000 /* Hypervisor maintenance */ #define SRR1_WAKEDEC 0x00180000 /* Decrementer interrupt */ +#define SRR1_WAKEDBELL 0x00140000 /* Privileged doorbell on P8 */ #define SRR1_WAKETHERM 0x00100000 /* Thermal management interrupt */ #define SRR1_WAKERESET 0x00100000 /* System reset */ +#define SRR1_WAKEHDBELL 0x000c0000 /* Hypervisor doorbell on P8 */ #define SRR1_WAKESTATE 0x00030000 /* Powersave exit mask [46:47] */ #define SRR1_WS_DEEPEST 0x00030000 /* Some resources not maintained, * may not be recoverable */ diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c index f4217819cc31..2128f3a96c32 100644 --- a/arch/powerpc/kernel/dbell.c +++ b/arch/powerpc/kernel/dbell.c @@ -17,6 +17,7 @@ #include #include +#include #ifdef CONFIG_SMP void doorbell_setup_this_cpu(void) @@ -41,6 +42,7 @@ void doorbell_exception(struct pt_regs *regs) may_hard_irq_enable(); + kvmppc_set_host_ipi(smp_processor_id(), 0); __this_cpu_inc(irq_stat.doorbell_irqs); smp_ipi_demux(); diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c index fc34025ef822..38a45088f633 100644 --- a/arch/powerpc/platforms/powernv/smp.c +++ b/arch/powerpc/platforms/powernv/smp.c @@ -33,6 +33,8 @@ #include #include #include +#include +#include #include "powernv.h" @@ -149,7 +151,7 @@ static int pnv_smp_cpu_disable(void) static void pnv_smp_cpu_kill_self(void) { unsigned int cpu; - unsigned long srr1; + unsigned long srr1, wmask; u32 idle_states; /* Standard hot unplug procedure */ @@ -161,6 +163,10 @@ static void pnv_smp_cpu_kill_self(void) generic_set_cpu_dead(cpu); smp_wmb(); + wmask = SRR1_WAKEMASK; + if (cpu_has_feature(CPU_FTR_ARCH_207S)) + wmask = SRR1_WAKEMASK_P8; + idle_states = pnv_get_supported_cpuidle_states(); /* We don't want to take decrementer interrupts while we are offline, * so clear LPCR:PECE1. We keep PECE2 enabled. @@ -191,10 +197,14 @@ static void pnv_smp_cpu_kill_self(void) * having finished executing in a KVM guest, then srr1 * contains 0. */ - if ((srr1 & SRR1_WAKEMASK) == SRR1_WAKEEE) { + if ((srr1 & wmask) == SRR1_WAKEEE) { icp_native_flush_interrupt(); local_paca->irq_happened &= PACA_IRQ_HARD_DIS; smp_mb(); + } else if ((srr1 & wmask) == SRR1_WAKEHDBELL) { + unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER); + asm volatile(PPC_MSGCLR(%0) : : "r" (msg)); + kvmppc_set_host_ipi(cpu, 0); } if (cpu_core_split_required()) -- cgit v1.2.1 From ddee09c099c35074e50aaf9157efd22429d3acdf Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Thu, 19 Mar 2015 14:12:06 +1100 Subject: powerpc: Add PVR for POWER8NVL processor There's a new variant of POWER8 coming called "POWER8 with NVLink". The core is identical to POWER8 but unfortunately they strapped it with a different PVR, so we need to add an explicit entry for it. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/cputable.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index f337666768a7..f83046878336 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -437,6 +437,26 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check_early = __machine_check_early_realmode_p8, .platform = "power8", }, + { /* Power8NVL */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x004c0000, + .cpu_name = "POWER8NVL (raw)", + .cpu_features = CPU_FTRS_POWER8, + .cpu_user_features = COMMON_USER_POWER8, + .cpu_user_features2 = COMMON_USER2_POWER8, + .mmu_features = MMU_FTRS_POWER8, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 6, + .pmc_type = PPC_PMC_IBM, + .oprofile_cpu_type = "ppc64/power8", + .oprofile_type = PPC_OPROFILE_INVALID, + .cpu_setup = __setup_cpu_power8, + .cpu_restore = __restore_cpu_power8, + .flush_tlb = __flush_tlb_power8, + .machine_check_early = __machine_check_early_realmode_p8, + .platform = "power8", + }, { /* Power8 DD1: Does not support doorbell IPIs */ .pvr_mask = 0xffffff00, .pvr_value = 0x004d0100, -- cgit v1.2.1 From f6ff04149637723261aa4738958b0098b929ee9e Mon Sep 17 00:00:00 2001 From: Tyrel Datwyler Date: Wed, 4 Mar 2015 11:59:33 -0800 Subject: powerpc/pseries: Little endian fixes for post mobility device tree update We currently use the device tree update code in the kernel after resuming from a suspend operation to re-sync the kernels view of the device tree with that of the hypervisor. The code as it stands is not endian safe as it relies on parsing buffers returned by RTAS calls that thusly contains data in big endian format. This patch annotates variables and structure members with __be types as well as performing necessary byte swaps to cpu endian for data that needs to be parsed. Signed-off-by: Tyrel Datwyler Cc: Nathan Fontenot Cc: Cyril Bur Cc: stable@vger.kernel.org Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/pseries/mobility.c | 44 ++++++++++++++++--------------- 1 file changed, 23 insertions(+), 21 deletions(-) diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c index 90cf3dcbd9f2..8f35d525cede 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -25,10 +25,10 @@ static struct kobject *mobility_kobj; struct update_props_workarea { - u32 phandle; - u32 state; - u64 reserved; - u32 nprops; + __be32 phandle; + __be32 state; + __be64 reserved; + __be32 nprops; } __packed; #define NODE_ACTION_MASK 0xff000000 @@ -54,11 +54,11 @@ static int mobility_rtas_call(int token, char *buf, s32 scope) return rc; } -static int delete_dt_node(u32 phandle) +static int delete_dt_node(__be32 phandle) { struct device_node *dn; - dn = of_find_node_by_phandle(phandle); + dn = of_find_node_by_phandle(be32_to_cpu(phandle)); if (!dn) return -ENOENT; @@ -127,7 +127,7 @@ static int update_dt_property(struct device_node *dn, struct property **prop, return 0; } -static int update_dt_node(u32 phandle, s32 scope) +static int update_dt_node(__be32 phandle, s32 scope) { struct update_props_workarea *upwa; struct device_node *dn; @@ -136,6 +136,7 @@ static int update_dt_node(u32 phandle, s32 scope) char *prop_data; char *rtas_buf; int update_properties_token; + u32 nprops; u32 vd; update_properties_token = rtas_token("ibm,update-properties"); @@ -146,7 +147,7 @@ static int update_dt_node(u32 phandle, s32 scope) if (!rtas_buf) return -ENOMEM; - dn = of_find_node_by_phandle(phandle); + dn = of_find_node_by_phandle(be32_to_cpu(phandle)); if (!dn) { kfree(rtas_buf); return -ENOENT; @@ -162,6 +163,7 @@ static int update_dt_node(u32 phandle, s32 scope) break; prop_data = rtas_buf + sizeof(*upwa); + nprops = be32_to_cpu(upwa->nprops); /* On the first call to ibm,update-properties for a node the * the first property value descriptor contains an empty @@ -170,17 +172,17 @@ static int update_dt_node(u32 phandle, s32 scope) */ if (*prop_data == 0) { prop_data++; - vd = *(u32 *)prop_data; + vd = be32_to_cpu(*(__be32 *)prop_data); prop_data += vd + sizeof(vd); - upwa->nprops--; + nprops--; } - for (i = 0; i < upwa->nprops; i++) { + for (i = 0; i < nprops; i++) { char *prop_name; prop_name = prop_data; prop_data += strlen(prop_name) + 1; - vd = *(u32 *)prop_data; + vd = be32_to_cpu(*(__be32 *)prop_data); prop_data += sizeof(vd); switch (vd) { @@ -212,13 +214,13 @@ static int update_dt_node(u32 phandle, s32 scope) return 0; } -static int add_dt_node(u32 parent_phandle, u32 drc_index) +static int add_dt_node(__be32 parent_phandle, __be32 drc_index) { struct device_node *dn; struct device_node *parent_dn; int rc; - parent_dn = of_find_node_by_phandle(parent_phandle); + parent_dn = of_find_node_by_phandle(be32_to_cpu(parent_phandle)); if (!parent_dn) return -ENOENT; @@ -237,7 +239,7 @@ static int add_dt_node(u32 parent_phandle, u32 drc_index) int pseries_devicetree_update(s32 scope) { char *rtas_buf; - u32 *data; + __be32 *data; int update_nodes_token; int rc; @@ -254,17 +256,17 @@ int pseries_devicetree_update(s32 scope) if (rc && rc != 1) break; - data = (u32 *)rtas_buf + 4; - while (*data & NODE_ACTION_MASK) { + data = (__be32 *)rtas_buf + 4; + while (be32_to_cpu(*data) & NODE_ACTION_MASK) { int i; - u32 action = *data & NODE_ACTION_MASK; - int node_count = *data & NODE_COUNT_MASK; + u32 action = be32_to_cpu(*data) & NODE_ACTION_MASK; + u32 node_count = be32_to_cpu(*data) & NODE_COUNT_MASK; data++; for (i = 0; i < node_count; i++) { - u32 phandle = *data++; - u32 drc_index; + __be32 phandle = *data++; + __be32 drc_index; switch (action) { case DELETE_DT_NODE: -- cgit v1.2.1 From 8f902b005ece690f0f50b217975601b804905dc8 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 20 Mar 2015 20:39:38 +1100 Subject: KVM: PPC: Book3S HV: Fix spinlock/mutex ordering issue in kvmppc_set_lpcr() Currently, kvmppc_set_lpcr() has a spinlock around the whole function, and inside that does mutex_lock(&kvm->lock). It is not permitted to take a mutex while holding a spinlock, because the mutex_lock might call schedule(). In addition, this causes lockdep to warn about a lock ordering issue: ====================================================== [ INFO: possible circular locking dependency detected ] 3.18.0-kvm-04645-gdfea862-dirty #131 Not tainted ------------------------------------------------------- qemu-system-ppc/8179 is trying to acquire lock: (&kvm->lock){+.+.+.}, at: [] .kvmppc_set_lpcr+0xf4/0x1c0 [kvm_hv] but task is already holding lock: (&(&vcore->lock)->rlock){+.+...}, at: [] .kvmppc_set_lpcr+0x40/0x1c0 [kvm_hv] which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (&(&vcore->lock)->rlock){+.+...}: [] .mutex_lock_nested+0x80/0x570 [] .kvmppc_vcpu_run_hv+0xc4/0xe40 [kvm_hv] [] .kvmppc_vcpu_run+0x2c/0x40 [kvm] [] .kvm_arch_vcpu_ioctl_run+0x54/0x160 [kvm] [] .kvm_vcpu_ioctl+0x4a8/0x7b0 [kvm] [] .do_vfs_ioctl+0x444/0x770 [] .SyS_ioctl+0xc4/0xe0 [] syscall_exit+0x0/0x98 -> #0 (&kvm->lock){+.+.+.}: [] .lock_acquire+0xcc/0x1a0 [] .mutex_lock_nested+0x80/0x570 [] .kvmppc_set_lpcr+0xf4/0x1c0 [kvm_hv] [] .kvmppc_set_one_reg_hv+0x4dc/0x990 [kvm_hv] [] .kvmppc_set_one_reg+0x44/0x330 [kvm] [] .kvm_vcpu_ioctl_set_one_reg+0x5c/0x150 [kvm] [] .kvm_arch_vcpu_ioctl+0x214/0x2c0 [kvm] [] .kvm_vcpu_ioctl+0xe0/0x7b0 [kvm] [] .do_vfs_ioctl+0x444/0x770 [] .SyS_ioctl+0xc4/0xe0 [] syscall_exit+0x0/0x98 other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&(&vcore->lock)->rlock); lock(&kvm->lock); lock(&(&vcore->lock)->rlock); lock(&kvm->lock); *** DEADLOCK *** 2 locks held by qemu-system-ppc/8179: #0: (&vcpu->mutex){+.+.+.}, at: [] .vcpu_load+0x28/0x90 [kvm] #1: (&(&vcore->lock)->rlock){+.+...}, at: [] .kvmppc_set_lpcr+0x40/0x1c0 [kvm_hv] stack backtrace: CPU: 4 PID: 8179 Comm: qemu-system-ppc Not tainted 3.18.0-kvm-04645-gdfea862-dirty #131 Call Trace: [c000001a66c0f310] [c000000000b486ac] .dump_stack+0x88/0xb4 (unreliable) [c000001a66c0f390] [c0000000000f8bec] .print_circular_bug+0x27c/0x3d0 [c000001a66c0f440] [c0000000000fe9e8] .__lock_acquire+0x2028/0x2190 [c000001a66c0f5d0] [c0000000000ff28c] .lock_acquire+0xcc/0x1a0 [c000001a66c0f6a0] [c000000000b3c120] .mutex_lock_nested+0x80/0x570 [c000001a66c0f7c0] [d00000000ecc1f54] .kvmppc_set_lpcr+0xf4/0x1c0 [kvm_hv] [c000001a66c0f860] [d00000000ecc510c] .kvmppc_set_one_reg_hv+0x4dc/0x990 [kvm_hv] [c000001a66c0f8d0] [d00000000eb9f234] .kvmppc_set_one_reg+0x44/0x330 [kvm] [c000001a66c0f960] [d00000000eb9c9dc] .kvm_vcpu_ioctl_set_one_reg+0x5c/0x150 [kvm] [c000001a66c0f9f0] [d00000000eb9ced4] .kvm_arch_vcpu_ioctl+0x214/0x2c0 [kvm] [c000001a66c0faf0] [d00000000eb940b0] .kvm_vcpu_ioctl+0xe0/0x7b0 [kvm] [c000001a66c0fcb0] [c00000000026cbb4] .do_vfs_ioctl+0x444/0x770 [c000001a66c0fd90] [c00000000026cfa4] .SyS_ioctl+0xc4/0xe0 [c000001a66c0fe30] [c000000000009264] syscall_exit+0x0/0x98 This fixes it by moving the mutex_lock()/mutex_unlock() pair outside the spin-locked region. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/kvm/book3s_hv.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index de4018a1bc4b..b2731933f807 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -942,20 +942,20 @@ static int kvm_arch_vcpu_ioctl_set_sregs_hv(struct kvm_vcpu *vcpu, static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr, bool preserve_top32) { + struct kvm *kvm = vcpu->kvm; struct kvmppc_vcore *vc = vcpu->arch.vcore; u64 mask; + mutex_lock(&kvm->lock); spin_lock(&vc->lock); /* * If ILE (interrupt little-endian) has changed, update the * MSR_LE bit in the intr_msr for each vcpu in this vcore. */ if ((new_lpcr & LPCR_ILE) != (vc->lpcr & LPCR_ILE)) { - struct kvm *kvm = vcpu->kvm; struct kvm_vcpu *vcpu; int i; - mutex_lock(&kvm->lock); kvm_for_each_vcpu(i, vcpu, kvm) { if (vcpu->arch.vcore != vc) continue; @@ -964,7 +964,6 @@ static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr, else vcpu->arch.intr_msr &= ~MSR_LE; } - mutex_unlock(&kvm->lock); } /* @@ -981,6 +980,7 @@ static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr, mask &= 0xFFFFFFFF; vc->lpcr = (vc->lpcr & ~mask) | (new_lpcr & mask); spin_unlock(&vc->lock); + mutex_unlock(&kvm->lock); } static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, -- cgit v1.2.1 From ecb6d6185b3ae40067330eb889977bf2a51f7429 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 20 Mar 2015 20:39:39 +1100 Subject: KVM: PPC: Book3S HV: Endian fix for accessing VPA yield count The VPA (virtual processor area) is defined by PAPR and is therefore big-endian, so we need a be32_to_cpu when reading it in kvmppc_get_yield_count(). Without this, H_CONFER always fails on a little-endian host, causing SMP guests to waste time spinning on spinlocks. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/kvm/book3s_hv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index b2731933f807..de747563d29d 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -636,7 +636,7 @@ static int kvmppc_get_yield_count(struct kvm_vcpu *vcpu) spin_lock(&vcpu->arch.vpa_update_lock); lppaca = (struct lppaca *)vcpu->arch.vpa.pinned_addr; if (lppaca) - yield_count = lppaca->yield_count; + yield_count = be32_to_cpu(lppaca->yield_count); spin_unlock(&vcpu->arch.vpa_update_lock); return yield_count; } -- cgit v1.2.1 From 2bf27601c7b50b6ced72f27304109dc52eb52919 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 20 Mar 2015 20:39:40 +1100 Subject: KVM: PPC: Book3S HV: Fix instruction emulation Commit 4a157d61b48c ("KVM: PPC: Book3S HV: Fix endianness of instruction obtained from HEIR register") had the side effect that we no longer reset vcpu->arch.last_inst to -1 on guest exit in the cases where the instruction is not fetched from the guest. This means that if instruction emulation turns out to be required in those cases, the host will emulate the wrong instruction, since vcpu->arch.last_inst will contain the last instruction that was emulated. This fixes it by making sure that vcpu->arch.last_inst is reset to -1 in those cases. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index bb94e6f20c81..6cbf1630cb70 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -1005,6 +1005,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) /* Save HEIR (HV emulation assist reg) in emul_inst if this is an HEI (HV emulation interrupt, e40) */ li r3,KVM_INST_FETCH_FAILED + stw r3,VCPU_LAST_INST(r9) cmpwi r12,BOOK3S_INTERRUPT_H_EMUL_ASSIST bne 11f mfspr r3,SPRN_HEIR -- cgit v1.2.1 From 3d8c6dce53a349df8878d078e56bf429bad572f9 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 20 Mar 2015 13:56:06 +0100 Subject: netfilter: xt_TPROXY: fix invflags check in tproxy_tg6_check() We have to check for IP6T_INV_PROTO in invflags, instead of flags. Signed-off-by: Pablo Neira Ayuso Acked-by: Balazs Scheidler --- net/netfilter/xt_TPROXY.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c index ef8a926752a9..50e1e5aaf4ce 100644 --- a/net/netfilter/xt_TPROXY.c +++ b/net/netfilter/xt_TPROXY.c @@ -513,8 +513,8 @@ static int tproxy_tg6_check(const struct xt_tgchk_param *par) { const struct ip6t_ip6 *i = par->entryinfo; - if ((i->proto == IPPROTO_TCP || i->proto == IPPROTO_UDP) - && !(i->flags & IP6T_INV_PROTO)) + if ((i->proto == IPPROTO_TCP || i->proto == IPPROTO_UDP) && + !(i->invflags & IP6T_INV_PROTO)) return 0; pr_info("Can be used only in combination with " -- cgit v1.2.1 From 7ee8e4f3983c4ff700958a6099c8fd212ea67b94 Mon Sep 17 00:00:00 2001 From: Wenbo Wang Date: Fri, 20 Mar 2015 01:04:54 -0400 Subject: Fix bug in blk_rq_merge_ok Use the right array index to reference the last element of rq->biotail->bi_io_vec[] Signed-off-by: Wenbo Wang Reviewed-by: Chong Yuan Fixes: 66cb45aa41315 ("block: add support for limiting gaps in SG lists") Cc: stable@kernel.org Signed-off-by: Jens Axboe --- block/blk-merge.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/blk-merge.c b/block/blk-merge.c index fc1ff3b1ea1f..fd3fee81c23c 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -592,7 +592,7 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio) if (q->queue_flags & (1 << QUEUE_FLAG_SG_GAPS)) { struct bio_vec *bprev; - bprev = &rq->biotail->bi_io_vec[bio->bi_vcnt - 1]; + bprev = &rq->biotail->bi_io_vec[rq->biotail->bi_vcnt - 1]; if (bvec_gap_to_prev(bprev, bio->bi_io_vec[0].bv_offset)) return false; } -- cgit v1.2.1 From 8e199dfd82ee097b522b00344af6448715d8ee0c Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Thu, 19 Mar 2015 11:22:32 +0100 Subject: ipv6: call ipv6_proxy_select_ident instead of ipv6_select_ident in udp6_ufo_fragment Matt Grant reported frequent crashes in ipv6_select_ident when udp6_ufo_fragment is called from openvswitch on a skb that doesn't have a dst_entry set. ipv6_proxy_select_ident generates the frag_id without using the dst associated with the skb. This approach was suggested by Vladislav Yasevich. Fixes: 0508c07f5e0c ("ipv6: Select fragment id during UFO segmentation if not set.") Cc: Vladislav Yasevich Reported-by: Matt Grant Tested-by: Matt Grant Signed-off-by: Sabrina Dubroca Acked-by: Vladislav Yasevich Signed-off-by: David S. Miller --- net/ipv6/udp_offload.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index ab889bb16b3c..be2c0ba82c85 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -112,11 +112,9 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, fptr = (struct frag_hdr *)(skb_network_header(skb) + unfrag_ip6hlen); fptr->nexthdr = nexthdr; fptr->reserved = 0; - if (skb_shinfo(skb)->ip6_frag_id) - fptr->identification = skb_shinfo(skb)->ip6_frag_id; - else - ipv6_select_ident(fptr, - (struct rt6_info *)skb_dst(skb)); + if (!skb_shinfo(skb)->ip6_frag_id) + ipv6_proxy_select_ident(skb); + fptr->identification = skb_shinfo(skb)->ip6_frag_id; /* Fragment the skb. ipv6 header and the remaining fields of the * fragment header are updated in ipv6_gso_segment() -- cgit v1.2.1 From 87f966d97b89774162df04d2106c6350c8fe4cb3 Mon Sep 17 00:00:00 2001 From: Markos Chandras Date: Thu, 19 Mar 2015 10:28:14 +0000 Subject: net: ethernet: pcnet32: Setup the SRAM and NOUFLO on Am79C97{3, 5} On a MIPS Malta board, tons of fifo underflow errors have been observed when using u-boot as bootloader instead of YAMON. The reason for that is that YAMON used to set the pcnet device to SRAM mode but u-boot does not. As a result, the default Tx threshold (64 bytes) is now too small to keep the fifo relatively used and it can result to Tx fifo underflow errors. As a result of which, it's best to setup the SRAM on supported controllers so we can always use the NOUFLO bit. Cc: Cc: Cc: Cc: Don Fry Signed-off-by: Markos Chandras Signed-off-by: David S. Miller --- drivers/net/ethernet/amd/pcnet32.c | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/amd/pcnet32.c b/drivers/net/ethernet/amd/pcnet32.c index 11d6e6561df1..15a8190a6f75 100644 --- a/drivers/net/ethernet/amd/pcnet32.c +++ b/drivers/net/ethernet/amd/pcnet32.c @@ -1543,7 +1543,7 @@ pcnet32_probe1(unsigned long ioaddr, int shared, struct pci_dev *pdev) { struct pcnet32_private *lp; int i, media; - int fdx, mii, fset, dxsuflo; + int fdx, mii, fset, dxsuflo, sram; int chip_version; char *chipname; struct net_device *dev; @@ -1580,7 +1580,7 @@ pcnet32_probe1(unsigned long ioaddr, int shared, struct pci_dev *pdev) } /* initialize variables */ - fdx = mii = fset = dxsuflo = 0; + fdx = mii = fset = dxsuflo = sram = 0; chip_version = (chip_version >> 12) & 0xffff; switch (chip_version) { @@ -1613,6 +1613,7 @@ pcnet32_probe1(unsigned long ioaddr, int shared, struct pci_dev *pdev) chipname = "PCnet/FAST III 79C973"; /* PCI */ fdx = 1; mii = 1; + sram = 1; break; case 0x2626: chipname = "PCnet/Home 79C978"; /* PCI */ @@ -1636,6 +1637,7 @@ pcnet32_probe1(unsigned long ioaddr, int shared, struct pci_dev *pdev) chipname = "PCnet/FAST III 79C975"; /* PCI */ fdx = 1; mii = 1; + sram = 1; break; case 0x2628: chipname = "PCnet/PRO 79C976"; @@ -1664,6 +1666,31 @@ pcnet32_probe1(unsigned long ioaddr, int shared, struct pci_dev *pdev) dxsuflo = 1; } + /* + * The Am79C973/Am79C975 controllers come with 12K of SRAM + * which we can use for the Tx/Rx buffers but most importantly, + * the use of SRAM allow us to use the BCR18:NOUFLO bit to avoid + * Tx fifo underflows. + */ + if (sram) { + /* + * The SRAM is being configured in two steps. First we + * set the SRAM size in the BCR25:SRAM_SIZE bits. According + * to the datasheet, each bit corresponds to a 512-byte + * page so we can have at most 24 pages. The SRAM_SIZE + * holds the value of the upper 8 bits of the 16-bit SRAM size. + * The low 8-bits start at 0x00 and end at 0xff. So the + * address range is from 0x0000 up to 0x17ff. Therefore, + * the SRAM_SIZE is set to 0x17. The next step is to set + * the BCR26:SRAM_BND midway through so the Tx and Rx + * buffers can share the SRAM equally. + */ + a->write_bcr(ioaddr, 25, 0x17); + a->write_bcr(ioaddr, 26, 0xc); + /* And finally enable the NOUFLO bit */ + a->write_bcr(ioaddr, 18, a->read_bcr(ioaddr, 18) | (1 << 11)); + } + dev = alloc_etherdev(sizeof(*lp)); if (!dev) { ret = -ENOMEM; -- cgit v1.2.1 From 73ba57bfae4a1914f6a6dac71e3168dd900e00af Mon Sep 17 00:00:00 2001 From: Steven Barth Date: Thu, 19 Mar 2015 16:16:04 +0100 Subject: ipv6: fix backtracking for throw routes for throw routes to trigger evaluation of other policy rules EAGAIN needs to be propagated up to fib_rules_lookup similar to how its done for IPv4 A simple testcase for verification is: ip -6 rule add lookup 33333 priority 33333 ip -6 route add throw 2001:db8::1 ip -6 route add 2001:db8::1 via fe80::1 dev wlan0 table 33333 ip route get 2001:db8::1 Signed-off-by: Steven Barth Signed-off-by: David S. Miller --- net/ipv6/fib6_rules.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index b4d5e1d97c1b..27ca79682efb 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -104,6 +104,7 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, goto again; flp6->saddr = saddr; } + err = rt->dst.error; goto out; } again: -- cgit v1.2.1 From d22e1537181188e5dc8cbc51451832625035bdc2 Mon Sep 17 00:00:00 2001 From: Josh Hunt Date: Thu, 19 Mar 2015 19:19:30 -0400 Subject: tcp: fix tcp fin memory accounting tcp_send_fin() does not account for the memory it allocates properly, so sk_forward_alloc can be negative in cases where we've sent a FIN: ss example output (ss -amn | grep -B1 f4294): tcp FIN-WAIT-1 0 1 192.168.0.1:45520 192.0.2.1:8080 skmem:(r0,rb87380,t0,tb87380,f4294966016,w1280,o0,bl0) Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index a2a796c5536b..1db253e36045 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2773,15 +2773,11 @@ void tcp_send_fin(struct sock *sk) } else { /* Socket is locked, keep trying until memory is available. */ for (;;) { - skb = alloc_skb_fclone(MAX_TCP_HEADER, - sk->sk_allocation); + skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation); if (skb) break; yield(); } - - /* Reserve space for headers and prepare control bits. */ - skb_reserve(skb, MAX_TCP_HEADER); /* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */ tcp_init_nondata_skb(skb, tp->write_seq, TCPHDR_ACK | TCPHDR_FIN); -- cgit v1.2.1 From 435452aa88474fae5a31fd14fca88f0802e66f53 Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Fri, 20 Mar 2015 06:28:23 -0400 Subject: be2net: Prevent VFs from enabling VLAN promiscuous mode Currently, a PF does not restrict its VF interface from enabling vlan promiscuous mode. This breaks vlan isolation when a vlan (transparent tagging) is configured on a VF. This patch fixes this problem by disabling the vlan promisc capability for VFs. Reported-by: Yoann Juet Signed-off-by: Vasundhara Volam Signed-off-by: Sathya Perla Signed-off-by: David S. Miller --- drivers/net/ethernet/emulex/benet/be.h | 1 + drivers/net/ethernet/emulex/benet/be_cmds.c | 3 +- drivers/net/ethernet/emulex/benet/be_cmds.h | 2 +- drivers/net/ethernet/emulex/benet/be_main.c | 98 ++++++++++++++++++++++------- 4 files changed, 80 insertions(+), 24 deletions(-) diff --git a/drivers/net/ethernet/emulex/benet/be.h b/drivers/net/ethernet/emulex/benet/be.h index 27de37aa90af..92eb0c82bb04 100644 --- a/drivers/net/ethernet/emulex/benet/be.h +++ b/drivers/net/ethernet/emulex/benet/be.h @@ -354,6 +354,7 @@ struct be_vf_cfg { u16 vlan_tag; u32 tx_rate; u32 plink_tracking; + u32 privileges; }; enum vf_state { diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c index 36916cfa70f9..3e894f449cc1 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.c +++ b/drivers/net/ethernet/emulex/benet/be_cmds.c @@ -1918,7 +1918,7 @@ int be_cmd_modify_eqd(struct be_adapter *adapter, struct be_set_eqd *set_eqd, /* Uses sycnhronous mcc */ int be_cmd_vlan_config(struct be_adapter *adapter, u32 if_id, u16 *vtag_array, - u32 num) + u32 num, u32 domain) { struct be_mcc_wrb *wrb; struct be_cmd_req_vlan_config *req; @@ -1936,6 +1936,7 @@ int be_cmd_vlan_config(struct be_adapter *adapter, u32 if_id, u16 *vtag_array, be_wrb_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_COMMON, OPCODE_COMMON_NTWK_VLAN_CONFIG, sizeof(*req), wrb, NULL); + req->hdr.domain = domain; req->interface_id = if_id; req->untagged = BE_IF_FLAGS_UNTAGGED & be_if_cap_flags(adapter) ? 1 : 0; diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.h b/drivers/net/ethernet/emulex/benet/be_cmds.h index db761e8e42a3..a7634a3f052a 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.h +++ b/drivers/net/ethernet/emulex/benet/be_cmds.h @@ -2256,7 +2256,7 @@ int lancer_cmd_get_pport_stats(struct be_adapter *adapter, int be_cmd_get_fw_ver(struct be_adapter *adapter); int be_cmd_modify_eqd(struct be_adapter *adapter, struct be_set_eqd *, int num); int be_cmd_vlan_config(struct be_adapter *adapter, u32 if_id, u16 *vtag_array, - u32 num); + u32 num, u32 domain); int be_cmd_rx_filter(struct be_adapter *adapter, u32 flags, u32 status); int be_cmd_set_flow_control(struct be_adapter *adapter, u32 tx_fc, u32 rx_fc); int be_cmd_get_flow_control(struct be_adapter *adapter, u32 *tx_fc, u32 *rx_fc); diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 0a816859aca5..eb2bed92bf60 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -1171,7 +1171,7 @@ static int be_vid_config(struct be_adapter *adapter) for_each_set_bit(i, adapter->vids, VLAN_N_VID) vids[num++] = cpu_to_le16(i); - status = be_cmd_vlan_config(adapter, adapter->if_handle, vids, num); + status = be_cmd_vlan_config(adapter, adapter->if_handle, vids, num, 0); if (status) { dev_err(dev, "Setting HW VLAN filtering failed\n"); /* Set to VLAN promisc mode as setting VLAN filter failed */ @@ -1380,11 +1380,67 @@ static int be_get_vf_config(struct net_device *netdev, int vf, return 0; } +static int be_set_vf_tvt(struct be_adapter *adapter, int vf, u16 vlan) +{ + struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf]; + u16 vids[BE_NUM_VLANS_SUPPORTED]; + int vf_if_id = vf_cfg->if_handle; + int status; + + /* Enable Transparent VLAN Tagging */ + status = be_cmd_set_hsw_config(adapter, vlan, vf + 1, vf_if_id, 0); + if (status) + return status; + + /* Clear pre-programmed VLAN filters on VF if any, if TVT is enabled */ + vids[0] = 0; + status = be_cmd_vlan_config(adapter, vf_if_id, vids, 1, vf + 1); + if (!status) + dev_info(&adapter->pdev->dev, + "Cleared guest VLANs on VF%d", vf); + + /* After TVT is enabled, disallow VFs to program VLAN filters */ + if (vf_cfg->privileges & BE_PRIV_FILTMGMT) { + status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges & + ~BE_PRIV_FILTMGMT, vf + 1); + if (!status) + vf_cfg->privileges &= ~BE_PRIV_FILTMGMT; + } + return 0; +} + +static int be_clear_vf_tvt(struct be_adapter *adapter, int vf) +{ + struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf]; + struct device *dev = &adapter->pdev->dev; + int status; + + /* Reset Transparent VLAN Tagging. */ + status = be_cmd_set_hsw_config(adapter, BE_RESET_VLAN_TAG_ID, vf + 1, + vf_cfg->if_handle, 0); + if (status) + return status; + + /* Allow VFs to program VLAN filtering */ + if (!(vf_cfg->privileges & BE_PRIV_FILTMGMT)) { + status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges | + BE_PRIV_FILTMGMT, vf + 1); + if (!status) { + vf_cfg->privileges |= BE_PRIV_FILTMGMT; + dev_info(dev, "VF%d: FILTMGMT priv enabled", vf); + } + } + + dev_info(dev, + "Disable/re-enable i/f in VM to clear Transparent VLAN tag"); + return 0; +} + static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos) { struct be_adapter *adapter = netdev_priv(netdev); struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf]; - int status = 0; + int status; if (!sriov_enabled(adapter)) return -EPERM; @@ -1394,24 +1450,19 @@ static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos) if (vlan || qos) { vlan |= qos << VLAN_PRIO_SHIFT; - if (vf_cfg->vlan_tag != vlan) - status = be_cmd_set_hsw_config(adapter, vlan, vf + 1, - vf_cfg->if_handle, 0); + status = be_set_vf_tvt(adapter, vf, vlan); } else { - /* Reset Transparent Vlan Tagging. */ - status = be_cmd_set_hsw_config(adapter, BE_RESET_VLAN_TAG_ID, - vf + 1, vf_cfg->if_handle, 0); + status = be_clear_vf_tvt(adapter, vf); } if (status) { dev_err(&adapter->pdev->dev, - "VLAN %d config on VF %d failed : %#x\n", vlan, - vf, status); + "VLAN %d config on VF %d failed : %#x\n", vlan, vf, + status); return be_cmd_status(status); } vf_cfg->vlan_tag = vlan; - return 0; } @@ -3339,7 +3390,6 @@ static int be_if_create(struct be_adapter *adapter, u32 *if_handle, u32 cap_flags, u32 vf) { u32 en_flags; - int status; en_flags = BE_IF_FLAGS_UNTAGGED | BE_IF_FLAGS_BROADCAST | BE_IF_FLAGS_MULTICAST | BE_IF_FLAGS_PASS_L3L4_ERRORS | @@ -3347,10 +3397,7 @@ static int be_if_create(struct be_adapter *adapter, u32 *if_handle, en_flags &= cap_flags; - status = be_cmd_if_create(adapter, cap_flags, en_flags, - if_handle, vf); - - return status; + return be_cmd_if_create(adapter, cap_flags, en_flags, if_handle, vf); } static int be_vfs_if_create(struct be_adapter *adapter) @@ -3368,8 +3415,13 @@ static int be_vfs_if_create(struct be_adapter *adapter) if (!BE3_chip(adapter)) { status = be_cmd_get_profile_config(adapter, &res, vf + 1); - if (!status) + if (!status) { cap_flags = res.if_cap_flags; + /* Prevent VFs from enabling VLAN promiscuous + * mode + */ + cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS; + } } status = be_if_create(adapter, &vf_cfg->if_handle, @@ -3403,7 +3455,6 @@ static int be_vf_setup(struct be_adapter *adapter) struct device *dev = &adapter->pdev->dev; struct be_vf_cfg *vf_cfg; int status, old_vfs, vf; - u32 privileges; old_vfs = pci_num_vf(adapter->pdev); @@ -3433,15 +3484,18 @@ static int be_vf_setup(struct be_adapter *adapter) for_all_vfs(adapter, vf_cfg, vf) { /* Allow VFs to programs MAC/VLAN filters */ - status = be_cmd_get_fn_privileges(adapter, &privileges, vf + 1); - if (!status && !(privileges & BE_PRIV_FILTMGMT)) { + status = be_cmd_get_fn_privileges(adapter, &vf_cfg->privileges, + vf + 1); + if (!status && !(vf_cfg->privileges & BE_PRIV_FILTMGMT)) { status = be_cmd_set_fn_privileges(adapter, - privileges | + vf_cfg->privileges | BE_PRIV_FILTMGMT, vf + 1); - if (!status) + if (!status) { + vf_cfg->privileges |= BE_PRIV_FILTMGMT; dev_info(dev, "VF%d has FILTMGMT privilege\n", vf); + } } /* Allow full available bandwidth */ -- cgit v1.2.1 From c8ba4ad0b59c511578f8f706aae711f01c990794 Mon Sep 17 00:00:00 2001 From: Suresh Reddy Date: Fri, 20 Mar 2015 06:28:24 -0400 Subject: be2net: restrict MODIFY_EQ_DELAY cmd to a max of 8 EQs Issuing this cmd for more than 8 EQs does not have the intended effect even on BEx and Skyhawk-R. This patch fixes this by issuing this cmd for upto 8 EQs at a time. Signed-off-by: Suresh Reddy Signed-off-by: Sathya Perla Signed-off-by: David S. Miller --- drivers/net/ethernet/emulex/benet/be_cmds.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c index 3e894f449cc1..7f05f309e935 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.c +++ b/drivers/net/ethernet/emulex/benet/be_cmds.c @@ -1902,15 +1902,11 @@ int be_cmd_modify_eqd(struct be_adapter *adapter, struct be_set_eqd *set_eqd, { int num_eqs, i = 0; - if (lancer_chip(adapter) && num > 8) { - while (num) { - num_eqs = min(num, 8); - __be_cmd_modify_eqd(adapter, &set_eqd[i], num_eqs); - i += num_eqs; - num -= num_eqs; - } - } else { - __be_cmd_modify_eqd(adapter, set_eqd, num); + while (num) { + num_eqs = min(num, 8); + __be_cmd_modify_eqd(adapter, &set_eqd[i], num_eqs); + i += num_eqs; + num -= num_eqs; } return 0; -- cgit v1.2.1 From 25848c9015964d9d97dffd48bbb88b75a25d0a1b Mon Sep 17 00:00:00 2001 From: Suresh Reddy Date: Fri, 20 Mar 2015 06:28:25 -0400 Subject: be2net: use PCI MMIO read instead of config read for errors When an EEH error occurs, the device/slot is disconnected. This condition is more reliably detected (i.e., returns all ones) with an MMIO read rather than a config read -- especially on power platforms. Hence, this patch fixes EEH error detection by replacing config reads with MMIO reads for reading the error registers. The error registers in Skyhawk-R/BE2/BE3 are accessible both via the config space and the PCICFG (BAR0) memory space. Reported-by: Gavin Shan Signed-off-by: Suresh Reddy Signed-off-by: Sathya Perla Signed-off-by: David S. Miller --- drivers/net/ethernet/emulex/benet/be.h | 1 + drivers/net/ethernet/emulex/benet/be_main.c | 33 +++++++++++++++++++---------- 2 files changed, 23 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/emulex/benet/be.h b/drivers/net/ethernet/emulex/benet/be.h index 92eb0c82bb04..27b9fe99a9bd 100644 --- a/drivers/net/ethernet/emulex/benet/be.h +++ b/drivers/net/ethernet/emulex/benet/be.h @@ -424,6 +424,7 @@ struct be_adapter { u8 __iomem *csr; /* CSR BAR used only for BE2/3 */ u8 __iomem *db; /* Door Bell */ + u8 __iomem *pcicfg; /* On SH,BEx only. Shadow of PCI config space */ struct mutex mbox_lock; /* For serializing mbox cmds to BE card */ struct be_dma_mem mbox_mem; diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index eb2bed92bf60..e6b790f0d9dc 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -2823,14 +2823,12 @@ void be_detect_error(struct be_adapter *adapter) } } } else { - pci_read_config_dword(adapter->pdev, - PCICFG_UE_STATUS_LOW, &ue_lo); - pci_read_config_dword(adapter->pdev, - PCICFG_UE_STATUS_HIGH, &ue_hi); - pci_read_config_dword(adapter->pdev, - PCICFG_UE_STATUS_LOW_MASK, &ue_lo_mask); - pci_read_config_dword(adapter->pdev, - PCICFG_UE_STATUS_HI_MASK, &ue_hi_mask); + ue_lo = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_LOW); + ue_hi = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_HIGH); + ue_lo_mask = ioread32(adapter->pcicfg + + PCICFG_UE_STATUS_LOW_MASK); + ue_hi_mask = ioread32(adapter->pcicfg + + PCICFG_UE_STATUS_HI_MASK); ue_lo = (ue_lo & ~ue_lo_mask); ue_hi = (ue_hi & ~ue_hi_mask); @@ -4874,24 +4872,37 @@ static int be_roce_map_pci_bars(struct be_adapter *adapter) static int be_map_pci_bars(struct be_adapter *adapter) { + struct pci_dev *pdev = adapter->pdev; u8 __iomem *addr; if (BEx_chip(adapter) && be_physfn(adapter)) { - adapter->csr = pci_iomap(adapter->pdev, 2, 0); + adapter->csr = pci_iomap(pdev, 2, 0); if (!adapter->csr) return -ENOMEM; } - addr = pci_iomap(adapter->pdev, db_bar(adapter), 0); + addr = pci_iomap(pdev, db_bar(adapter), 0); if (!addr) goto pci_map_err; adapter->db = addr; + if (skyhawk_chip(adapter) || BEx_chip(adapter)) { + if (be_physfn(adapter)) { + /* PCICFG is the 2nd BAR in BE2 */ + addr = pci_iomap(pdev, BE2_chip(adapter) ? 1 : 0, 0); + if (!addr) + goto pci_map_err; + adapter->pcicfg = addr; + } else { + adapter->pcicfg = adapter->db + SRIOV_VF_PCICFG_OFFSET; + } + } + be_roce_map_pci_bars(adapter); return 0; pci_map_err: - dev_err(&adapter->pdev->dev, "Error in mapping PCI BARs\n"); + dev_err(&pdev->dev, "Error in mapping PCI BARs\n"); be_unmap_pci_bars(adapter); return -ENOMEM; } -- cgit v1.2.1 From 91edd096e224941131f896b86838b1e59553696a Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 20 Mar 2015 16:48:13 +0000 Subject: net: compat: Update get_compat_msghdr() to match copy_msghdr_from_user() behaviour Commit db31c55a6fb2 (net: clamp ->msg_namelen instead of returning an error) introduced the clamping of msg_namelen when the unsigned value was larger than sizeof(struct sockaddr_storage). This caused a msg_namelen of -1 to be valid. The native code was subsequently fixed by commit dbb490b96584 (net: socket: error on a negative msg_namelen). In addition, the native code sets msg_namelen to 0 when msg_name is NULL. This was done in commit (6a2a2b3ae075 net:socket: set msg_namelen to 0 if msg_name is passed as NULL in msghdr struct from userland) and subsequently updated by 08adb7dabd48 (fold verify_iovec() into copy_msghdr_from_user()). This patch brings the get_compat_msghdr() in line with copy_msghdr_from_user(). Fixes: db31c55a6fb2 (net: clamp ->msg_namelen instead of returning an error) Cc: David S. Miller Cc: Dan Carpenter Signed-off-by: Catalin Marinas Signed-off-by: David S. Miller --- net/compat.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/compat.c b/net/compat.c index 94d3d5e97883..f7bd286a8280 100644 --- a/net/compat.c +++ b/net/compat.c @@ -49,6 +49,13 @@ ssize_t get_compat_msghdr(struct msghdr *kmsg, __get_user(kmsg->msg_controllen, &umsg->msg_controllen) || __get_user(kmsg->msg_flags, &umsg->msg_flags)) return -EFAULT; + + if (!uaddr) + kmsg->msg_namelen = 0; + + if (kmsg->msg_namelen < 0) + return -EINVAL; + if (kmsg->msg_namelen > sizeof(struct sockaddr_storage)) kmsg->msg_namelen = sizeof(struct sockaddr_storage); kmsg->msg_control = compat_ptr(tmp3); -- cgit v1.2.1 From 4de930efc23b92ddf88ce91c405ee645fe6e27ea Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 20 Mar 2015 17:41:43 +0000 Subject: net: validate the range we feed to iov_iter_init() in sys_sendto/sys_recvfrom Cc: stable@vger.kernel.org # v3.19 Signed-off-by: Al Viro Signed-off-by: David S. Miller --- net/socket.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/socket.c b/net/socket.c index bbedbfcb42c2..245330ca0015 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1702,6 +1702,8 @@ SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len, if (len > INT_MAX) len = INT_MAX; + if (unlikely(!access_ok(VERIFY_READ, buff, len))) + return -EFAULT; sock = sockfd_lookup_light(fd, &err, &fput_needed); if (!sock) goto out; @@ -1760,6 +1762,8 @@ SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size, if (size > INT_MAX) size = INT_MAX; + if (unlikely(!access_ok(VERIFY_WRITE, ubuf, size))) + return -EFAULT; sock = sockfd_lookup_light(fd, &err, &fput_needed); if (!sock) goto out; -- cgit v1.2.1 From f40bff4239d45ac061044a8a79cf6868c62df345 Mon Sep 17 00:00:00 2001 From: Ondrej Zary Date: Sat, 21 Mar 2015 11:29:37 +0100 Subject: cx82310_eth: wait for firmware to become ready When the device is powered up, some (older) firmware versions fail to work properly if we send commands before the boot is complete (everything is OK when the device is hot-plugged). The firmware indicates its ready status by putting the link up. Newer firmwares delay the first command so they don't suffer from this problem. They also report the link being always up. Wait for firmware to become ready (link up) before sending any commands and/or data. This also allows lowering CMD_TIMEOUT value to a reasonable time. Tested with 4.1.0.9 (old) and 4.1.0.30 (new) firmware versions. Signed-off-by: Ondrej Zary Signed-off-by: David S. Miller --- drivers/net/usb/cx82310_eth.c | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/drivers/net/usb/cx82310_eth.c b/drivers/net/usb/cx82310_eth.c index fe48f4c51373..1762ad3910b2 100644 --- a/drivers/net/usb/cx82310_eth.c +++ b/drivers/net/usb/cx82310_eth.c @@ -46,8 +46,7 @@ enum cx82310_status { }; #define CMD_PACKET_SIZE 64 -/* first command after power on can take around 8 seconds */ -#define CMD_TIMEOUT 15000 +#define CMD_TIMEOUT 100 #define CMD_REPLY_RETRY 5 #define CX82310_MTU 1514 @@ -78,8 +77,9 @@ static int cx82310_cmd(struct usbnet *dev, enum cx82310_cmd cmd, bool reply, ret = usb_bulk_msg(udev, usb_sndbulkpipe(udev, CMD_EP), buf, CMD_PACKET_SIZE, &actual_len, CMD_TIMEOUT); if (ret < 0) { - dev_err(&dev->udev->dev, "send command %#x: error %d\n", - cmd, ret); + if (cmd != CMD_GET_LINK_STATUS) + dev_err(&dev->udev->dev, "send command %#x: error %d\n", + cmd, ret); goto end; } @@ -90,8 +90,10 @@ static int cx82310_cmd(struct usbnet *dev, enum cx82310_cmd cmd, bool reply, buf, CMD_PACKET_SIZE, &actual_len, CMD_TIMEOUT); if (ret < 0) { - dev_err(&dev->udev->dev, - "reply receive error %d\n", ret); + if (cmd != CMD_GET_LINK_STATUS) + dev_err(&dev->udev->dev, + "reply receive error %d\n", + ret); goto end; } if (actual_len > 0) @@ -134,6 +136,8 @@ static int cx82310_bind(struct usbnet *dev, struct usb_interface *intf) int ret; char buf[15]; struct usb_device *udev = dev->udev; + u8 link[3]; + int timeout = 50; /* avoid ADSL modems - continue only if iProduct is "USB NET CARD" */ if (usb_string(udev, udev->descriptor.iProduct, buf, sizeof(buf)) > 0 @@ -160,6 +164,20 @@ static int cx82310_bind(struct usbnet *dev, struct usb_interface *intf) if (!dev->partial_data) return -ENOMEM; + /* wait for firmware to become ready (indicated by the link being up) */ + while (--timeout) { + ret = cx82310_cmd(dev, CMD_GET_LINK_STATUS, true, NULL, 0, + link, sizeof(link)); + /* the command can time out during boot - it's not an error */ + if (!ret && link[0] == 1 && link[2] == 1) + break; + msleep(500); + }; + if (!timeout) { + dev_err(&udev->dev, "firmware not ready in time\n"); + return -ETIMEDOUT; + } + /* enable ethernet mode (?) */ ret = cx82310_cmd(dev, CMD_ETHERNET_MODE, true, "\x01", 1, NULL, 0); if (ret) { -- cgit v1.2.1 From 749177ccc74f9c6d0f51bd78a15c652a2134aa11 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 21 Mar 2015 19:25:05 +0100 Subject: netfilter: nft_compat: set IP6T_F_PROTO flag if protocol is set ip6tables extensions check for this flag to restrict match/target to a given protocol. Without this flag set, SYNPROXY6 returns an error. Signed-off-by: Pablo Neira Ayuso Acked-by: Patrick McHardy --- net/netfilter/nft_compat.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 213584cf04b3..65f3e2b6be44 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -133,6 +133,9 @@ nft_target_set_tgchk_param(struct xt_tgchk_param *par, entry->e4.ip.invflags = inv ? IPT_INV_PROTO : 0; break; case AF_INET6: + if (proto) + entry->e6.ipv6.flags |= IP6T_F_PROTO; + entry->e6.ipv6.proto = proto; entry->e6.ipv6.invflags = inv ? IP6T_INV_PROTO : 0; break; @@ -344,6 +347,9 @@ nft_match_set_mtchk_param(struct xt_mtchk_param *par, const struct nft_ctx *ctx, entry->e4.ip.invflags = inv ? IPT_INV_PROTO : 0; break; case AF_INET6: + if (proto) + entry->e6.ipv6.flags |= IP6T_F_PROTO; + entry->e6.ipv6.proto = proto; entry->e6.ipv6.invflags = inv ? IP6T_INV_PROTO : 0; break; -- cgit v1.2.1 From 44d5f6f5901e996744858c175baee320ccf1eda3 Mon Sep 17 00:00:00 2001 From: Mahesh Salgaonkar Date: Tue, 17 Mar 2015 16:14:41 +0530 Subject: powerpc/book3s: Fix the MCE code to use CONFIG_KVM_BOOK3S_64_HANDLER commit id 2ba9f0d has changed CONFIG_KVM_BOOK3S_64_HV to tristate to allow HV/PR bits to be built as modules. But the MCE code still depends on CONFIG_KVM_BOOK3S_64_HV which is wrong. When user selects CONFIG_KVM_BOOK3S_64_HV=m to build HV/PR bits as a separate module the relevant MCE code gets excluded. This patch fixes the MCE code to use CONFIG_KVM_BOOK3S_64_HANDLER. This makes sure that the relevant MCE code is included when HV/PR bits are built as a separate modules. Fixes: 2ba9f0d88750 ("kvm: powerpc: book3s: Support building HV and PR KVM as module") Cc: stable@vger.kernel.org # v3.14+ Signed-off-by: Mahesh Salgaonkar Acked-by: Paul Mackerras Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/exceptions-64s.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index c2df8150bd7a..9519e6bdc6d7 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1408,7 +1408,7 @@ machine_check_handle_early: bne 9f /* continue in V mode if we are. */ 5: -#ifdef CONFIG_KVM_BOOK3S_64_HV +#ifdef CONFIG_KVM_BOOK3S_64_HANDLER /* * We are coming from kernel context. Check if we are coming from * guest. if yes, then we can continue. We will fall through -- cgit v1.2.1 From d525211f9d1be8b523ec7633f080f2116f5ea536 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 19 Feb 2015 18:03:11 +0100 Subject: perf: Fix irq_work 'tail' recursion Vince reported a watchdog lockup like: [] perf_tp_event+0xc4/0x210 [] perf_trace_lock+0x12a/0x160 [] lock_release+0x130/0x260 [] _raw_spin_unlock_irqrestore+0x24/0x40 [] do_send_sig_info+0x5d/0x80 [] send_sigio_to_task+0x12f/0x1a0 [] send_sigio+0xae/0x100 [] kill_fasync+0x97/0xf0 [] perf_event_wakeup+0xd4/0xf0 [] perf_pending_event+0x33/0x60 [] irq_work_run_list+0x4c/0x80 [] irq_work_run+0x18/0x40 [] smp_trace_irq_work_interrupt+0x3f/0xc0 [] trace_irq_work_interrupt+0x6d/0x80 Which is caused by an irq_work generating new irq_work and therefore not allowing forward progress. This happens because processing the perf irq_work triggers another perf event (tracepoint stuff) which in turn generates an irq_work ad infinitum. Avoid this by raising the recursion counter in the irq_work -- which effectively disables all software events (including tracepoints) from actually triggering again. Reported-by: Vince Weaver Tested-by: Vince Weaver Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Paul Mackerras Cc: Steven Rostedt Cc: Link: http://lkml.kernel.org/r/20150219170311.GH21418@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- kernel/events/core.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/kernel/events/core.c b/kernel/events/core.c index 453ef61311d4..2fabc0627165 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -4574,6 +4574,13 @@ static void perf_pending_event(struct irq_work *entry) { struct perf_event *event = container_of(entry, struct perf_event, pending); + int rctx; + + rctx = perf_swevent_get_recursion_context(); + /* + * If we 'fail' here, that's OK, it means recursion is already disabled + * and we won't recurse 'further'. + */ if (event->pending_disable) { event->pending_disable = 0; @@ -4584,6 +4591,9 @@ static void perf_pending_event(struct irq_work *entry) event->pending_wakeup = 0; perf_event_wakeup(event); } + + if (rctx >= 0) + perf_swevent_put_recursion_context(rctx); } /* -- cgit v1.2.1 From 746db9443ea57fd9c059f62c4bfbf41cf224fe13 Mon Sep 17 00:00:00 2001 From: Brian Silverman Date: Wed, 18 Feb 2015 16:23:56 -0800 Subject: sched: Fix RLIMIT_RTTIME when PI-boosting to RT When non-realtime tasks get priority-inheritance boosted to a realtime scheduling class, RLIMIT_RTTIME starts to apply to them. However, the counter used for checking this (the same one used for SCHED_RR timeslices) was not getting reset. This meant that tasks running with a non-realtime scheduling class which are repeatedly boosted to a realtime one, but never block while they are running realtime, eventually hit the timeout without ever running for a time over the limit. This patch resets the realtime timeslice counter when un-PI-boosting from an RT to a non-RT scheduling class. I have some test code with two threads and a shared PTHREAD_PRIO_INHERIT mutex which induces priority boosting and spins while boosted that gets killed by a SIGXCPU on non-fixed kernels but doesn't with this patch applied. It happens much faster with a CONFIG_PREEMPT_RT kernel, and does happen eventually with PREEMPT_VOLUNTARY kernels. Signed-off-by: Brian Silverman Signed-off-by: Peter Zijlstra (Intel) Cc: austin@peloton-tech.com Cc: Link: http://lkml.kernel.org/r/1424305436-6716-1-git-send-email-brian@peloton-tech.com Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index f0f831e8a345..62671f53202a 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -3034,6 +3034,8 @@ void rt_mutex_setprio(struct task_struct *p, int prio) } else { if (dl_prio(oldprio)) p->dl.dl_boosted = 0; + if (rt_prio(oldprio)) + p->rt.timeout = 0; p->sched_class = &fair_sched_class; } -- cgit v1.2.1 From 35a9393c95b31870a74f51a3e7455f33f5657b6f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 26 Feb 2015 16:23:11 +0100 Subject: lockdep: Fix the module unload key range freeing logic Module unload calls lockdep_free_key_range(), which removes entries from the data structures. Most of the lockdep code OTOH assumes the data structures are append only; in specific see the comments in add_lock_to_list() and look_up_lock_class(). Clearly this has only worked by accident; make it work proper. The actual scenario to make it go boom would involve the memory freed by the module unlock being re-allocated and re-used for a lock inside of a rcu-sched grace period. This is a very unlikely scenario, still better plug the hole. Use RCU list iteration in all places and ammend the comments. Change lockdep_free_key_range() to issue a sync_sched() between removal from the lists and returning -- which results in the memory being freed. Further ensure the callers are placed correctly and comment the requirements. Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Andrey Tsyvarev Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Rusty Russell Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- kernel/locking/lockdep.c | 81 ++++++++++++++++++++++++++++++++---------------- kernel/module.c | 8 ++--- 2 files changed, 59 insertions(+), 30 deletions(-) diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 88d0d4420ad2..ba77ab5f64dd 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -633,7 +633,7 @@ static int count_matching_names(struct lock_class *new_class) if (!new_class->name) return 0; - list_for_each_entry(class, &all_lock_classes, lock_entry) { + list_for_each_entry_rcu(class, &all_lock_classes, lock_entry) { if (new_class->key - new_class->subclass == class->key) return class->name_version; if (class->name && !strcmp(class->name, new_class->name)) @@ -700,10 +700,12 @@ look_up_lock_class(struct lockdep_map *lock, unsigned int subclass) hash_head = classhashentry(key); /* - * We can walk the hash lockfree, because the hash only - * grows, and we are careful when adding entries to the end: + * We do an RCU walk of the hash, see lockdep_free_key_range(). */ - list_for_each_entry(class, hash_head, hash_entry) { + if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) + return NULL; + + list_for_each_entry_rcu(class, hash_head, hash_entry) { if (class->key == key) { /* * Huh! same key, different name? Did someone trample @@ -728,7 +730,8 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force) struct lockdep_subclass_key *key; struct list_head *hash_head; struct lock_class *class; - unsigned long flags; + + DEBUG_LOCKS_WARN_ON(!irqs_disabled()); class = look_up_lock_class(lock, subclass); if (likely(class)) @@ -750,28 +753,26 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force) key = lock->key->subkeys + subclass; hash_head = classhashentry(key); - raw_local_irq_save(flags); if (!graph_lock()) { - raw_local_irq_restore(flags); return NULL; } /* * We have to do the hash-walk again, to avoid races * with another CPU: */ - list_for_each_entry(class, hash_head, hash_entry) + list_for_each_entry_rcu(class, hash_head, hash_entry) { if (class->key == key) goto out_unlock_set; + } + /* * Allocate a new key from the static array, and add it to * the hash: */ if (nr_lock_classes >= MAX_LOCKDEP_KEYS) { if (!debug_locks_off_graph_unlock()) { - raw_local_irq_restore(flags); return NULL; } - raw_local_irq_restore(flags); print_lockdep_off("BUG: MAX_LOCKDEP_KEYS too low!"); dump_stack(); @@ -798,7 +799,6 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force) if (verbose(class)) { graph_unlock(); - raw_local_irq_restore(flags); printk("\nnew class %p: %s", class->key, class->name); if (class->name_version > 1) @@ -806,15 +806,12 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force) printk("\n"); dump_stack(); - raw_local_irq_save(flags); if (!graph_lock()) { - raw_local_irq_restore(flags); return NULL; } } out_unlock_set: graph_unlock(); - raw_local_irq_restore(flags); out_set_class_cache: if (!subclass || force) @@ -870,11 +867,9 @@ static int add_lock_to_list(struct lock_class *class, struct lock_class *this, entry->distance = distance; entry->trace = *trace; /* - * Since we never remove from the dependency list, the list can - * be walked lockless by other CPUs, it's only allocation - * that must be protected by the spinlock. But this also means - * we must make new entries visible only once writes to the - * entry become visible - hence the RCU op: + * Both allocation and removal are done under the graph lock; but + * iteration is under RCU-sched; see look_up_lock_class() and + * lockdep_free_key_range(). */ list_add_tail_rcu(&entry->entry, head); @@ -1025,7 +1020,9 @@ static int __bfs(struct lock_list *source_entry, else head = &lock->class->locks_before; - list_for_each_entry(entry, head, entry) { + DEBUG_LOCKS_WARN_ON(!irqs_disabled()); + + list_for_each_entry_rcu(entry, head, entry) { if (!lock_accessed(entry)) { unsigned int cq_depth; mark_lock_accessed(entry, lock); @@ -2022,7 +2019,7 @@ static inline int lookup_chain_cache(struct task_struct *curr, * We can walk it lock-free, because entries only get added * to the hash: */ - list_for_each_entry(chain, hash_head, entry) { + list_for_each_entry_rcu(chain, hash_head, entry) { if (chain->chain_key == chain_key) { cache_hit: debug_atomic_inc(chain_lookup_hits); @@ -2996,8 +2993,18 @@ void lockdep_init_map(struct lockdep_map *lock, const char *name, if (unlikely(!debug_locks)) return; - if (subclass) + if (subclass) { + unsigned long flags; + + if (DEBUG_LOCKS_WARN_ON(current->lockdep_recursion)) + return; + + raw_local_irq_save(flags); + current->lockdep_recursion = 1; register_lock_class(lock, subclass, 1); + current->lockdep_recursion = 0; + raw_local_irq_restore(flags); + } } EXPORT_SYMBOL_GPL(lockdep_init_map); @@ -3887,9 +3894,17 @@ static inline int within(const void *addr, void *start, unsigned long size) return addr >= start && addr < start + size; } +/* + * Used in module.c to remove lock classes from memory that is going to be + * freed; and possibly re-used by other modules. + * + * We will have had one sync_sched() before getting here, so we're guaranteed + * nobody will look up these exact classes -- they're properly dead but still + * allocated. + */ void lockdep_free_key_range(void *start, unsigned long size) { - struct lock_class *class, *next; + struct lock_class *class; struct list_head *head; unsigned long flags; int i; @@ -3905,7 +3920,7 @@ void lockdep_free_key_range(void *start, unsigned long size) head = classhash_table + i; if (list_empty(head)) continue; - list_for_each_entry_safe(class, next, head, hash_entry) { + list_for_each_entry_rcu(class, head, hash_entry) { if (within(class->key, start, size)) zap_class(class); else if (within(class->name, start, size)) @@ -3916,11 +3931,25 @@ void lockdep_free_key_range(void *start, unsigned long size) if (locked) graph_unlock(); raw_local_irq_restore(flags); + + /* + * Wait for any possible iterators from look_up_lock_class() to pass + * before continuing to free the memory they refer to. + * + * sync_sched() is sufficient because the read-side is IRQ disable. + */ + synchronize_sched(); + + /* + * XXX at this point we could return the resources to the pool; + * instead we leak them. We would need to change to bitmap allocators + * instead of the linear allocators we have now. + */ } void lockdep_reset_lock(struct lockdep_map *lock) { - struct lock_class *class, *next; + struct lock_class *class; struct list_head *head; unsigned long flags; int i, j; @@ -3948,7 +3977,7 @@ void lockdep_reset_lock(struct lockdep_map *lock) head = classhash_table + i; if (list_empty(head)) continue; - list_for_each_entry_safe(class, next, head, hash_entry) { + list_for_each_entry_rcu(class, head, hash_entry) { int match = 0; for (j = 0; j < NR_LOCKDEP_CACHING_CLASSES; j++) diff --git a/kernel/module.c b/kernel/module.c index b3d634ed06c9..99fdf94efce8 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1865,7 +1865,7 @@ static void free_module(struct module *mod) kfree(mod->args); percpu_modfree(mod); - /* Free lock-classes: */ + /* Free lock-classes; relies on the preceding sync_rcu(). */ lockdep_free_key_range(mod->module_core, mod->core_size); /* Finally, free the core (containing the module structure) */ @@ -3349,9 +3349,6 @@ static int load_module(struct load_info *info, const char __user *uargs, module_bug_cleanup(mod); mutex_unlock(&module_mutex); - /* Free lock-classes: */ - lockdep_free_key_range(mod->module_core, mod->core_size); - /* we can't deallocate the module until we clear memory protection */ unset_module_init_ro_nx(mod); unset_module_core_ro_nx(mod); @@ -3375,6 +3372,9 @@ static int load_module(struct load_info *info, const char __user *uargs, synchronize_rcu(); mutex_unlock(&module_mutex); free_module: + /* Free lock-classes; relies on the preceding sync_rcu() */ + lockdep_free_key_range(mod->module_core, mod->core_size); + module_deallocate(mod, info); free_copy: free_copy(info); -- cgit v1.2.1 From a127d2bcf1fbc8c8e0b5cf0dab54f7d3ff50ce47 Mon Sep 17 00:00:00 2001 From: Preeti U Murthy Date: Wed, 18 Mar 2015 16:19:27 +0530 Subject: timers/tick/broadcast-hrtimer: Fix suspicious RCU usage in idle loop The hrtimer mode of broadcast queues hrtimers in the idle entry path so as to wakeup cpus in deep idle states. The associated call graph is : cpuidle_idle_call() |____ clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, ....)) |_____tick_broadcast_set_event() |____clockevents_program_event() |____bc_set_next() The hrtimer_{start/cancel} functions call into tracing which uses RCU. But it is not legal to call into RCU in cpuidle because it is one of the quiescent states. Hence protect this region with RCU_NONIDLE which informs RCU that the cpu is momentarily non-idle. As an aside it is helpful to point out that the clock event device that is programmed here is not a per-cpu clock device; it is a pseudo clock device, used by the broadcast framework alone. The per-cpu clock device programming never goes through bc_set_next(). Signed-off-by: Preeti U Murthy Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Paul E. McKenney Cc: linuxppc-dev@ozlabs.org Cc: mpe@ellerman.id.au Cc: tglx@linutronix.de Link: http://lkml.kernel.org/r/20150318104705.17763.56668.stgit@preeti.in.ibm.com Signed-off-by: Ingo Molnar --- kernel/time/tick-broadcast-hrtimer.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/kernel/time/tick-broadcast-hrtimer.c b/kernel/time/tick-broadcast-hrtimer.c index eb682d5c697c..6aac4beedbbe 100644 --- a/kernel/time/tick-broadcast-hrtimer.c +++ b/kernel/time/tick-broadcast-hrtimer.c @@ -49,6 +49,7 @@ static void bc_set_mode(enum clock_event_mode mode, */ static int bc_set_next(ktime_t expires, struct clock_event_device *bc) { + int bc_moved; /* * We try to cancel the timer first. If the callback is on * flight on some other cpu then we let it handle it. If we @@ -60,9 +61,15 @@ static int bc_set_next(ktime_t expires, struct clock_event_device *bc) * restart the timer because we are in the callback, but we * can set the expiry time and let the callback return * HRTIMER_RESTART. + * + * Since we are in the idle loop at this point and because + * hrtimer_{start/cancel} functions call into tracing, + * calls to these functions must be bound within RCU_NONIDLE. */ - if (hrtimer_try_to_cancel(&bctimer) >= 0) { - hrtimer_start(&bctimer, expires, HRTIMER_MODE_ABS_PINNED); + RCU_NONIDLE(bc_moved = (hrtimer_try_to_cancel(&bctimer) >= 0) ? + !hrtimer_start(&bctimer, expires, HRTIMER_MODE_ABS_PINNED) : + 0); + if (bc_moved) { /* Bind the "device" to the cpu */ bc->bound_on = smp_processor_id(); } else if (bc->bound_on == smp_processor_id()) { -- cgit v1.2.1 From 47514da3ac20150cdf764466fbc2010c0fca0163 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sun, 15 Mar 2015 21:32:12 +0100 Subject: parisc: Add compile-time check when adding new syscalls Signed-off-by: Helge Deller --- arch/parisc/kernel/syscall_table.S | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S index 5a8997d63899..8eefb12d1d33 100644 --- a/arch/parisc/kernel/syscall_table.S +++ b/arch/parisc/kernel/syscall_table.S @@ -55,8 +55,8 @@ #define ENTRY_COMP(_name_) .word sys_##_name_ #endif - ENTRY_SAME(restart_syscall) /* 0 */ - ENTRY_SAME(exit) +90: ENTRY_SAME(restart_syscall) /* 0 */ +91: ENTRY_SAME(exit) ENTRY_SAME(fork_wrapper) ENTRY_SAME(read) ENTRY_SAME(write) @@ -439,7 +439,10 @@ ENTRY_SAME(bpf) ENTRY_COMP(execveat) - /* Nothing yet */ + +.ifne (. - 90b) - (__NR_Linux_syscalls * (91b - 90b)) +.error "size of syscall table does not fit value of __NR_Linux_syscalls" +.endif #undef ENTRY_SAME #undef ENTRY_DIFF -- cgit v1.2.1 From 0e0da48dee8dfbcc0df4b8e2ff4efc7a2c89ba6b Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Wed, 18 Mar 2015 13:42:38 -0400 Subject: parisc: mm: don't count preallocated pmds The patch dc6c9a35b66b520cf67e05d8ca60ebecad3b0479 that counts pmds allocated for a process introduced a bug on 64-bit PA-RISC kernels. The PA-RISC architecture preallocates one pmd with each pgd. This preallocated pmd can never be freed - pmd_free does nothing when it is called with this pmd. When the kernel attempts to free this preallocated pmd, it decreases the count of allocated pmds. The result is that the counter underflows and this error is reported. This patch fixes the bug by artifically incrementing the counter in pmd_free when the kernel tries to free the preallocated pmd. Signed-off-by: Mikulas Patocka Acked-by: Kirill A. Shutemov Signed-off-by: Helge Deller --- arch/parisc/include/asm/pgalloc.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/parisc/include/asm/pgalloc.h b/arch/parisc/include/asm/pgalloc.h index f213f5b4c423..63e9ecae1310 100644 --- a/arch/parisc/include/asm/pgalloc.h +++ b/arch/parisc/include/asm/pgalloc.h @@ -74,8 +74,13 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) { #ifdef CONFIG_64BIT if(pmd_flag(*pmd) & PxD_FLAG_ATTACHED) - /* This is the permanent pmd attached to the pgd; - * cannot free it */ + /* + * This is the permanent pmd attached to the pgd; + * cannot free it. + * Increment the counter to compensate for the decrement + * done by generic mm code. + */ + mm_inc_nr_pmds(mm); return; #endif free_pages((unsigned long)pmd, PMD_ORDER); -- cgit v1.2.1 From 2e3f0ab2bb4853694570b9610b1fcfbfa8fd295b Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Mon, 16 Mar 2015 21:17:50 +0100 Subject: parisc: Fix pmd code to depend on PT_NLEVELS value, not on CONFIG_64BIT Make the code which sets up the pmd depend on PT_NLEVELS == 3, not on CONFIG_64BIT. The reason is, that a 64bit kernel with a page size greater than 4k doesn't need the pmd and thus has PT_NLEVELS = 2. Signed-off-by: Helge Deller --- arch/parisc/include/asm/pgalloc.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/arch/parisc/include/asm/pgalloc.h b/arch/parisc/include/asm/pgalloc.h index 63e9ecae1310..d17437238a2c 100644 --- a/arch/parisc/include/asm/pgalloc.h +++ b/arch/parisc/include/asm/pgalloc.h @@ -26,7 +26,7 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm) if (likely(pgd != NULL)) { memset(pgd, 0, PAGE_SIZE< Date: Mon, 23 Mar 2015 11:17:56 +0000 Subject: metag: Fix ioremap_wc/ioremap_cached build errors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When ioremap_wc() or ioremap_cached() are used without first including asm/pgtable.h, the _PAGE_CACHEABLE or _PAGE_WR_COMBINE definitions aren't found, resulting in build errors like the following (in next-20150323 due to "lib: devres: add a helper function for ioremap_wc"): lib/devres.c: In function ‘devm_ioremap_wc’: lib/devres.c:91: error: ‘_PAGE_WR_COMBINE’ undeclared We can't easily include asm/pgtable.h in asm/io.h due to dependency problems, so split out the _PAGE_* definitions from asm/pgtable.h into a separate asm/pgtable-bits.h header (as a couple of other architectures already do), and include that in io.h instead. Signed-off-by: James Hogan Cc: linux-metag@vger.kernel.org Cc: Abhilash Kesavan Cc: Greg Kroah-Hartman --- arch/metag/include/asm/io.h | 1 + arch/metag/include/asm/pgtable-bits.h | 104 ++++++++++++++++++++++++++++++++++ arch/metag/include/asm/pgtable.h | 95 +------------------------------ 3 files changed, 106 insertions(+), 94 deletions(-) create mode 100644 arch/metag/include/asm/pgtable-bits.h diff --git a/arch/metag/include/asm/io.h b/arch/metag/include/asm/io.h index 9359e5048442..d5779b0ec573 100644 --- a/arch/metag/include/asm/io.h +++ b/arch/metag/include/asm/io.h @@ -2,6 +2,7 @@ #define _ASM_METAG_IO_H #include +#include #define IO_SPACE_LIMIT 0 diff --git a/arch/metag/include/asm/pgtable-bits.h b/arch/metag/include/asm/pgtable-bits.h new file mode 100644 index 000000000000..25ba6729f496 --- /dev/null +++ b/arch/metag/include/asm/pgtable-bits.h @@ -0,0 +1,104 @@ +/* + * Meta page table definitions. + */ + +#ifndef _METAG_PGTABLE_BITS_H +#define _METAG_PGTABLE_BITS_H + +#include + +/* + * Definitions for MMU descriptors + * + * These are the hardware bits in the MMCU pte entries. + * Derived from the Meta toolkit headers. + */ +#define _PAGE_PRESENT MMCU_ENTRY_VAL_BIT +#define _PAGE_WRITE MMCU_ENTRY_WR_BIT +#define _PAGE_PRIV MMCU_ENTRY_PRIV_BIT +/* Write combine bit - this can cause writes to occur out of order */ +#define _PAGE_WR_COMBINE MMCU_ENTRY_WRC_BIT +/* Sys coherent bit - this bit is never used by Linux */ +#define _PAGE_SYS_COHERENT MMCU_ENTRY_SYS_BIT +#define _PAGE_ALWAYS_ZERO_1 0x020 +#define _PAGE_CACHE_CTRL0 0x040 +#define _PAGE_CACHE_CTRL1 0x080 +#define _PAGE_ALWAYS_ZERO_2 0x100 +#define _PAGE_ALWAYS_ZERO_3 0x200 +#define _PAGE_ALWAYS_ZERO_4 0x400 +#define _PAGE_ALWAYS_ZERO_5 0x800 + +/* These are software bits that we stuff into the gaps in the hardware + * pte entries that are not used. Note, these DO get stored in the actual + * hardware, but the hardware just does not use them. + */ +#define _PAGE_ACCESSED _PAGE_ALWAYS_ZERO_1 +#define _PAGE_DIRTY _PAGE_ALWAYS_ZERO_2 + +/* Pages owned, and protected by, the kernel. */ +#define _PAGE_KERNEL _PAGE_PRIV + +/* No cacheing of this page */ +#define _PAGE_CACHE_WIN0 (MMCU_CWIN_UNCACHED << MMCU_ENTRY_CWIN_S) +/* burst cacheing - good for data streaming */ +#define _PAGE_CACHE_WIN1 (MMCU_CWIN_BURST << MMCU_ENTRY_CWIN_S) +/* One cache way per thread */ +#define _PAGE_CACHE_WIN2 (MMCU_CWIN_C1SET << MMCU_ENTRY_CWIN_S) +/* Full on cacheing */ +#define _PAGE_CACHE_WIN3 (MMCU_CWIN_CACHED << MMCU_ENTRY_CWIN_S) + +#define _PAGE_CACHEABLE (_PAGE_CACHE_WIN3 | _PAGE_WR_COMBINE) + +/* which bits are used for cache control ... */ +#define _PAGE_CACHE_MASK (_PAGE_CACHE_CTRL0 | _PAGE_CACHE_CTRL1 | \ + _PAGE_WR_COMBINE) + +/* This is a mask of the bits that pte_modify is allowed to change. */ +#define _PAGE_CHG_MASK (PAGE_MASK) + +#define _PAGE_SZ_SHIFT 1 +#define _PAGE_SZ_4K (0x0) +#define _PAGE_SZ_8K (0x1 << _PAGE_SZ_SHIFT) +#define _PAGE_SZ_16K (0x2 << _PAGE_SZ_SHIFT) +#define _PAGE_SZ_32K (0x3 << _PAGE_SZ_SHIFT) +#define _PAGE_SZ_64K (0x4 << _PAGE_SZ_SHIFT) +#define _PAGE_SZ_128K (0x5 << _PAGE_SZ_SHIFT) +#define _PAGE_SZ_256K (0x6 << _PAGE_SZ_SHIFT) +#define _PAGE_SZ_512K (0x7 << _PAGE_SZ_SHIFT) +#define _PAGE_SZ_1M (0x8 << _PAGE_SZ_SHIFT) +#define _PAGE_SZ_2M (0x9 << _PAGE_SZ_SHIFT) +#define _PAGE_SZ_4M (0xa << _PAGE_SZ_SHIFT) +#define _PAGE_SZ_MASK (0xf << _PAGE_SZ_SHIFT) + +#if defined(CONFIG_PAGE_SIZE_4K) +#define _PAGE_SZ (_PAGE_SZ_4K) +#elif defined(CONFIG_PAGE_SIZE_8K) +#define _PAGE_SZ (_PAGE_SZ_8K) +#elif defined(CONFIG_PAGE_SIZE_16K) +#define _PAGE_SZ (_PAGE_SZ_16K) +#endif +#define _PAGE_TABLE (_PAGE_SZ | _PAGE_PRESENT) + +#if defined(CONFIG_HUGETLB_PAGE_SIZE_8K) +# define _PAGE_SZHUGE (_PAGE_SZ_8K) +#elif defined(CONFIG_HUGETLB_PAGE_SIZE_16K) +# define _PAGE_SZHUGE (_PAGE_SZ_16K) +#elif defined(CONFIG_HUGETLB_PAGE_SIZE_32K) +# define _PAGE_SZHUGE (_PAGE_SZ_32K) +#elif defined(CONFIG_HUGETLB_PAGE_SIZE_64K) +# define _PAGE_SZHUGE (_PAGE_SZ_64K) +#elif defined(CONFIG_HUGETLB_PAGE_SIZE_128K) +# define _PAGE_SZHUGE (_PAGE_SZ_128K) +#elif defined(CONFIG_HUGETLB_PAGE_SIZE_256K) +# define _PAGE_SZHUGE (_PAGE_SZ_256K) +#elif defined(CONFIG_HUGETLB_PAGE_SIZE_512K) +# define _PAGE_SZHUGE (_PAGE_SZ_512K) +#elif defined(CONFIG_HUGETLB_PAGE_SIZE_1M) +# define _PAGE_SZHUGE (_PAGE_SZ_1M) +#elif defined(CONFIG_HUGETLB_PAGE_SIZE_2M) +# define _PAGE_SZHUGE (_PAGE_SZ_2M) +#elif defined(CONFIG_HUGETLB_PAGE_SIZE_4M) +# define _PAGE_SZHUGE (_PAGE_SZ_4M) +#endif + +#endif /* _METAG_PGTABLE_BITS_H */ diff --git a/arch/metag/include/asm/pgtable.h b/arch/metag/include/asm/pgtable.h index d0604c0a8702..ffa3a3a2ecad 100644 --- a/arch/metag/include/asm/pgtable.h +++ b/arch/metag/include/asm/pgtable.h @@ -5,6 +5,7 @@ #ifndef _METAG_PGTABLE_H #define _METAG_PGTABLE_H +#include #include /* Invalid regions on Meta: 0x00000000-0x001FFFFF and 0xFFFF0000-0xFFFFFFFF */ @@ -20,100 +21,6 @@ #define VMALLOC_END 0x7FFFFFFF #endif -/* - * Definitions for MMU descriptors - * - * These are the hardware bits in the MMCU pte entries. - * Derived from the Meta toolkit headers. - */ -#define _PAGE_PRESENT MMCU_ENTRY_VAL_BIT -#define _PAGE_WRITE MMCU_ENTRY_WR_BIT -#define _PAGE_PRIV MMCU_ENTRY_PRIV_BIT -/* Write combine bit - this can cause writes to occur out of order */ -#define _PAGE_WR_COMBINE MMCU_ENTRY_WRC_BIT -/* Sys coherent bit - this bit is never used by Linux */ -#define _PAGE_SYS_COHERENT MMCU_ENTRY_SYS_BIT -#define _PAGE_ALWAYS_ZERO_1 0x020 -#define _PAGE_CACHE_CTRL0 0x040 -#define _PAGE_CACHE_CTRL1 0x080 -#define _PAGE_ALWAYS_ZERO_2 0x100 -#define _PAGE_ALWAYS_ZERO_3 0x200 -#define _PAGE_ALWAYS_ZERO_4 0x400 -#define _PAGE_ALWAYS_ZERO_5 0x800 - -/* These are software bits that we stuff into the gaps in the hardware - * pte entries that are not used. Note, these DO get stored in the actual - * hardware, but the hardware just does not use them. - */ -#define _PAGE_ACCESSED _PAGE_ALWAYS_ZERO_1 -#define _PAGE_DIRTY _PAGE_ALWAYS_ZERO_2 - -/* Pages owned, and protected by, the kernel. */ -#define _PAGE_KERNEL _PAGE_PRIV - -/* No cacheing of this page */ -#define _PAGE_CACHE_WIN0 (MMCU_CWIN_UNCACHED << MMCU_ENTRY_CWIN_S) -/* burst cacheing - good for data streaming */ -#define _PAGE_CACHE_WIN1 (MMCU_CWIN_BURST << MMCU_ENTRY_CWIN_S) -/* One cache way per thread */ -#define _PAGE_CACHE_WIN2 (MMCU_CWIN_C1SET << MMCU_ENTRY_CWIN_S) -/* Full on cacheing */ -#define _PAGE_CACHE_WIN3 (MMCU_CWIN_CACHED << MMCU_ENTRY_CWIN_S) - -#define _PAGE_CACHEABLE (_PAGE_CACHE_WIN3 | _PAGE_WR_COMBINE) - -/* which bits are used for cache control ... */ -#define _PAGE_CACHE_MASK (_PAGE_CACHE_CTRL0 | _PAGE_CACHE_CTRL1 | \ - _PAGE_WR_COMBINE) - -/* This is a mask of the bits that pte_modify is allowed to change. */ -#define _PAGE_CHG_MASK (PAGE_MASK) - -#define _PAGE_SZ_SHIFT 1 -#define _PAGE_SZ_4K (0x0) -#define _PAGE_SZ_8K (0x1 << _PAGE_SZ_SHIFT) -#define _PAGE_SZ_16K (0x2 << _PAGE_SZ_SHIFT) -#define _PAGE_SZ_32K (0x3 << _PAGE_SZ_SHIFT) -#define _PAGE_SZ_64K (0x4 << _PAGE_SZ_SHIFT) -#define _PAGE_SZ_128K (0x5 << _PAGE_SZ_SHIFT) -#define _PAGE_SZ_256K (0x6 << _PAGE_SZ_SHIFT) -#define _PAGE_SZ_512K (0x7 << _PAGE_SZ_SHIFT) -#define _PAGE_SZ_1M (0x8 << _PAGE_SZ_SHIFT) -#define _PAGE_SZ_2M (0x9 << _PAGE_SZ_SHIFT) -#define _PAGE_SZ_4M (0xa << _PAGE_SZ_SHIFT) -#define _PAGE_SZ_MASK (0xf << _PAGE_SZ_SHIFT) - -#if defined(CONFIG_PAGE_SIZE_4K) -#define _PAGE_SZ (_PAGE_SZ_4K) -#elif defined(CONFIG_PAGE_SIZE_8K) -#define _PAGE_SZ (_PAGE_SZ_8K) -#elif defined(CONFIG_PAGE_SIZE_16K) -#define _PAGE_SZ (_PAGE_SZ_16K) -#endif -#define _PAGE_TABLE (_PAGE_SZ | _PAGE_PRESENT) - -#if defined(CONFIG_HUGETLB_PAGE_SIZE_8K) -# define _PAGE_SZHUGE (_PAGE_SZ_8K) -#elif defined(CONFIG_HUGETLB_PAGE_SIZE_16K) -# define _PAGE_SZHUGE (_PAGE_SZ_16K) -#elif defined(CONFIG_HUGETLB_PAGE_SIZE_32K) -# define _PAGE_SZHUGE (_PAGE_SZ_32K) -#elif defined(CONFIG_HUGETLB_PAGE_SIZE_64K) -# define _PAGE_SZHUGE (_PAGE_SZ_64K) -#elif defined(CONFIG_HUGETLB_PAGE_SIZE_128K) -# define _PAGE_SZHUGE (_PAGE_SZ_128K) -#elif defined(CONFIG_HUGETLB_PAGE_SIZE_256K) -# define _PAGE_SZHUGE (_PAGE_SZ_256K) -#elif defined(CONFIG_HUGETLB_PAGE_SIZE_512K) -# define _PAGE_SZHUGE (_PAGE_SZ_512K) -#elif defined(CONFIG_HUGETLB_PAGE_SIZE_1M) -# define _PAGE_SZHUGE (_PAGE_SZ_1M) -#elif defined(CONFIG_HUGETLB_PAGE_SIZE_2M) -# define _PAGE_SZHUGE (_PAGE_SZ_2M) -#elif defined(CONFIG_HUGETLB_PAGE_SIZE_4M) -# define _PAGE_SZHUGE (_PAGE_SZ_4M) -#endif - /* * The Linux memory management assumes a three-level page table setup. On * Meta, we use that, but "fold" the mid level into the top-level page -- cgit v1.2.1 From e6e96d73a2aaaa54ed2c0f98693f4bf572712f1c Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 23 Mar 2015 09:32:37 -0600 Subject: NVMe: Initialize device list head before starting Driver recovery requires the device's list node to have been initialized. Fixes: https://lkml.org/lkml/2015/3/22/262 Reported-by: Steven Noonan Signed-off-by: Keith Busch Cc: Matthew Wilcox Cc: Jens Axboe Signed-off-by: Jens Axboe --- drivers/block/nvme-core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index ceb32dd52a6c..e23be20a3417 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -3003,6 +3003,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) } get_device(dev->device); + INIT_LIST_HEAD(&dev->node); INIT_WORK(&dev->probe_work, nvme_async_probe); schedule_work(&dev->probe_work); return 0; -- cgit v1.2.1 From c72efb658f7c8b27ca3d0efb5cfd5ded9fcac89e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 23 Mar 2015 00:18:48 -0400 Subject: writeback: fix possible underflow in write bandwidth calculation From 1ebf33901ecc75d9496862dceb1ef0377980587c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 23 Mar 2015 00:08:19 -0400 2f800fbd777b ("writeback: fix dirtied pages accounting on redirty") introduced account_page_redirty() which reverts stat updates for a redirtied page, making BDI_DIRTIED no longer monotonically increasing. bdi_update_write_bandwidth() uses the delta in BDI_DIRTIED as the basis for bandwidth calculation. While unlikely, since the above patch, the newer value may be lower than the recorded past value and underflow the bandwidth calculation leading to a wild result. Fix it by subtracing min of the old and new values when calculating delta. AFAIK, there hasn't been any report of it happening but the resulting erratic behavior would be non-critical and temporary, so it's possible that the issue is happening without being reported. The risk of the fix is very low, so tagged for -stable. Signed-off-by: Tejun Heo Cc: Jens Axboe Cc: Jan Kara Cc: Wu Fengguang Cc: Greg Thelen Fixes: 2f800fbd777b ("writeback: fix dirtied pages accounting on redirty") Cc: stable@vger.kernel.org Signed-off-by: Jens Axboe --- mm/page-writeback.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/mm/page-writeback.c b/mm/page-writeback.c index b4fd980a93eb..644bcb665773 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -857,8 +857,11 @@ static void bdi_update_write_bandwidth(struct backing_dev_info *bdi, * bw * elapsed + write_bandwidth * (period - elapsed) * write_bandwidth = --------------------------------------------------- * period + * + * @written may have decreased due to account_page_redirty(). + * Avoid underflowing @bw calculation. */ - bw = written - bdi->written_stamp; + bw = written - min(written, bdi->written_stamp); bw *= HZ; if (unlikely(elapsed > period)) { do_div(bw, elapsed); -- cgit v1.2.1 From 2077cef4d5c29cf886192ec32066f783d6a80db8 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 23 Mar 2015 09:22:10 -0700 Subject: sparc64: Fix several bugs in memmove(). Firstly, handle zero length calls properly. Believe it or not there are a few of these happening during early boot. Next, we can't just drop to a memcpy() call in the forward copy case where dst <= src. The reason is that the cache initializing stores used in the Niagara memcpy() implementations can end up clearing out cache lines before we've sourced their original contents completely. For example, considering NG4memcpy, the main unrolled loop begins like this: load src + 0x00 load src + 0x08 load src + 0x10 load src + 0x18 load src + 0x20 store dst + 0x00 Assume dst is 64 byte aligned and let's say that dst is src - 8 for this memcpy() call. That store at the end there is the one to the first line in the cache line, thus clearing the whole line, which thus clobbers "src + 0x28" before it even gets loaded. To avoid this, just fall through to a simple copy only mildly optimized for the case where src and dst are 8 byte aligned and the length is a multiple of 8 as well. We could get fancy and call GENmemcpy() but this is good enough for how this thing is actually used. Reported-by: David Ahern Reported-by: Bob Picco Signed-off-by: David S. Miller --- arch/sparc/lib/memmove.S | 35 ++++++++++++++++++++++++++++++++--- 1 file changed, 32 insertions(+), 3 deletions(-) diff --git a/arch/sparc/lib/memmove.S b/arch/sparc/lib/memmove.S index b7f6334e159f..857ad4f8905f 100644 --- a/arch/sparc/lib/memmove.S +++ b/arch/sparc/lib/memmove.S @@ -8,9 +8,11 @@ .text ENTRY(memmove) /* o0=dst o1=src o2=len */ - mov %o0, %g1 + brz,pn %o2, 99f + mov %o0, %g1 + cmp %o0, %o1 - bleu,pt %xcc, memcpy + bleu,pt %xcc, 2f add %o1, %o2, %g7 cmp %g7, %o0 bleu,pt %xcc, memcpy @@ -24,7 +26,34 @@ ENTRY(memmove) /* o0=dst o1=src o2=len */ stb %g7, [%o0] bne,pt %icc, 1b sub %o0, 1, %o0 - +99: retl mov %g1, %o0 + + /* We can't just call memcpy for these memmove cases. On some + * chips the memcpy uses cache initializing stores and when dst + * and src are close enough, those can clobber the source data + * before we've loaded it in. + */ +2: or %o0, %o1, %g7 + or %o2, %g7, %g7 + andcc %g7, 0x7, %g0 + bne,pn %xcc, 4f + nop + +3: ldx [%o1], %g7 + add %o1, 8, %o1 + subcc %o2, 8, %o2 + add %o0, 8, %o0 + bne,pt %icc, 3b + stx %g7, [%o0 - 0x8] + ba,a,pt %xcc, 99b + +4: ldub [%o1], %g7 + add %o1, 1, %o1 + subcc %o2, 1, %o2 + add %o0, 1, %o0 + bne,pt %icc, 4b + stb %g7, [%o0 - 0x1] + ba,a,pt %xcc, 99b ENDPROC(memmove) -- cgit v1.2.1 From 80b311d3118842eb681397233faa0d588df13f92 Mon Sep 17 00:00:00 2001 From: Sebastian Wicki Date: Mon, 23 Mar 2015 17:23:11 +0100 Subject: ALSA: hda - Add dock support for Thinkpad T450s (17aa:5036) This model uses the same dock port as the previous generation. Signed-off-by: Sebastian Wicki Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 526398a4a442..7b0f72c5c6f1 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5036,6 +5036,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x501a, "Thinkpad", ALC283_FIXUP_INT_MIC), SND_PCI_QUIRK(0x17aa, 0x501e, "Thinkpad L440", ALC292_FIXUP_TPT440_DOCK), SND_PCI_QUIRK(0x17aa, 0x5026, "Thinkpad", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), + SND_PCI_QUIRK(0x17aa, 0x5036, "Thinkpad T450s", ALC292_FIXUP_TPT440_DOCK), SND_PCI_QUIRK(0x17aa, 0x5109, "Thinkpad", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x17aa, 0x3bf8, "Quanta FL1", ALC269_FIXUP_PCM_44K), SND_PCI_QUIRK(0x17aa, 0x9e54, "LENOVO NB", ALC269_FIXUP_LENOVO_EAPD), -- cgit v1.2.1 From e53f21bce4d35a93b23d8fa1a840860f6c74f59e Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 23 Mar 2015 15:06:50 +0000 Subject: arm64: Use the reserved TTBR0 if context switching to the init_mm The idle_task_exit() function may call switch_mm() with next == &init_mm. On arm64, init_mm.pgd cannot be used for user mappings, so this patch simply sets the reserved TTBR0. Cc: Reported-by: Jon Medhurst (Tixy) Tested-by: Jon Medhurst (Tixy) Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/mmu_context.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index a9eee33dfa62..101a42bde728 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -151,6 +151,15 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next, { unsigned int cpu = smp_processor_id(); + /* + * init_mm.pgd does not contain any user mappings and it is always + * active for kernel addresses in TTBR1. Just set the reserved TTBR0. + */ + if (next == &init_mm) { + cpu_set_reserved_ttbr0(); + return; + } + if (!cpumask_test_and_set_cpu(cpu, mm_cpumask(next)) || prev != next) check_and_switch_context(next, tsk); } -- cgit v1.2.1 From 63a4f065ece613b6d575b538234375b0e9c23bbc Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Mon, 23 Mar 2015 17:01:43 -0400 Subject: dm: fix add_disk() NULL pointer due to race with free_dev() Commit c4db59d31e39 ("fs: don't reassign dirty inodes to default_backing_dev_info") exposed DM to a latent race in free_dev() vs add_disk() in relation to management of the device's minor number. Fix this by refactoring free_dev() to match cleanup order of the alloc_dev() error path. Move cleanup of the gendisk, queue, and bdev to _before_ the cleanup of the idr managed minor number. Also, purely due to cleanup that fell out during the free_dev() audit: - adjust dm_blk_close() to access the gendisk's private_data under the _minor_lock spinlock. - move __dm_destroy()'s dm_get_live_table() call out from under the _minor_lock spinlock. Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=1202449 Reported-by: Zdenek Kabelac Reported-by: Jeff Moyer Signed-off-by: Mike Snitzer --- drivers/md/dm.c | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 9b641b38b857..8001fe9e3434 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -433,7 +433,6 @@ static int dm_blk_open(struct block_device *bdev, fmode_t mode) dm_get(md); atomic_inc(&md->open_count); - out: spin_unlock(&_minor_lock); @@ -442,16 +441,20 @@ out: static void dm_blk_close(struct gendisk *disk, fmode_t mode) { - struct mapped_device *md = disk->private_data; + struct mapped_device *md; spin_lock(&_minor_lock); + md = disk->private_data; + if (WARN_ON(!md)) + goto out; + if (atomic_dec_and_test(&md->open_count) && (test_bit(DMF_DEFERRED_REMOVE, &md->flags))) queue_work(deferred_remove_workqueue, &deferred_remove_work); dm_put(md); - +out: spin_unlock(&_minor_lock); } @@ -2241,7 +2244,6 @@ static void free_dev(struct mapped_device *md) int minor = MINOR(disk_devt(md->disk)); unlock_fs(md); - bdput(md->bdev); destroy_workqueue(md->wq); if (md->kworker_task) @@ -2252,19 +2254,22 @@ static void free_dev(struct mapped_device *md) mempool_destroy(md->rq_pool); if (md->bs) bioset_free(md->bs); - blk_integrity_unregister(md->disk); - del_gendisk(md->disk); + cleanup_srcu_struct(&md->io_barrier); free_table_devices(&md->table_devices); - free_minor(minor); + dm_stats_cleanup(&md->stats); spin_lock(&_minor_lock); md->disk->private_data = NULL; spin_unlock(&_minor_lock); - + if (blk_get_integrity(md->disk)) + blk_integrity_unregister(md->disk); + del_gendisk(md->disk); put_disk(md->disk); blk_cleanup_queue(md->queue); - dm_stats_cleanup(&md->stats); + bdput(md->bdev); + free_minor(minor); + module_put(THIS_MODULE); kfree(md); } @@ -2642,8 +2647,9 @@ static void __dm_destroy(struct mapped_device *md, bool wait) might_sleep(); - spin_lock(&_minor_lock); map = dm_get_live_table(md, &srcu_idx); + + spin_lock(&_minor_lock); idr_replace(&_minor_idr, MINOR_ALLOCED, MINOR(disk_devt(dm_disk(md)))); set_bit(DMF_FREEING, &md->flags); spin_unlock(&_minor_lock); -- cgit v1.2.1 From c806a6ad35bfa6c92249cd0ca4772d5ac3f8cb68 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Wed, 18 Mar 2015 19:38:22 +0100 Subject: KVM: x86: call irq notifiers with directed EOI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit kvm_ioapic_update_eoi() wasn't called if directed EOI was enabled. We need to do that for irq notifiers. (Like with edge interrupts.) Fix it by skipping EOI broadcast only. Bug: https://bugzilla.kernel.org/show_bug.cgi?id=82211 Signed-off-by: Radim Krčmář Reviewed-by: Paolo Bonzini Tested-by: Bandan Das Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/ioapic.c | 4 +++- arch/x86/kvm/lapic.c | 3 +-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index b1947e0f3e10..46d4449772bc 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -422,6 +422,7 @@ static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, struct kvm_ioapic *ioapic, int vector, int trigger_mode) { int i; + struct kvm_lapic *apic = vcpu->arch.apic; for (i = 0; i < IOAPIC_NUM_PINS; i++) { union kvm_ioapic_redirect_entry *ent = &ioapic->redirtbl[i]; @@ -443,7 +444,8 @@ static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, i); spin_lock(&ioapic->lock); - if (trigger_mode != IOAPIC_LEVEL_TRIG) + if (trigger_mode != IOAPIC_LEVEL_TRIG || + kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI) continue; ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG); diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index bd4e34de24c7..4ee827d7bf36 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -833,8 +833,7 @@ int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2) static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector) { - if (!(kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI) && - kvm_ioapic_handles_vector(apic->vcpu->kvm, vector)) { + if (kvm_ioapic_handles_vector(apic->vcpu->kvm, vector)) { int trigger_mode; if (apic_test_vector(vector, apic->regs + APIC_TMR)) trigger_mode = IOAPIC_LEVEL_TRIG; -- cgit v1.2.1 From 744961341d472db6272ed9b42319a90f5a2aa7c4 Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Fri, 20 Mar 2015 12:21:37 +0000 Subject: kvm: avoid page allocation failure in kvm_set_memory_region() KVM guest can fail to startup with following trace on host: qemu-system-x86: page allocation failure: order:4, mode:0x40d0 Call Trace: dump_stack+0x47/0x67 warn_alloc_failed+0xee/0x150 __alloc_pages_direct_compact+0x14a/0x150 __alloc_pages_nodemask+0x776/0xb80 alloc_kmem_pages+0x3a/0x110 kmalloc_order+0x13/0x50 kmemdup+0x1b/0x40 __kvm_set_memory_region+0x24a/0x9f0 [kvm] kvm_set_ioapic+0x130/0x130 [kvm] kvm_set_memory_region+0x21/0x40 [kvm] kvm_vm_ioctl+0x43f/0x750 [kvm] Failure happens when attempting to allocate pages for 'struct kvm_memslots', however it doesn't have to be present in physically contiguous (kmalloc-ed) address space, change allocation to kvm_kvzalloc() so that it will be vmalloc-ed when its size is more then a page. Signed-off-by: Igor Mammedov Signed-off-by: Marcelo Tosatti --- virt/kvm/kvm_main.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index a2214d9609bd..cc6a25d95fbf 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -471,7 +471,7 @@ static struct kvm *kvm_create_vm(unsigned long type) BUILD_BUG_ON(KVM_MEM_SLOTS_NUM > SHRT_MAX); r = -ENOMEM; - kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); + kvm->memslots = kvm_kvzalloc(sizeof(struct kvm_memslots)); if (!kvm->memslots) goto out_err_no_srcu; @@ -522,7 +522,7 @@ out_err_no_srcu: out_err_no_disable: for (i = 0; i < KVM_NR_BUSES; i++) kfree(kvm->buses[i]); - kfree(kvm->memslots); + kvfree(kvm->memslots); kvm_arch_free_vm(kvm); return ERR_PTR(r); } @@ -578,7 +578,7 @@ static void kvm_free_physmem(struct kvm *kvm) kvm_for_each_memslot(memslot, slots) kvm_free_physmem_slot(kvm, memslot, NULL); - kfree(kvm->memslots); + kvfree(kvm->memslots); } static void kvm_destroy_devices(struct kvm *kvm) @@ -871,10 +871,10 @@ int __kvm_set_memory_region(struct kvm *kvm, goto out_free; } - slots = kmemdup(kvm->memslots, sizeof(struct kvm_memslots), - GFP_KERNEL); + slots = kvm_kvzalloc(sizeof(struct kvm_memslots)); if (!slots) goto out_free; + memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots)); if ((change == KVM_MR_DELETE) || (change == KVM_MR_MOVE)) { slot = id_to_memslot(slots, mem->slot); @@ -917,7 +917,7 @@ int __kvm_set_memory_region(struct kvm *kvm, kvm_arch_commit_memory_region(kvm, mem, &old, change); kvm_free_physmem_slot(kvm, &old, &new); - kfree(old_memslots); + kvfree(old_memslots); /* * IOMMU mapping: New slots need to be mapped. Old slots need to be @@ -936,7 +936,7 @@ int __kvm_set_memory_region(struct kvm *kvm, return 0; out_slots: - kfree(slots); + kvfree(slots); out_free: kvm_free_physmem_slot(kvm, &new, &old); out: -- cgit v1.2.1 From 8218c3f4df3bb1c637c17552405039a6dd3c1ee1 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Fri, 27 Feb 2015 12:58:13 +0100 Subject: drm: Fixup racy refcounting in plane_force_disable Originally it was impossible to be dropping the last refcount in this function since there was always one around still from the idr. But in commit 83f45fc360c8e16a330474860ebda872d1384c8c Author: Daniel Vetter Date: Wed Aug 6 09:10:18 2014 +0200 drm: Don't grab an fb reference for the idr we've switched to weak references, broke that assumption but forgot to fix it up. Since we still force-disable planes it's only possible to hit this when racing multiple rmfb with fbdev restoring or similar evil things. As long as userspace is nice it's impossible to hit the BUG_ON. But the BUG_ON would most likely be hit from fbdev code, which usually invovles the console_lock besides all modeset locks. So very likely we'd never get the bug reports if this was hit in the wild, hence better be safe than sorry and backport. Spotted by Matt Roper while reviewing other patches. [airlied: pull this back into 4.0 - the oops happens there] Cc: stable@vger.kernel.org Cc: Matt Roper Reviewed-by: Matt Roper Signed-off-by: Daniel Vetter Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_crtc.c | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index f6d04c7b5115..679b10e34fb5 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -525,17 +525,6 @@ void drm_framebuffer_reference(struct drm_framebuffer *fb) } EXPORT_SYMBOL(drm_framebuffer_reference); -static void drm_framebuffer_free_bug(struct kref *kref) -{ - BUG(); -} - -static void __drm_framebuffer_unreference(struct drm_framebuffer *fb) -{ - DRM_DEBUG("%p: FB ID: %d (%d)\n", fb, fb->base.id, atomic_read(&fb->refcount.refcount)); - kref_put(&fb->refcount, drm_framebuffer_free_bug); -} - /** * drm_framebuffer_unregister_private - unregister a private fb from the lookup idr * @fb: fb to unregister @@ -1320,7 +1309,7 @@ void drm_plane_force_disable(struct drm_plane *plane) return; } /* disconnect the plane from the fb and crtc: */ - __drm_framebuffer_unreference(plane->old_fb); + drm_framebuffer_unreference(plane->old_fb); plane->old_fb = NULL; plane->fb = NULL; plane->crtc = NULL; -- cgit v1.2.1 From f3eab7184ddcd4867cf42e3274ba24a66e1e093d Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Sun, 22 Mar 2015 14:51:51 +0000 Subject: arm64: percpu: Make this_cpu accessors pre-empt safe this_cpu operations were implemented for arm64 in: 5284e1b arm64: xchg: Implement cmpxchg_double f97fc81 arm64: percpu: Implement this_cpu operations Unfortunately, it is possible for pre-emption to take place between address generation and data access. This can lead to cases where data is being manipulated by this_cpu for a different CPU than it was called on. Which effectively breaks the spec. This patch disables pre-emption for the this_cpu operations guaranteeing that address generation and data manipulation take place without a pre-emption in-between. Fixes: 5284e1b4bc8a ("arm64: xchg: Implement cmpxchg_double") Fixes: f97fc810798c ("arm64: percpu: Implement this_cpu operations") Reported-by: Mark Rutland Acked-by: Will Deacon Signed-off-by: Steve Capper [catalin.marinas@arm.com: remove space after type cast] Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cmpxchg.h | 32 +++++++++++++++++++++-------- arch/arm64/include/asm/percpu.h | 44 ++++++++++++++++++++++++++++++---------- 2 files changed, 57 insertions(+), 19 deletions(-) diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h index cb9593079f29..d8c25b7b18fb 100644 --- a/arch/arm64/include/asm/cmpxchg.h +++ b/arch/arm64/include/asm/cmpxchg.h @@ -246,14 +246,30 @@ static inline unsigned long __cmpxchg_mb(volatile void *ptr, unsigned long old, __ret; \ }) -#define this_cpu_cmpxchg_1(ptr, o, n) cmpxchg_local(raw_cpu_ptr(&(ptr)), o, n) -#define this_cpu_cmpxchg_2(ptr, o, n) cmpxchg_local(raw_cpu_ptr(&(ptr)), o, n) -#define this_cpu_cmpxchg_4(ptr, o, n) cmpxchg_local(raw_cpu_ptr(&(ptr)), o, n) -#define this_cpu_cmpxchg_8(ptr, o, n) cmpxchg_local(raw_cpu_ptr(&(ptr)), o, n) - -#define this_cpu_cmpxchg_double_8(ptr1, ptr2, o1, o2, n1, n2) \ - cmpxchg_double_local(raw_cpu_ptr(&(ptr1)), raw_cpu_ptr(&(ptr2)), \ - o1, o2, n1, n2) +#define _protect_cmpxchg_local(pcp, o, n) \ +({ \ + typeof(*raw_cpu_ptr(&(pcp))) __ret; \ + preempt_disable(); \ + __ret = cmpxchg_local(raw_cpu_ptr(&(pcp)), o, n); \ + preempt_enable(); \ + __ret; \ +}) + +#define this_cpu_cmpxchg_1(ptr, o, n) _protect_cmpxchg_local(ptr, o, n) +#define this_cpu_cmpxchg_2(ptr, o, n) _protect_cmpxchg_local(ptr, o, n) +#define this_cpu_cmpxchg_4(ptr, o, n) _protect_cmpxchg_local(ptr, o, n) +#define this_cpu_cmpxchg_8(ptr, o, n) _protect_cmpxchg_local(ptr, o, n) + +#define this_cpu_cmpxchg_double_8(ptr1, ptr2, o1, o2, n1, n2) \ +({ \ + int __ret; \ + preempt_disable(); \ + __ret = cmpxchg_double_local( raw_cpu_ptr(&(ptr1)), \ + raw_cpu_ptr(&(ptr2)), \ + o1, o2, n1, n2); \ + preempt_enable(); \ + __ret; \ +}) #define cmpxchg64(ptr,o,n) cmpxchg((ptr),(o),(n)) #define cmpxchg64_local(ptr,o,n) cmpxchg_local((ptr),(o),(n)) diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h index 09da25bc596f..4fde8c1df97f 100644 --- a/arch/arm64/include/asm/percpu.h +++ b/arch/arm64/include/asm/percpu.h @@ -204,25 +204,47 @@ static inline unsigned long __percpu_xchg(void *ptr, unsigned long val, return ret; } +#define _percpu_read(pcp) \ +({ \ + typeof(pcp) __retval; \ + preempt_disable(); \ + __retval = (typeof(pcp))__percpu_read(raw_cpu_ptr(&(pcp)), \ + sizeof(pcp)); \ + preempt_enable(); \ + __retval; \ +}) + +#define _percpu_write(pcp, val) \ +do { \ + preempt_disable(); \ + __percpu_write(raw_cpu_ptr(&(pcp)), (unsigned long)(val), \ + sizeof(pcp)); \ + preempt_enable(); \ +} while(0) \ + +#define _pcp_protect(operation, pcp, val) \ +({ \ + typeof(pcp) __retval; \ + preempt_disable(); \ + __retval = (typeof(pcp))operation(raw_cpu_ptr(&(pcp)), \ + (val), sizeof(pcp)); \ + preempt_enable(); \ + __retval; \ +}) + #define _percpu_add(pcp, val) \ - __percpu_add(raw_cpu_ptr(&(pcp)), val, sizeof(pcp)) + _pcp_protect(__percpu_add, pcp, val) -#define _percpu_add_return(pcp, val) (typeof(pcp)) (_percpu_add(pcp, val)) +#define _percpu_add_return(pcp, val) _percpu_add(pcp, val) #define _percpu_and(pcp, val) \ - __percpu_and(raw_cpu_ptr(&(pcp)), val, sizeof(pcp)) + _pcp_protect(__percpu_and, pcp, val) #define _percpu_or(pcp, val) \ - __percpu_or(raw_cpu_ptr(&(pcp)), val, sizeof(pcp)) - -#define _percpu_read(pcp) (typeof(pcp)) \ - (__percpu_read(raw_cpu_ptr(&(pcp)), sizeof(pcp))) - -#define _percpu_write(pcp, val) \ - __percpu_write(raw_cpu_ptr(&(pcp)), (unsigned long)(val), sizeof(pcp)) + _pcp_protect(__percpu_or, pcp, val) #define _percpu_xchg(pcp, val) (typeof(pcp)) \ - (__percpu_xchg(raw_cpu_ptr(&(pcp)), (unsigned long)(val), sizeof(pcp))) + _pcp_protect(__percpu_xchg, pcp, (unsigned long)(val)) #define this_cpu_add_1(pcp, val) _percpu_add(pcp, val) #define this_cpu_add_2(pcp, val) _percpu_add(pcp, val) -- cgit v1.2.1 From b3494a4ab20f6bdf74cdf2badf7918bb65ee8a00 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Mon, 23 Mar 2015 12:32:54 -0700 Subject: x86/asm/entry: Check for syscall exit work with IRQs disabled We currently have a race: if we're preempted during syscall exit, we can fail to process syscall return work that is queued up while we're preempted in ret_from_sys_call after checking ti.flags. Fix it by disabling interrupts before checking ti.flags. Reported-by: Stefan Seyfried Reported-by: Takashi Iwai Signed-off-by: Andy Lutomirski Acked-by: Denys Vlasenko Cc: Jiri Kosina Cc: Tejun Heo Fixes: 96b6352c1271 ("x86_64, entry: Remove the syscall exit audit") Link: http://lkml.kernel.org/r/189320d42b4d671df78c10555976bb10af1ffc75.1427137498.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/entry_64.S | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 1d74d161687c..2babb393915e 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -364,12 +364,21 @@ system_call_fastpath: * Has incomplete stack frame and undefined top of stack. */ ret_from_sys_call: - testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) - jnz int_ret_from_sys_call_fixup /* Go the the slow path */ - LOCKDEP_SYS_EXIT DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF + + /* + * We must check ti flags with interrupts (or at least preemption) + * off because we must *never* return to userspace without + * processing exit work that is enqueued if we're preempted here. + * In particular, returning to userspace with any of the one-shot + * flags (TIF_NOTIFY_RESUME, TIF_USER_RETURN_NOTIFY, etc) set is + * very bad. + */ + testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) + jnz int_ret_from_sys_call_fixup /* Go the the slow path */ + CFI_REMEMBER_STATE /* * sysretq will re-enable interrupts: @@ -386,7 +395,7 @@ ret_from_sys_call: int_ret_from_sys_call_fixup: FIXUP_TOP_OF_STACK %r11, -ARGOFFSET - jmp int_ret_from_sys_call + jmp int_ret_from_sys_call_irqs_off /* Do syscall tracing */ tracesys: @@ -432,6 +441,7 @@ tracesys_phase2: GLOBAL(int_ret_from_sys_call) DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF +int_ret_from_sys_call_irqs_off: movl $_TIF_ALLWORK_MASK,%edi /* edi: mask to check */ GLOBAL(int_with_check) -- cgit v1.2.1 From 977104e5606a6df8fe22c0dacd3620fc00b58d61 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Wed, 25 Mar 2015 00:53:26 +0800 Subject: ARM: dts: sun4i: a10-lime: Override and remove 1008MHz OPP setting The Olimex A10-Lime is known to be unstable when running at 1008MHz. Signed-off-by: Chen-Yu Tsai Signed-off-by: Maxime Ripard --- arch/arm/boot/dts/sun4i-a10-olinuxino-lime.dts | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/arch/arm/boot/dts/sun4i-a10-olinuxino-lime.dts b/arch/arm/boot/dts/sun4i-a10-olinuxino-lime.dts index ab7891c43231..75742f8f96f3 100644 --- a/arch/arm/boot/dts/sun4i-a10-olinuxino-lime.dts +++ b/arch/arm/boot/dts/sun4i-a10-olinuxino-lime.dts @@ -56,6 +56,22 @@ model = "Olimex A10-OLinuXino-LIME"; compatible = "olimex,a10-olinuxino-lime", "allwinner,sun4i-a10"; + cpus { + cpu0: cpu@0 { + /* + * The A10-Lime is known to be unstable + * when running at 1008 MHz + */ + operating-points = < + /* kHz uV */ + 912000 1350000 + 864000 1300000 + 624000 1250000 + >; + cooling-max-level = <2>; + }; + }; + soc@01c00000 { emac: ethernet@01c0b000 { pinctrl-names = "default"; -- cgit v1.2.1 From 370a9b5fb04a0d5cc7b7699c788616d6976f4476 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Wed, 25 Mar 2015 00:53:27 +0800 Subject: ARM: dts: sunxi: Remove overclocked/overvoltaged OPP Without proper regulator support for individual boards, it is dangerous to have overclocked/overvoltaged OPPs in the list. Cpufreq will increase the frequency without the accompanying voltage increase, resulting in an unstable system. Remove them for now. We can revisit them with the new version of OPP bindings, which support boost settings and frequency ranges, among other things. Signed-off-by: Chen-Yu Tsai Signed-off-by: Maxime Ripard --- arch/arm/boot/dts/sun4i-a10.dtsi | 3 +-- arch/arm/boot/dts/sun5i-a13.dtsi | 3 +-- arch/arm/boot/dts/sun7i-a20.dtsi | 3 +-- 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/arch/arm/boot/dts/sun4i-a10.dtsi b/arch/arm/boot/dts/sun4i-a10.dtsi index 5c2925831f20..eebb7853e00b 100644 --- a/arch/arm/boot/dts/sun4i-a10.dtsi +++ b/arch/arm/boot/dts/sun4i-a10.dtsi @@ -75,7 +75,6 @@ clock-latency = <244144>; /* 8 32k periods */ operating-points = < /* kHz uV */ - 1056000 1500000 1008000 1400000 912000 1350000 864000 1300000 @@ -83,7 +82,7 @@ >; #cooling-cells = <2>; cooling-min-level = <0>; - cooling-max-level = <4>; + cooling-max-level = <3>; }; }; diff --git a/arch/arm/boot/dts/sun5i-a13.dtsi b/arch/arm/boot/dts/sun5i-a13.dtsi index f8818f1edbbe..883cb4873688 100644 --- a/arch/arm/boot/dts/sun5i-a13.dtsi +++ b/arch/arm/boot/dts/sun5i-a13.dtsi @@ -47,7 +47,6 @@ clock-latency = <244144>; /* 8 32k periods */ operating-points = < /* kHz uV */ - 1104000 1500000 1008000 1400000 912000 1350000 864000 1300000 @@ -57,7 +56,7 @@ >; #cooling-cells = <2>; cooling-min-level = <0>; - cooling-max-level = <6>; + cooling-max-level = <5>; }; }; diff --git a/arch/arm/boot/dts/sun7i-a20.dtsi b/arch/arm/boot/dts/sun7i-a20.dtsi index 3a8530b79f1c..fdd181792b4b 100644 --- a/arch/arm/boot/dts/sun7i-a20.dtsi +++ b/arch/arm/boot/dts/sun7i-a20.dtsi @@ -105,7 +105,6 @@ clock-latency = <244144>; /* 8 32k periods */ operating-points = < /* kHz uV */ - 1008000 1450000 960000 1400000 912000 1400000 864000 1300000 @@ -116,7 +115,7 @@ >; #cooling-cells = <2>; cooling-min-level = <0>; - cooling-max-level = <7>; + cooling-max-level = <6>; }; cpu@1 { -- cgit v1.2.1 From 59a58cb34d3fe73e6c899cc5d9a87428ca662925 Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Thu, 5 Feb 2015 18:30:20 +0000 Subject: drm/i915: Don't try to reference the fb in get_initial_plane_config() Tvrtko noticed a new warning on boot: WARNING: CPU: 1 PID: 353 at include/linux/kref.h:47 drm_framebuffer_reference+0x6c/0x80 [drm]() Call Trace: [] dump_stack+0x4f/0x7b [] warn_slowpath_common+0xaa/0xd0 [] warn_slowpath_null+0x1a/0x20 [] drm_framebuffer_reference+0x6c/0x80 [drm] [] update_state_fb.isra.54+0x47/0x50 [i915] [] skylake_get_initial_plane_config+0x93c/0x950 [i915] [] intel_modeset_init+0x1551/0x17c0 [i915] [] i915_driver_load+0xed0/0x11e0 [i915] [] ? _raw_spin_unlock_irqrestore+0x51/0x70 [] drm_dev_register+0x77/0x110 [drm] [] drm_get_pci_dev+0x11b/0x1f0 [drm] [] ? trace_hardirqs_on+0xd/0x10 [] ? _raw_spin_unlock_irqrestore+0x51/0x70 [] i915_pci_probe+0x56/0x60 [i915] [] pci_device_probe+0x7c/0x100 [] driver_probe_device+0x16d/0x380 We cannot take a reference at this point, not before intel_framebuffer_init() and the underlying drm_framebuffer_init(). Introduced in: commit 706dc7b549175e47f23e913b7f1e52874a7d0f56 Author: Matt Roper Date: Tue Feb 3 13:10:04 2015 -0800 drm/i915: Ensure plane->state->fb stays in sync with plane->fb v2: Don't move update_state_fb(). It was moved around because I originally put update_state_fb() in intel_alloc_plane_obj() before finding a better place. (Matt) Reviewed-by: Matt Roper Reported-by: Tvrtko Ursulin Cc: Matt Roper Cc: Tvrtko Ursulin Signed-off-by: Damien Lespiau Signed-off-by: Daniel Vetter From drm-next: (cherry picked from commit f55548b5af87ebfc586ca75748947f1c1b1a4a52) Signed-off-by: Dave Airlie --- drivers/gpu/drm/i915/intel_display.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 6d22128d97b1..1c12262029fb 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -2438,8 +2438,10 @@ intel_find_plane_obj(struct intel_crtc *intel_crtc, if (!intel_crtc->base.primary->fb) return; - if (intel_alloc_plane_obj(intel_crtc, plane_config)) + if (intel_alloc_plane_obj(intel_crtc, plane_config)) { + update_state_fb(intel_crtc->base.primary); return; + } kfree(intel_crtc->base.primary->fb); intel_crtc->base.primary->fb = NULL; @@ -6663,7 +6665,6 @@ i9xx_get_initial_plane_config(struct intel_crtc *crtc, plane_config->size); crtc->base.primary->fb = fb; - update_state_fb(crtc->base.primary); } static void chv_crtc_clock_get(struct intel_crtc *crtc, @@ -7704,7 +7705,6 @@ skylake_get_initial_plane_config(struct intel_crtc *crtc, plane_config->size); crtc->base.primary->fb = fb; - update_state_fb(crtc->base.primary); return; error: @@ -7798,7 +7798,6 @@ ironlake_get_initial_plane_config(struct intel_crtc *crtc, plane_config->size); crtc->base.primary->fb = fb; - update_state_fb(crtc->base.primary); } static bool ironlake_get_pipe_config(struct intel_crtc *crtc, -- cgit v1.2.1 From 1833c9f647e9bda1cd24653ff8f9c207b5f5b911 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sat, 21 Mar 2015 12:43:08 +0100 Subject: s390/smp: reenable smt after resume After a suspend/resume cycle we missed to enable smt again, which leads to all sorts of bugs, since the kernel assumes smt is enabled, while the hardware thinks it is not. Reported-and-tested-by: Sebastian Ott Reported-by: Stefan Haberland Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/swsusp_asm64.S | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/s390/kernel/swsusp_asm64.S b/arch/s390/kernel/swsusp_asm64.S index 6b09fdffbd2f..ca6294645dd3 100644 --- a/arch/s390/kernel/swsusp_asm64.S +++ b/arch/s390/kernel/swsusp_asm64.S @@ -177,6 +177,17 @@ restart_entry: lhi %r1,1 sigp %r1,%r0,SIGP_SET_ARCHITECTURE sam64 +#ifdef CONFIG_SMP + larl %r1,smp_cpu_mt_shift + icm %r1,15,0(%r1) + jz smt_done + llgfr %r1,%r1 +smt_loop: + sigp %r1,%r0,SIGP_SET_MULTI_THREADING + brc 8,smt_done /* accepted */ + brc 2,smt_loop /* busy, try again */ +smt_done: +#endif larl %r1,.Lnew_pgm_check_psw lpswe 0(%r1) pgm_check_entry: -- cgit v1.2.1 From 6436a123a147db51a0b06024a8350f4c230e73ff Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 23 Mar 2015 18:01:35 -0700 Subject: selinux: fix sel_write_enforce broken return value Return a negative error value like the rest of the entries in this function. Cc: Signed-off-by: Joe Perches Acked-by: Stephen Smalley [PM: tweaked subject line] Signed-off-by: Paul Moore --- security/selinux/selinuxfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index 33db1ad4fd10..138949a31eab 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -152,7 +152,7 @@ static ssize_t sel_write_enforce(struct file *file, const char __user *buf, goto out; /* No partial writes. */ - length = EINVAL; + length = -EINVAL; if (*ppos != 0) goto out; -- cgit v1.2.1 From fb903811c4e3bcd93ccc247235834e2632a3508d Mon Sep 17 00:00:00 2001 From: Ed Cashin Date: Wed, 25 Mar 2015 15:55:06 -0700 Subject: aoe: update aoe maintainer information The coraid.com email address is defunct. The old aoe support area hosted at coraid.com is no longer up. These changes update the email and website to current ones. Signed-off-by: Ed Cashin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 358eb0105e00..c6cd0f60635c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1675,8 +1675,8 @@ F: drivers/misc/eeprom/at24.c F: include/linux/platform_data/at24.h ATA OVER ETHERNET (AOE) DRIVER -M: "Ed L. Cashin" -W: http://support.coraid.com/support/linux +M: "Ed L. Cashin" +W: http://www.openaoe.org/ S: Supported F: Documentation/aoe/ F: drivers/block/aoe/ -- cgit v1.2.1 From ddd2a30d41a5bc578ab094f6dbf080697ea1a7dd Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Wed, 25 Mar 2015 15:55:09 -0700 Subject: drivers/rtc/rtc-mrst: fix suspend/resume The Moorestown RTC driver implements suspend and resume callbacks and assigns them to the suspend and resume fields of the device_driver struct. These callbacks are never actually called by anything though. Modify the driver to properly use dev_pm_ops so that the suspend and resume functions are actually executed upon suspend/resume. [akpm@linux-foundation.org: device_driver.name is const char *] Signed-off-by: Lars-Peter Clausen Cc: Alessandro Zummo Cc: Feng Tang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rtc/rtc-mrst.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/rtc/rtc-mrst.c b/drivers/rtc/rtc-mrst.c index e2436d140175..3a6fd3a8a2ec 100644 --- a/drivers/rtc/rtc-mrst.c +++ b/drivers/rtc/rtc-mrst.c @@ -413,8 +413,8 @@ static void rtc_mrst_do_remove(struct device *dev) mrst->dev = NULL; } -#ifdef CONFIG_PM -static int mrst_suspend(struct device *dev, pm_message_t mesg) +#ifdef CONFIG_PM_SLEEP +static int mrst_suspend(struct device *dev) { struct mrst_rtc *mrst = dev_get_drvdata(dev); unsigned char tmp; @@ -453,7 +453,7 @@ static int mrst_suspend(struct device *dev, pm_message_t mesg) */ static inline int mrst_poweroff(struct device *dev) { - return mrst_suspend(dev, PMSG_HIBERNATE); + return mrst_suspend(dev); } static int mrst_resume(struct device *dev) @@ -490,9 +490,11 @@ static int mrst_resume(struct device *dev) return 0; } +static SIMPLE_DEV_PM_OPS(mrst_pm_ops, mrst_suspend, mrst_resume); +#define MRST_PM_OPS (&mrst_pm_ops) + #else -#define mrst_suspend NULL -#define mrst_resume NULL +#define MRST_PM_OPS NULL static inline int mrst_poweroff(struct device *dev) { @@ -529,9 +531,8 @@ static struct platform_driver vrtc_mrst_platform_driver = { .remove = vrtc_mrst_platform_remove, .shutdown = vrtc_mrst_platform_shutdown, .driver = { - .name = (char *) driver_name, - .suspend = mrst_suspend, - .resume = mrst_resume, + .name = driver_name, + .pm = MRST_PM_OPS, } }; -- cgit v1.2.1 From 3fe89b3e2a7bbf3e97657104b9b33a9d81b950b3 Mon Sep 17 00:00:00 2001 From: Leon Yu Date: Wed, 25 Mar 2015 15:55:11 -0700 Subject: mm: fix anon_vma->degree underflow in anon_vma endless growing prevention I have constantly stumbled upon "kernel BUG at mm/rmap.c:399!" after upgrading to 3.19 and had no luck with 4.0-rc1 neither. So, after looking into new logic introduced by commit 7a3ef208e662 ("mm: prevent endless growth of anon_vma hierarchy"), I found chances are that unlink_anon_vmas() is called without incrementing dst->anon_vma->degree in anon_vma_clone() due to allocation failure. If dst->anon_vma is not NULL in error path, its degree will be incorrectly decremented in unlink_anon_vmas() and eventually underflow when exiting as a result of another call to unlink_anon_vmas(). That's how "kernel BUG at mm/rmap.c:399!" is triggered for me. This patch fixes the underflow by dropping dst->anon_vma when allocation fails. It's safe to do so regardless of original value of dst->anon_vma because dst->anon_vma doesn't have valid meaning if anon_vma_clone() fails. Besides, callers don't care dst->anon_vma in such case neither. Also suggested by Michal Hocko, we can clean up vma_adjust() a bit as anon_vma_clone() now does the work. [akpm@linux-foundation.org: tweak comment] Fixes: 7a3ef208e662 ("mm: prevent endless growth of anon_vma hierarchy") Signed-off-by: Leon Yu Signed-off-by: Konstantin Khlebnikov Reviewed-by: Michal Hocko Acked-by: Rik van Riel Acked-by: David Rientjes Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mmap.c | 4 +--- mm/rmap.c | 7 +++++++ 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/mm/mmap.c b/mm/mmap.c index da9990acc08b..9ec50a368634 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -774,10 +774,8 @@ again: remove_next = 1 + (end > next->vm_end); importer->anon_vma = exporter->anon_vma; error = anon_vma_clone(importer, exporter); - if (error) { - importer->anon_vma = NULL; + if (error) return error; - } } } diff --git a/mm/rmap.c b/mm/rmap.c index 5e3e09081164..c161a14b6a8f 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -287,6 +287,13 @@ int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src) return 0; enomem_failure: + /* + * dst->anon_vma is dropped here otherwise its degree can be incorrectly + * decremented in unlink_anon_vmas(). + * We can safely do this because callers of anon_vma_clone() don't care + * about dst->anon_vma if anon_vma_clone() failed. + */ + dst->anon_vma = NULL; unlink_anon_vmas(dst); return -ENOMEM; } -- cgit v1.2.1 From f683739539e819e9b821a197d80e52258510837b Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Wed, 25 Mar 2015 15:55:14 -0700 Subject: mm/pagewalk.c: prevent positive return value of walk_page_test() from being passed to callers walk_page_test() is purely pagewalk's internal stuff, and its positive return values are not intended to be passed to the callers of pagewalk. However, in the current code if the last vma in the do-while loop in walk_page_range() happens to return a positive value, it leaks outside walk_page_range(). So the user visible effect is invalid/unexpected return value (according to the reporter, mbind() causes it.) This patch fixes it simply by reinitializing the return value after checked. Another exposed interface, walk_page_vma(), already returns 0 for such cases so no problem. Fixes: fafaa4264eba ("pagewalk: improve vma handling") Signed-off-by: Naoya Horiguchi Signed-off-by: Kazutomo Yoshii Reported-by: Kazutomo Yoshii Acked-by: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/pagewalk.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/mm/pagewalk.c b/mm/pagewalk.c index 75c1f2878519..29f2f8b853ae 100644 --- a/mm/pagewalk.c +++ b/mm/pagewalk.c @@ -265,8 +265,15 @@ int walk_page_range(unsigned long start, unsigned long end, vma = vma->vm_next; err = walk_page_test(start, next, walk); - if (err > 0) + if (err > 0) { + /* + * positive return values are purely for + * controlling the pagewalk, so should never + * be passed to the callers. + */ + err = 0; continue; + } if (err < 0) break; } -- cgit v1.2.1 From 59ec96719f7521002804c0862888971410e288af Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 25 Mar 2015 15:55:17 -0700 Subject: MAINTAINERS: correct rtc armada38x pattern entry Commit c6a95dbee793 ("MAINTAINERS: add the RTC driver for the Armada38x") typoed the pattern, fix it. Signed-off-by: Joe Perches Acked-by: Gregory CLEMENT Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index c6cd0f60635c..12ebef95224d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1186,7 +1186,7 @@ M: Sebastian Hesselbarth L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained F: arch/arm/mach-mvebu/ -F: drivers/rtc/armada38x-rtc +F: drivers/rtc/rtc-armada38x.c ARM/Marvell Berlin SoC support M: Sebastian Hesselbarth -- cgit v1.2.1 From b0dc3a342af36f95a68fe229b8f0f73552c5ca08 Mon Sep 17 00:00:00 2001 From: Gu Zheng Date: Wed, 25 Mar 2015 15:55:20 -0700 Subject: mm/memory hotplug: postpone the reset of obsolete pgdat Qiu Xishi reported the following BUG when testing hot-add/hot-remove node under stress condition: BUG: unable to handle kernel paging request at 0000000000025f60 IP: next_online_pgdat+0x1/0x50 PGD 0 Oops: 0000 [#1] SMP ACPI: Device does not support D3cold Modules linked in: fuse nls_iso8859_1 nls_cp437 vfat fat loop dm_mod coretemp mperf crc32c_intel ghash_clmulni_intel aesni_intel ablk_helper cryptd lrw gf128mul glue_helper aes_x86_64 pcspkr microcode igb dca i2c_algo_bit ipv6 megaraid_sas iTCO_wdt i2c_i801 i2c_core iTCO_vendor_support tg3 sg hwmon ptp lpc_ich pps_core mfd_core acpi_pad rtc_cmos button ext3 jbd mbcache sd_mod crc_t10dif scsi_dh_alua scsi_dh_rdac scsi_dh_hp_sw scsi_dh_emc scsi_dh ahci libahci libata scsi_mod [last unloaded: rasf] CPU: 23 PID: 238 Comm: kworker/23:1 Tainted: G O 3.10.15-5885-euler0302 #1 Hardware name: HUAWEI TECHNOLOGIES CO.,LTD. Huawei N1/Huawei N1, BIOS V100R001 03/02/2015 Workqueue: events vmstat_update task: ffffa800d32c0000 ti: ffffa800d32ae000 task.ti: ffffa800d32ae000 RIP: 0010: next_online_pgdat+0x1/0x50 RSP: 0018:ffffa800d32afce8 EFLAGS: 00010286 RAX: 0000000000001440 RBX: ffffffff81da53b8 RCX: 0000000000000082 RDX: 0000000000000000 RSI: 0000000000000082 RDI: 0000000000000000 RBP: ffffa800d32afd28 R08: ffffffff81c93bfc R09: ffffffff81cbdc96 R10: 00000000000040ec R11: 00000000000000a0 R12: ffffa800fffb3440 R13: ffffa800d32afd38 R14: 0000000000000017 R15: ffffa800e6616800 FS: 0000000000000000(0000) GS:ffffa800e6600000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000025f60 CR3: 0000000001a0b000 CR4: 00000000001407e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: refresh_cpu_vm_stats+0xd0/0x140 vmstat_update+0x11/0x50 process_one_work+0x194/0x3d0 worker_thread+0x12b/0x410 kthread+0xc6/0xd0 ret_from_fork+0x7c/0xb0 The cause is the "memset(pgdat, 0, sizeof(*pgdat))" at the end of try_offline_node, which will reset all the content of pgdat to 0, as the pgdat is accessed lock-free, so that the users still using the pgdat will panic, such as the vmstat_update routine. process A: offline node XX: vmstat_updat() refresh_cpu_vm_stats() for_each_populated_zone() find online node XX cond_resched() offline cpu and memory, then try_offline_node() node_set_offline(nid), and memset(pgdat, 0, sizeof(*pgdat)) zone = next_zone(zone) pg_data_t *pgdat = zone->zone_pgdat; // here pgdat is NULL now next_online_pgdat(pgdat) next_online_node(pgdat->node_id); // NULL pointer access So the solution here is postponing the reset of obsolete pgdat from try_offline_node() to hotadd_new_pgdat(), and just resetting pgdat->nr_zones and pgdat->classzone_idx to be 0 rather than the memset 0 to avoid breaking pointer information in pgdat. Signed-off-by: Gu Zheng Reported-by: Xishi Qiu Suggested-by: KAMEZAWA Hiroyuki Cc: David Rientjes Cc: Yasuaki Ishimatsu Cc: Taku Izumi Cc: Tang Chen Cc: Xie XiuQi Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory_hotplug.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 9fab10795bea..65842d688b7c 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1092,6 +1092,10 @@ static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start) return NULL; arch_refresh_nodedata(nid, pgdat); + } else { + /* Reset the nr_zones and classzone_idx to 0 before reuse */ + pgdat->nr_zones = 0; + pgdat->classzone_idx = 0; } /* we can use NODE_DATA(nid) from here */ @@ -1977,15 +1981,6 @@ void try_offline_node(int nid) if (is_vmalloc_addr(zone->wait_table)) vfree(zone->wait_table); } - - /* - * Since there is no way to guarentee the address of pgdat/zone is not - * on stack of any kernel threads or used by other kernel objects - * without reference counting or other symchronizing method, do not - * reset node_data and free pgdat here. Just reset it to 0 and reuse - * the memory when the node is online again. - */ - memset(pgdat, 0, sizeof(*pgdat)); } EXPORT_SYMBOL(try_offline_node); -- cgit v1.2.1 From 859b7a0e89120505c304d7afbbe90325abaa0a6b Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 25 Mar 2015 15:55:23 -0700 Subject: mm/slub: fix lockups on PREEMPT && !SMP kernels Commit 9aabf810a67c ("mm/slub: optimize alloc/free fastpath by removing preemption on/off") introduced an occasional hang for kernels built with CONFIG_PREEMPT && !CONFIG_SMP. The problem is the following loop the patch introduced to slab_alloc_node and slab_free: do { tid = this_cpu_read(s->cpu_slab->tid); c = raw_cpu_ptr(s->cpu_slab); } while (IS_ENABLED(CONFIG_PREEMPT) && unlikely(tid != c->tid)); GCC 4.9 has been observed to hoist the load of c and c->tid above the loop for !SMP kernels (as in this case raw_cpu_ptr(x) is compile-time constant and does not force a reload). On arm64 the generated assembly looks like: ldr x4, [x0,#8] loop: ldr x1, [x0,#8] cmp x1, x4 b.ne loop If the thread is preempted between the load of c->tid (into x1) and tid (into x4), and an allocation or free occurs in another thread (bumping the cpu_slab's tid), the thread will be stuck in the loop until s->cpu_slab->tid wraps, which may be forever in the absence of allocations/frees on the same CPU. This patch changes the loop condition to access c->tid with READ_ONCE. This ensures that the value is reloaded even when the compiler would otherwise assume it could cache the value, and also ensures that the load will not be torn. Signed-off-by: Mark Rutland Cc: Catalin Marinas Acked-by: Christoph Lameter Cc: David Rientjes Cc: Jesper Dangaard Brouer Cc: Joonsoo Kim Cc: Pekka Enberg Cc: Steve Capper Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slub.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 6832c4eab104..82c473780c91 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2449,7 +2449,8 @@ redo: do { tid = this_cpu_read(s->cpu_slab->tid); c = raw_cpu_ptr(s->cpu_slab); - } while (IS_ENABLED(CONFIG_PREEMPT) && unlikely(tid != c->tid)); + } while (IS_ENABLED(CONFIG_PREEMPT) && + unlikely(tid != READ_ONCE(c->tid))); /* * Irqless object alloc/free algorithm used here depends on sequence @@ -2718,7 +2719,8 @@ redo: do { tid = this_cpu_read(s->cpu_slab->tid); c = raw_cpu_ptr(s->cpu_slab); - } while (IS_ENABLED(CONFIG_PREEMPT) && unlikely(tid != c->tid)); + } while (IS_ENABLED(CONFIG_PREEMPT) && + unlikely(tid != READ_ONCE(c->tid))); /* Same with comment on barrier() in slab_alloc_node() */ barrier(); -- cgit v1.2.1 From cfa869438282be84ad4110bba5027ef1fbbe71e4 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Wed, 25 Mar 2015 15:55:26 -0700 Subject: mm/page_alloc.c: call kernel_map_pages in unset_migrateype_isolate Commit 3c605096d315 ("mm/page_alloc: restrict max order of merging on isolated pageblock") changed the logic of unset_migratetype_isolate to check the buddy allocator and explicitly call __free_pages to merge. The page that is being freed in this path never had prep_new_page called so set_page_refcounted is called explicitly but there is no call to kernel_map_pages. With the default kernel_map_pages this is mostly harmless but if kernel_map_pages does any manipulation of the page tables (unmapping or setting pages to read only) this may trigger a fault: alloc_contig_range test_pages_isolated(ceb00, ced00) failed Unable to handle kernel paging request at virtual address ffffffc0cec00000 pgd = ffffffc045fc4000 [ffffffc0cec00000] *pgd=0000000000000000 Internal error: Oops: 9600004f [#1] PREEMPT SMP Modules linked in: exfatfs CPU: 1 PID: 23237 Comm: TimedEventQueue Not tainted 3.10.49-gc72ad36-dirty #1 task: ffffffc03de52100 ti: ffffffc015388000 task.ti: ffffffc015388000 PC is at memset+0xc8/0x1c0 LR is at kernel_map_pages+0x1ec/0x244 Fix this by calling kernel_map_pages to ensure the page is set in the page table properly Fixes: 3c605096d315 ("mm/page_alloc: restrict max order of merging on isolated pageblock") Signed-off-by: Laura Abbott Cc: Naoya Horiguchi Cc: Mel Gorman Acked-by: Rik van Riel Cc: Yasuaki Ishimatsu Cc: Zhang Yanfei Cc: Xishi Qiu Cc: Vladimir Davydov Acked-by: Joonsoo Kim Cc: Gioh Kim Cc: Michal Nazarewicz Cc: Marek Szyprowski Cc: Vlastimil Babka Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_isolation.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/page_isolation.c b/mm/page_isolation.c index 72f5ac381ab3..755a42c76eb4 100644 --- a/mm/page_isolation.c +++ b/mm/page_isolation.c @@ -103,6 +103,7 @@ void unset_migratetype_isolate(struct page *page, unsigned migratetype) if (!is_migrate_isolate_page(buddy)) { __isolate_free_page(page, order); + kernel_map_pages(page, (1 << order), 1); set_page_refcounted(page); isolated_page = page; } -- cgit v1.2.1 From 3d5d472cf55d1be091e8a145f80602bf435ece85 Mon Sep 17 00:00:00 2001 From: Taesoo Kim Date: Wed, 25 Mar 2015 15:55:29 -0700 Subject: fs/affs/file.c: unlock/release page on error When affs_bread_ino() fails, correctly unlock the page and release the page cache with proper error value. All write_end() should unlock/release the page that was locked by write_beg(). Signed-off-by: Taesoo Kim Cc: Fabian Frederick Cc: Al Viro Cc: Geert Uytterhoeven Cc: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/affs/file.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/fs/affs/file.c b/fs/affs/file.c index d2468bf95669..a91795e01a7f 100644 --- a/fs/affs/file.c +++ b/fs/affs/file.c @@ -699,8 +699,10 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping, boff = tmp % bsize; if (boff) { bh = affs_bread_ino(inode, bidx, 0); - if (IS_ERR(bh)) - return PTR_ERR(bh); + if (IS_ERR(bh)) { + written = PTR_ERR(bh); + goto err_first_bh; + } tmp = min(bsize - boff, to - from); BUG_ON(boff + tmp > bsize || tmp > bsize); memcpy(AFFS_DATA(bh) + boff, data + from, tmp); @@ -712,14 +714,16 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping, bidx++; } else if (bidx) { bh = affs_bread_ino(inode, bidx - 1, 0); - if (IS_ERR(bh)) - return PTR_ERR(bh); + if (IS_ERR(bh)) { + written = PTR_ERR(bh); + goto err_first_bh; + } } while (from + bsize <= to) { prev_bh = bh; bh = affs_getemptyblk_ino(inode, bidx); if (IS_ERR(bh)) - goto out; + goto err_bh; memcpy(AFFS_DATA(bh), data + from, bsize); if (buffer_new(bh)) { AFFS_DATA_HEAD(bh)->ptype = cpu_to_be32(T_DATA); @@ -751,7 +755,7 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping, prev_bh = bh; bh = affs_bread_ino(inode, bidx, 1); if (IS_ERR(bh)) - goto out; + goto err_bh; tmp = min(bsize, to - from); BUG_ON(tmp > bsize); memcpy(AFFS_DATA(bh), data + from, tmp); @@ -790,12 +794,13 @@ done: if (tmp > inode->i_size) inode->i_size = AFFS_I(inode)->mmu_private = tmp; +err_first_bh: unlock_page(page); page_cache_release(page); return written; -out: +err_bh: bh = prev_bh; if (!written) written = PTR_ERR(bh); -- cgit v1.2.1 From 1f31e1b1963c240ced453489730bdfc9b0110ceb Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Wed, 25 Mar 2015 15:55:31 -0700 Subject: MAINTAINERS: add Jan as DMI/SMBIOS support maintainer I am familiar with these drivers and I care about them so let me add myself as their maintainer. Signed-off-by: Jean Delvare Acked-by: Matt Fleming Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 12ebef95224d..88c09ca2584f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3252,6 +3252,13 @@ S: Maintained F: Documentation/hwmon/dme1737 F: drivers/hwmon/dme1737.c +DMI/SMBIOS SUPPORT +M: Jean Delvare +S: Maintained +F: drivers/firmware/dmi-id.c +F: drivers/firmware/dmi_scan.c +F: include/linux/dmi.h + DOCKING STATION DRIVER M: Shaohua Li L: linux-acpi@vger.kernel.org -- cgit v1.2.1 From 98cf21c61a7f5419d82f847c4d77bf6e96a76f5f Mon Sep 17 00:00:00 2001 From: Sergei Antonov Date: Wed, 25 Mar 2015 15:55:34 -0700 Subject: hfsplus: fix B-tree corruption after insertion at position 0 Fix B-tree corruption when a new record is inserted at position 0 in the node in hfs_brec_insert(). In this case a hfs_brec_update_parent() is called to update the parent index node (if exists) and it is passed hfs_find_data with a search_key containing a newly inserted key instead of the key to be updated. This results in an inconsistent index node. The bug reproduces on my machine after an extents overflow record for the catalog file (CNID=4) is inserted into the extents overflow B-tree. Because of a low (reserved) value of CNID=4, it has to become the first record in the first leaf node. The resulting first leaf node is correct: ---------------------------------------------------- | key0.CNID=4 | key1.CNID=123 | key2.CNID=456, ... | ---------------------------------------------------- But the parent index key0 still contains the previous key CNID=123: ----------------------- | key0.CNID=123 | ... | ----------------------- A change in hfs_brec_insert() makes hfs_brec_update_parent() work correctly by preventing it from getting fd->record=-1 value from __hfs_brec_find(). Along the way, I removed duplicate code with unification of the if condition. The resulting code is equivalent to the original code because node is never 0. Also hfs_brec_update_parent() will now return an error after getting a negative fd->record value. However, the return value of hfs_brec_update_parent() is not checked anywhere in the file and I'm leaving it unchanged by this patch. brec.c lacks error checking after some other calls too, but this issue is of less importance than the one being fixed by this patch. Signed-off-by: Sergei Antonov Cc: Joe Perches Reviewed-by: Vyacheslav Dubeyko Acked-by: Hin-Tak Leung Cc: Anton Altaparmakov Cc: Al Viro Cc: Christoph Hellwig Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/hfsplus/brec.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/fs/hfsplus/brec.c b/fs/hfsplus/brec.c index 6e560d56094b..754fdf8c6356 100644 --- a/fs/hfsplus/brec.c +++ b/fs/hfsplus/brec.c @@ -131,13 +131,16 @@ skip: hfs_bnode_write(node, entry, data_off + key_len, entry_len); hfs_bnode_dump(node); - if (new_node) { - /* update parent key if we inserted a key - * at the start of the first node - */ - if (!rec && new_node != node) - hfs_brec_update_parent(fd); + /* + * update parent key if we inserted a key + * at the start of the node and it is not the new node + */ + if (!rec && new_node != node) { + hfs_bnode_read_key(node, fd->search_key, data_off + size); + hfs_brec_update_parent(fd); + } + if (new_node) { hfs_bnode_put(fd->bnode); if (!new_node->parent) { hfs_btree_inc_height(tree); @@ -168,9 +171,6 @@ skip: goto again; } - if (!rec) - hfs_brec_update_parent(fd); - return 0; } @@ -370,6 +370,8 @@ again: if (IS_ERR(parent)) return PTR_ERR(parent); __hfs_brec_find(parent, fd, hfs_find_rec_by_key); + if (fd->record < 0) + return -ENOENT; hfs_bnode_dump(parent); rec = fd->record; -- cgit v1.2.1 From bea66fbd11af1ca98ae26855eea41eda8582923e Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 25 Mar 2015 15:55:37 -0700 Subject: mm: numa: group related processes based on VMA flags instead of page table flags These are three follow-on patches based on the xfsrepair workload Dave Chinner reported was problematic in 4.0-rc1 due to changes in page table management -- https://lkml.org/lkml/2015/3/1/226. Much of the problem was reduced by commit 53da3bc2ba9e ("mm: fix up numa read-only thread grouping logic") and commit ba68bc0115eb ("mm: thp: Return the correct value for change_huge_pmd"). It was known that the performance in 3.19 was still better even if is far less safe. This series aims to restore the performance without compromising on safety. For the test of this mail, I'm comparing 3.19 against 4.0-rc4 and the three patches applied on top autonumabench 3.19.0 4.0.0-rc4 4.0.0-rc4 4.0.0-rc4 4.0.0-rc4 vanilla vanilla vmwrite-v5r8 preserve-v5r8 slowscan-v5r8 Time System-NUMA01 124.00 ( 0.00%) 161.86 (-30.53%) 107.13 ( 13.60%) 103.13 ( 16.83%) 145.01 (-16.94%) Time System-NUMA01_THEADLOCAL 115.54 ( 0.00%) 107.64 ( 6.84%) 131.87 (-14.13%) 83.30 ( 27.90%) 92.35 ( 20.07%) Time System-NUMA02 9.35 ( 0.00%) 10.44 (-11.66%) 8.95 ( 4.28%) 10.72 (-14.65%) 8.16 ( 12.73%) Time System-NUMA02_SMT 3.87 ( 0.00%) 4.63 (-19.64%) 4.57 (-18.09%) 3.99 ( -3.10%) 3.36 ( 13.18%) Time Elapsed-NUMA01 570.06 ( 0.00%) 567.82 ( 0.39%) 515.78 ( 9.52%) 517.26 ( 9.26%) 543.80 ( 4.61%) Time Elapsed-NUMA01_THEADLOCAL 393.69 ( 0.00%) 384.83 ( 2.25%) 384.10 ( 2.44%) 384.31 ( 2.38%) 380.73 ( 3.29%) Time Elapsed-NUMA02 49.09 ( 0.00%) 49.33 ( -0.49%) 48.86 ( 0.47%) 48.78 ( 0.63%) 50.94 ( -3.77%) Time Elapsed-NUMA02_SMT 47.51 ( 0.00%) 47.15 ( 0.76%) 47.98 ( -0.99%) 48.12 ( -1.28%) 49.56 ( -4.31%) 3.19.0 4.0.0-rc4 4.0.0-rc4 4.0.0-rc4 4.0.0-rc4 vanilla vanillavmwrite-v5r8preserve-v5r8slowscan-v5r8 User 46334.60 46391.94 44383.95 43971.89 44372.12 System 252.84 284.66 252.61 201.24 249.00 Elapsed 1062.14 1050.96 998.68 1000.94 1026.78 Overall the system CPU usage is comparable and the test is naturally a bit variable. The slowing of the scanner hurts numa01 but on this machine it is an adverse workload and patches that dramatically help it often hurt absolutely everything else. Due to patch 2, the fault activity is interesting 3.19.0 4.0.0-rc4 4.0.0-rc4 4.0.0-rc4 4.0.0-rc4 vanilla vanillavmwrite-v5r8preserve-v5r8slowscan-v5r8 Minor Faults 2097811 2656646 2597249 1981230 1636841 Major Faults 362 450 365 364 365 Note the impact preserving the write bit across protection updates and fault reduces faults. NUMA alloc hit 1229008 1217015 1191660 1178322 1199681 NUMA alloc miss 0 0 0 0 0 NUMA interleave hit 0 0 0 0 0 NUMA alloc local 1228514 1216317 1190871 1177448 1199021 NUMA base PTE updates 245706197 240041607 238195516 244704842 115012800 NUMA huge PMD updates 479530 468448 464868 477573 224487 NUMA page range updates 491225557 479886983 476207932 489222218 229950144 NUMA hint faults 659753 656503 641678 656926 294842 NUMA hint local faults 381604 373963 360478 337585 186249 NUMA hint local percent 57 56 56 51 63 NUMA pages migrated 5412140 6374899 6266530 5277468 5755096 AutoNUMA cost 5121% 5083% 4994% 5097% 2388% Here the impact of slowing the PTE scanner on migratrion failures is obvious as "NUMA base PTE updates" and "NUMA huge PMD updates" are massively reduced even though the headline performance is very similar. As xfsrepair was the reported workload here is the impact of the series on it. xfsrepair 3.19.0 4.0.0-rc4 4.0.0-rc4 4.0.0-rc4 4.0.0-rc4 vanilla vanilla vmwrite-v5r8 preserve-v5r8 slowscan-v5r8 Min real-fsmark 1183.29 ( 0.00%) 1165.73 ( 1.48%) 1152.78 ( 2.58%) 1153.64 ( 2.51%) 1177.62 ( 0.48%) Min syst-fsmark 4107.85 ( 0.00%) 4027.75 ( 1.95%) 3986.74 ( 2.95%) 3979.16 ( 3.13%) 4048.76 ( 1.44%) Min real-xfsrepair 441.51 ( 0.00%) 463.96 ( -5.08%) 449.50 ( -1.81%) 440.08 ( 0.32%) 439.87 ( 0.37%) Min syst-xfsrepair 195.76 ( 0.00%) 278.47 (-42.25%) 262.34 (-34.01%) 203.70 ( -4.06%) 143.64 ( 26.62%) Amean real-fsmark 1188.30 ( 0.00%) 1177.34 ( 0.92%) 1157.97 ( 2.55%) 1158.21 ( 2.53%) 1182.22 ( 0.51%) Amean syst-fsmark 4111.37 ( 0.00%) 4055.70 ( 1.35%) 3987.19 ( 3.02%) 3998.72 ( 2.74%) 4061.69 ( 1.21%) Amean real-xfsrepair 450.88 ( 0.00%) 468.32 ( -3.87%) 454.14 ( -0.72%) 442.36 ( 1.89%) 440.59 ( 2.28%) Amean syst-xfsrepair 199.66 ( 0.00%) 290.60 (-45.55%) 277.20 (-38.84%) 204.68 ( -2.51%) 150.55 ( 24.60%) Stddev real-fsmark 4.12 ( 0.00%) 10.82 (-162.29%) 4.14 ( -0.28%) 5.98 (-45.05%) 4.60 (-11.53%) Stddev syst-fsmark 2.63 ( 0.00%) 20.32 (-671.82%) 0.37 ( 85.89%) 16.47 (-525.59%) 15.05 (-471.79%) Stddev real-xfsrepair 6.87 ( 0.00%) 4.55 ( 33.75%) 3.46 ( 49.58%) 1.78 ( 74.12%) 0.52 ( 92.50%) Stddev syst-xfsrepair 3.02 ( 0.00%) 10.30 (-241.37%) 13.17 (-336.37%) 0.71 ( 76.63%) 5.00 (-65.61%) CoeffVar real-fsmark 0.35 ( 0.00%) 0.92 (-164.73%) 0.36 ( -2.91%) 0.52 (-48.82%) 0.39 (-12.10%) CoeffVar syst-fsmark 0.06 ( 0.00%) 0.50 (-682.41%) 0.01 ( 85.45%) 0.41 (-543.22%) 0.37 (-478.78%) CoeffVar real-xfsrepair 1.52 ( 0.00%) 0.97 ( 36.21%) 0.76 ( 49.94%) 0.40 ( 73.62%) 0.12 ( 92.33%) CoeffVar syst-xfsrepair 1.51 ( 0.00%) 3.54 (-134.54%) 4.75 (-214.31%) 0.34 ( 77.20%) 3.32 (-119.63%) Max real-fsmark 1193.39 ( 0.00%) 1191.77 ( 0.14%) 1162.90 ( 2.55%) 1166.66 ( 2.24%) 1188.50 ( 0.41%) Max syst-fsmark 4114.18 ( 0.00%) 4075.45 ( 0.94%) 3987.65 ( 3.08%) 4019.45 ( 2.30%) 4082.80 ( 0.76%) Max real-xfsrepair 457.80 ( 0.00%) 474.60 ( -3.67%) 457.82 ( -0.00%) 444.42 ( 2.92%) 441.03 ( 3.66%) Max syst-xfsrepair 203.11 ( 0.00%) 303.65 (-49.50%) 294.35 (-44.92%) 205.33 ( -1.09%) 155.28 ( 23.55%) The really relevant lines as syst-xfsrepair which is the system CPU usage when running xfsrepair. Note that on my machine the overhead was 45% higher on 4.0-rc4 which may be part of what Dave is seeing. Once we preserve the write bit across faults, it's only 2.51% higher on average. With the full series applied, system CPU usage is 24.6% lower on average. Again, the impact of preserving the write bit on minor faults is obvious and the impact of slowing scanning after migration failures is obvious on the PTE updates. Note also that the number of pages migrated is much reduced even though the headline performance is comparable. 3.19.0 4.0.0-rc4 4.0.0-rc4 4.0.0-rc4 4.0.0-rc4 vanilla vanillavmwrite-v5r8preserve-v5r8slowscan-v5r8 Minor Faults 153466827 254507978 249163829 153501373 105737890 Major Faults 610 702 690 649 724 NUMA base PTE updates 217735049 210756527 217729596 216937111 144344993 NUMA huge PMD updates 129294 85044 106921 127246 79887 NUMA pages migrated 21938995 29705270 28594162 22687324 16258075 3.19.0 4.0.0-rc4 4.0.0-rc4 4.0.0-rc4 4.0.0-rc4 vanilla vanillavmwrite-v5r8preserve-v5r8slowscan-v5r8 Mean sdb-avgqusz 13.47 2.54 2.55 2.47 2.49 Mean sdb-avgrqsz 202.32 140.22 139.50 139.02 138.12 Mean sdb-await 25.92 5.09 5.33 5.02 5.22 Mean sdb-r_await 4.71 0.19 0.83 0.51 0.11 Mean sdb-w_await 104.13 5.21 5.38 5.05 5.32 Mean sdb-svctm 0.59 0.13 0.14 0.13 0.14 Mean sdb-rrqm 0.16 0.00 0.00 0.00 0.00 Mean sdb-wrqm 3.59 1799.43 1826.84 1812.21 1785.67 Max sdb-avgqusz 111.06 12.13 14.05 11.66 15.60 Max sdb-avgrqsz 255.60 190.34 190.01 187.33 191.78 Max sdb-await 168.24 39.28 49.22 44.64 65.62 Max sdb-r_await 660.00 52.00 280.00 76.00 12.00 Max sdb-w_await 7804.00 39.28 49.22 44.64 65.62 Max sdb-svctm 4.00 2.82 2.86 1.98 2.84 Max sdb-rrqm 8.30 0.00 0.00 0.00 0.00 Max sdb-wrqm 34.20 5372.80 5278.60 5386.60 5546.15 FWIW, I also checked SPECjbb in different configurations but it's similar observations -- minor faults lower, PTE update activity lower and performance is roughly comparable against 3.19. This patch (of 3): Threads that share writable data within pages are grouped together as related tasks. This decision is based on whether the PTE is marked dirty which is subject to timing races between the PTE scanner update and when the application writes the page. If the page is file-backed, then background flushes and sync also affect placement. This is unpredictable behaviour which is impossible to reason about so this patch makes grouping decisions based on the VMA flags. Signed-off-by: Mel Gorman Reported-by: Dave Chinner Tested-by: Dave Chinner Cc: Ingo Molnar Cc: Aneesh Kumar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/huge_memory.c | 13 ++----------- mm/memory.c | 19 +++++++++++-------- 2 files changed, 13 insertions(+), 19 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 626e93db28ba..2f12e9fcf1a2 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1291,17 +1291,8 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, flags |= TNF_FAULT_LOCAL; } - /* - * Avoid grouping on DSO/COW pages in specific and RO pages - * in general, RO pages shouldn't hurt as much anyway since - * they can be in shared cache state. - * - * FIXME! This checks "pmd_dirty()" as an approximation of - * "is this a read-only page", since checking "pmd_write()" - * is even more broken. We haven't actually turned this into - * a writable page, so pmd_write() will always be false. - */ - if (!pmd_dirty(pmd)) + /* See similar comment in do_numa_page for explanation */ + if (!(vma->vm_flags & VM_WRITE)) flags |= TNF_NO_GROUP; /* diff --git a/mm/memory.c b/mm/memory.c index 411144f977b1..20beb6647dba 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3069,16 +3069,19 @@ static int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, } /* - * Avoid grouping on DSO/COW pages in specific and RO pages - * in general, RO pages shouldn't hurt as much anyway since - * they can be in shared cache state. + * Avoid grouping on RO pages in general. RO pages shouldn't hurt as + * much anyway since they can be in shared cache state. This misses + * the case where a mapping is writable but the process never writes + * to it but pte_write gets cleared during protection updates and + * pte_dirty has unpredictable behaviour between PTE scan updates, + * background writeback, dirty balancing and application behaviour. * - * FIXME! This checks "pmd_dirty()" as an approximation of - * "is this a read-only page", since checking "pmd_write()" - * is even more broken. We haven't actually turned this into - * a writable page, so pmd_write() will always be false. + * TODO: Note that the ideal here would be to avoid a situation where a + * NUMA fault is taken immediately followed by a write fault in + * some cases which would have lower overhead overall but would be + * invasive as the fault paths would need to be unified. */ - if (!pte_dirty(pte)) + if (!(vma->vm_flags & VM_WRITE)) flags |= TNF_NO_GROUP; /* -- cgit v1.2.1 From b191f9b106ea1a24a711dbebb2925d3313da5852 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 25 Mar 2015 15:55:40 -0700 Subject: mm: numa: preserve PTE write permissions across a NUMA hinting fault Protecting a PTE to trap a NUMA hinting fault clears the writable bit and further faults are needed after trapping a NUMA hinting fault to set the writable bit again. This patch preserves the writable bit when trapping NUMA hinting faults. The impact is obvious from the number of minor faults trapped during the basis balancing benchmark and the system CPU usage; autonumabench 4.0.0-rc4 4.0.0-rc4 baseline preserve Time System-NUMA01 107.13 ( 0.00%) 103.13 ( 3.73%) Time System-NUMA01_THEADLOCAL 131.87 ( 0.00%) 83.30 ( 36.83%) Time System-NUMA02 8.95 ( 0.00%) 10.72 (-19.78%) Time System-NUMA02_SMT 4.57 ( 0.00%) 3.99 ( 12.69%) Time Elapsed-NUMA01 515.78 ( 0.00%) 517.26 ( -0.29%) Time Elapsed-NUMA01_THEADLOCAL 384.10 ( 0.00%) 384.31 ( -0.05%) Time Elapsed-NUMA02 48.86 ( 0.00%) 48.78 ( 0.16%) Time Elapsed-NUMA02_SMT 47.98 ( 0.00%) 48.12 ( -0.29%) 4.0.0-rc4 4.0.0-rc4 baseline preserve User 44383.95 43971.89 System 252.61 201.24 Elapsed 998.68 1000.94 Minor Faults 2597249 1981230 Major Faults 365 364 There is a similar drop in system CPU usage using Dave Chinner's xfsrepair workload 4.0.0-rc4 4.0.0-rc4 baseline preserve Amean real-xfsrepair 454.14 ( 0.00%) 442.36 ( 2.60%) Amean syst-xfsrepair 277.20 ( 0.00%) 204.68 ( 26.16%) The patch looks hacky but the alternatives looked worse. The tidest was to rewalk the page tables after a hinting fault but it was more complex than this approach and the performance was worse. It's not generally safe to just mark the page writable during the fault if it's a write fault as it may have been read-only for COW so that approach was discarded. Signed-off-by: Mel Gorman Reported-by: Dave Chinner Tested-by: Dave Chinner Cc: Ingo Molnar Cc: Aneesh Kumar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/huge_memory.c | 9 ++++++++- mm/memory.c | 8 +++----- mm/mprotect.c | 3 +++ 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 2f12e9fcf1a2..0a42d1521aa4 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1260,6 +1260,7 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, int target_nid, last_cpupid = -1; bool page_locked; bool migrated = false; + bool was_writable; int flags = 0; /* A PROT_NONE fault should not end up here */ @@ -1354,7 +1355,10 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, goto out; clear_pmdnuma: BUG_ON(!PageLocked(page)); + was_writable = pmd_write(pmd); pmd = pmd_modify(pmd, vma->vm_page_prot); + if (was_writable) + pmd = pmd_mkwrite(pmd); set_pmd_at(mm, haddr, pmdp, pmd); update_mmu_cache_pmd(vma, addr, pmdp); unlock_page(page); @@ -1478,6 +1482,7 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, if (__pmd_trans_huge_lock(pmd, vma, &ptl) == 1) { pmd_t entry; + bool preserve_write = prot_numa && pmd_write(*pmd); ret = 1; /* @@ -1493,9 +1498,11 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, if (!prot_numa || !pmd_protnone(*pmd)) { entry = pmdp_get_and_clear_notify(mm, addr, pmd); entry = pmd_modify(entry, newprot); + if (preserve_write) + entry = pmd_mkwrite(entry); ret = HPAGE_PMD_NR; set_pmd_at(mm, addr, pmd, entry); - BUG_ON(pmd_write(entry)); + BUG_ON(!preserve_write && pmd_write(entry)); } spin_unlock(ptl); } diff --git a/mm/memory.c b/mm/memory.c index 20beb6647dba..d20e12da3a3c 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3035,6 +3035,7 @@ static int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, int last_cpupid; int target_nid; bool migrated = false; + bool was_writable = pte_write(pte); int flags = 0; /* A PROT_NONE fault should not end up here */ @@ -3059,6 +3060,8 @@ static int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, /* Make it present again */ pte = pte_modify(pte, vma->vm_page_prot); pte = pte_mkyoung(pte); + if (was_writable) + pte = pte_mkwrite(pte); set_pte_at(mm, addr, ptep, pte); update_mmu_cache(vma, addr, ptep); @@ -3075,11 +3078,6 @@ static int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, * to it but pte_write gets cleared during protection updates and * pte_dirty has unpredictable behaviour between PTE scan updates, * background writeback, dirty balancing and application behaviour. - * - * TODO: Note that the ideal here would be to avoid a situation where a - * NUMA fault is taken immediately followed by a write fault in - * some cases which would have lower overhead overall but would be - * invasive as the fault paths would need to be unified. */ if (!(vma->vm_flags & VM_WRITE)) flags |= TNF_NO_GROUP; diff --git a/mm/mprotect.c b/mm/mprotect.c index 44727811bf4c..88584838e704 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -75,6 +75,7 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd, oldpte = *pte; if (pte_present(oldpte)) { pte_t ptent; + bool preserve_write = prot_numa && pte_write(oldpte); /* * Avoid trapping faults against the zero or KSM @@ -94,6 +95,8 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd, ptent = ptep_modify_prot_start(mm, addr, pte); ptent = pte_modify(ptent, newprot); + if (preserve_write) + ptent = pte_mkwrite(ptent); /* Avoid taking write faults for known dirty pages */ if (dirty_accountable && pte_dirty(ptent) && -- cgit v1.2.1 From 074c238177a75f5e79af3b2cb6a84e54823ef950 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 25 Mar 2015 15:55:42 -0700 Subject: mm: numa: slow PTE scan rate if migration failures occur Dave Chinner reported the following on https://lkml.org/lkml/2015/3/1/226 Across the board the 4.0-rc1 numbers are much slower, and the degradation is far worse when using the large memory footprint configs. Perf points straight at the cause - this is from 4.0-rc1 on the "-o bhash=101073" config: - 56.07% 56.07% [kernel] [k] default_send_IPI_mask_sequence_phys - default_send_IPI_mask_sequence_phys - 99.99% physflat_send_IPI_mask - 99.37% native_send_call_func_ipi smp_call_function_many - native_flush_tlb_others - 99.85% flush_tlb_page ptep_clear_flush try_to_unmap_one rmap_walk try_to_unmap migrate_pages migrate_misplaced_page - handle_mm_fault - 99.73% __do_page_fault trace_do_page_fault do_async_page_fault + async_page_fault 0.63% native_send_call_func_single_ipi generic_exec_single smp_call_function_single This is showing excessive migration activity even though excessive migrations are meant to get throttled. Normally, the scan rate is tuned on a per-task basis depending on the locality of faults. However, if migrations fail for any reason then the PTE scanner may scan faster if the faults continue to be remote. This means there is higher system CPU overhead and fault trapping at exactly the time we know that migrations cannot happen. This patch tracks when migration failures occur and slows the PTE scanner. Signed-off-by: Mel Gorman Reported-by: Dave Chinner Tested-by: Dave Chinner Cc: Ingo Molnar Cc: Aneesh Kumar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 9 +++++---- kernel/sched/fair.c | 8 ++++++-- mm/huge_memory.c | 3 ++- mm/memory.c | 3 ++- 4 files changed, 15 insertions(+), 8 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 6d77432e14ff..a419b65770d6 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1625,11 +1625,11 @@ struct task_struct { /* * numa_faults_locality tracks if faults recorded during the last - * scan window were remote/local. The task scan period is adapted - * based on the locality of the faults with different weights - * depending on whether they were shared or private faults + * scan window were remote/local or failed to migrate. The task scan + * period is adapted based on the locality of the faults with different + * weights depending on whether they were shared or private faults */ - unsigned long numa_faults_locality[2]; + unsigned long numa_faults_locality[3]; unsigned long numa_pages_migrated; #endif /* CONFIG_NUMA_BALANCING */ @@ -1719,6 +1719,7 @@ struct task_struct { #define TNF_NO_GROUP 0x02 #define TNF_SHARED 0x04 #define TNF_FAULT_LOCAL 0x08 +#define TNF_MIGRATE_FAIL 0x10 #ifdef CONFIG_NUMA_BALANCING extern void task_numa_fault(int last_node, int node, int pages, int flags); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 7ce18f3c097a..bcfe32088b37 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -1609,9 +1609,11 @@ static void update_task_scan_period(struct task_struct *p, /* * If there were no record hinting faults then either the task is * completely idle or all activity is areas that are not of interest - * to automatic numa balancing. Scan slower + * to automatic numa balancing. Related to that, if there were failed + * migration then it implies we are migrating too quickly or the local + * node is overloaded. In either case, scan slower */ - if (local + shared == 0) { + if (local + shared == 0 || p->numa_faults_locality[2]) { p->numa_scan_period = min(p->numa_scan_period_max, p->numa_scan_period << 1); @@ -2080,6 +2082,8 @@ void task_numa_fault(int last_cpupid, int mem_node, int pages, int flags) if (migrated) p->numa_pages_migrated += pages; + if (flags & TNF_MIGRATE_FAIL) + p->numa_faults_locality[2] += pages; p->numa_faults[task_faults_idx(NUMA_MEMBUF, mem_node, priv)] += pages; p->numa_faults[task_faults_idx(NUMA_CPUBUF, cpu_node, priv)] += pages; diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 0a42d1521aa4..51b3e7c64622 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1350,7 +1350,8 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, if (migrated) { flags |= TNF_MIGRATED; page_nid = target_nid; - } + } else + flags |= TNF_MIGRATE_FAIL; goto out; clear_pmdnuma: diff --git a/mm/memory.c b/mm/memory.c index d20e12da3a3c..97839f5c8c30 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3103,7 +3103,8 @@ static int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, if (migrated) { page_nid = target_nid; flags |= TNF_MIGRATED; - } + } else + flags |= TNF_MIGRATE_FAIL; out: if (page_nid != -1) -- cgit v1.2.1 From b7b04004ecd9e58cdc6c6ff92f251d5ac5c0adb2 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 25 Mar 2015 15:55:45 -0700 Subject: mm: numa: mark huge PTEs young when clearing NUMA hinting faults Base PTEs are marked young when the NUMA hinting information is cleared but the same does not happen for huge pages which this patch addresses. Note that migrated pages are not marked young as the base page migration code does not assume that migrated pages have been referenced. This could be addressed but beyond the scope of this series which is aimed at Dave Chinners shrink workload that is unlikely to be affected by this issue. Signed-off-by: Mel Gorman Cc: Dave Chinner Cc: Ingo Molnar Cc: Aneesh Kumar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/huge_memory.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 51b3e7c64622..6817b0350c71 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1358,6 +1358,7 @@ clear_pmdnuma: BUG_ON(!PageLocked(page)); was_writable = pmd_write(pmd); pmd = pmd_modify(pmd, vma->vm_page_prot); + pmd = pmd_mkyoung(pmd); if (was_writable) pmd = pmd_mkwrite(pmd); set_pmd_at(mm, haddr, pmdp, pmd); -- cgit v1.2.1 From 6914e1e3f63caa829431160f0f7093292daef2d5 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 26 Mar 2015 09:25:44 +0530 Subject: ARC: SA_SIGINFO ucontext regs off-by-one The regfile provided to SA_SIGINFO signal handler as ucontext was off by one due to pt_regs gutter cleanups in 2013. Before handling signal, user pt_regs are copied onto user_regs_struct and copied back later. Both structs are binary compatible. This was all fine until commit 2fa919045b72 (ARC: pt_regs update #2) which removed the empty stack slot at top of pt_regs (corresponding to first pad) and made the corresponding fixup in struct user_regs_struct (the pad in there was moved out of @scratch - not removed altogether as it is part of ptrace ABI) struct user_regs_struct { + long pad; struct { - long pad; long bta, lp_start, lp_end,.... } scratch; ... } This meant that now user_regs_struct was off by 1 reg w.r.t pt_regs and signal code needs to user_regs_struct.scratch to reflect it as pt_regs, which is what this commit does. This problem was hidden for 2 years, because both save/restore, despite using wrong location, were using the same location. Only an interim inspection (reproducer below) exposed the issue. void handle_segv(int signo, siginfo_t *info, void *context) { ucontext_t *uc = context; struct user_regs_struct *regs = &(uc->uc_mcontext.regs); printf("regs %x %x\n", <=== prints 7 8 (vs. 8 9) regs->scratch.r8, regs->scratch.r9); } int main() { struct sigaction sa; sa.sa_sigaction = handle_segv; sa.sa_flags = SA_SIGINFO; sigemptyset(&sa.sa_mask); sigaction(SIGSEGV, &sa, NULL); asm volatile( "mov r7, 7 \n" "mov r8, 8 \n" "mov r9, 9 \n" "mov r10, 10 \n" :::"r7","r8","r9","r10"); *((unsigned int*)0x10) = 0; } Fixes: 2fa919045b72ec892e "ARC: pt_regs update #2: Remove unused gutter at start of pt_regs" CC: Signed-off-by: Vineet Gupta --- arch/arc/kernel/signal.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arc/kernel/signal.c b/arch/arc/kernel/signal.c index 114234e83caa..fdd3c98dfb0f 100644 --- a/arch/arc/kernel/signal.c +++ b/arch/arc/kernel/signal.c @@ -67,7 +67,7 @@ stash_usr_regs(struct rt_sigframe __user *sf, struct pt_regs *regs, sigset_t *set) { int err; - err = __copy_to_user(&(sf->uc.uc_mcontext.regs), regs, + err = __copy_to_user(&(sf->uc.uc_mcontext.regs.scratch), regs, sizeof(sf->uc.uc_mcontext.regs.scratch)); err |= __copy_to_user(&sf->uc.uc_sigmask, set, sizeof(sigset_t)); @@ -83,7 +83,7 @@ static int restore_usr_regs(struct pt_regs *regs, struct rt_sigframe __user *sf) if (!err) set_current_blocked(&set); - err |= __copy_from_user(regs, &(sf->uc.uc_mcontext.regs), + err |= __copy_from_user(regs, &(sf->uc.uc_mcontext.regs.scratch), sizeof(sf->uc.uc_mcontext.regs.scratch)); return err; -- cgit v1.2.1 From e4140819dadc3624accac8294881bca8a3cba4ed Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 26 Mar 2015 11:14:41 +0530 Subject: ARC: signal handling robustify A malicious signal handler / restorer can DOS the system by fudging the user regs saved on stack, causing weird things such as sigreturn returning to user mode PC but cpu state still being kernel mode.... Ensure that in sigreturn path status32 always has U bit; any other bogosity (gargbage PC etc) will be taken care of by normal user mode exceptions mechanisms. Reproducer signal handler: void handle_sig(int signo, siginfo_t *info, void *context) { ucontext_t *uc = context; struct user_regs_struct *regs = &(uc->uc_mcontext.regs); regs->scratch.status32 = 0; } Before the fix, kernel would go off to weeds like below: --------->8----------- [ARCLinux]$ ./signal-test Path: /signal-test CPU: 0 PID: 61 Comm: signal-test Not tainted 4.0.0-rc5+ #65 task: 8f177880 ti: 5ffe6000 task.ti: 8f15c000 [ECR ]: 0x00220200 => Invalid Write @ 0x00000010 by insn @ 0x00010698 [EFA ]: 0x00000010 [BLINK ]: 0x2007c1ee [ERET ]: 0x10698 [STAT32]: 0x00000000 : <-------- BTA: 0x00010680 SP: 0x5ffe7e48 FP: 0x00000000 LPS: 0x20003c6c LPE: 0x20003c70 LPC: 0x00000000 ... --------->8----------- Reported-by: Alexey Brodkin Cc: Signed-off-by: Vineet Gupta --- arch/arc/kernel/signal.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/arch/arc/kernel/signal.c b/arch/arc/kernel/signal.c index fdd3c98dfb0f..edda76fae83f 100644 --- a/arch/arc/kernel/signal.c +++ b/arch/arc/kernel/signal.c @@ -131,6 +131,15 @@ SYSCALL_DEFINE0(rt_sigreturn) /* Don't restart from sigreturn */ syscall_wont_restart(regs); + /* + * Ensure that sigreturn always returns to user mode (in case the + * regs saved on user stack got fudged between save and sigreturn) + * Otherwise it is easy to panic the kernel with a custom + * signal handler and/or restorer which clobberes the status32/ret + * to return to a bogus location in kernel mode. + */ + regs->status32 |= STATUS_U_MASK; + return regs->r0; badframe: @@ -229,8 +238,11 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs) /* * handler returns using sigreturn stub provided already by userpsace + * If not, nuke the process right away */ - BUG_ON(!(ksig->ka.sa.sa_flags & SA_RESTORER)); + if(!(ksig->ka.sa.sa_flags & SA_RESTORER)) + return 1; + regs->blink = (unsigned long)ksig->ka.sa.sa_restorer; /* User Stack for signal handler will be above the frame just carved */ @@ -296,12 +308,12 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs) { sigset_t *oldset = sigmask_to_save(); - int ret; + int failed; /* Set up the stack frame */ - ret = setup_rt_frame(ksig, oldset, regs); + failed = setup_rt_frame(ksig, oldset, regs); - signal_setup_done(ret, ksig, 0); + signal_setup_done(failed, ksig, 0); } void do_signal(struct pt_regs *regs) -- cgit v1.2.1 From db48abf4367cb1f9e118defee0a37238638c2752 Mon Sep 17 00:00:00 2001 From: Libin Yang Date: Thu, 26 Mar 2015 13:28:39 +0800 Subject: ALSA: hda_intel: apply the Seperate stream_tag for Sunrise Point The total stream number of Sunrise Point's input and output stream exceeds 15, which will cause some streams do not work because of the overflow on SDxCTL.STRM field if using the legacy stream tag allocation method. This patch uses the new stream tag allocation method by add the flag AZX_DCAPS_SEPARATE_STREAM_TAG for Skylake platform. Signed-off-by: Libin Yang Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_intel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 4ca3d5d02436..a8a1e14272a1 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -1989,7 +1989,7 @@ static const struct pci_device_id azx_ids[] = { .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_PCH }, /* Sunrise Point */ { PCI_DEVICE(0x8086, 0xa170), - .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_PCH }, + .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_SKYLAKE }, /* Sunrise Point-LP */ { PCI_DEVICE(0x8086, 0x9d70), .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_SKYLAKE }, -- cgit v1.2.1 From 832a3aad1e2927b1684e7369c9f36a370e0b95da Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 18 Mar 2015 18:19:22 +0000 Subject: drm/i915: Keep ring->active_list and ring->requests_list consistent If we retire requests last, we may use a later seqno and so clear the requests lists without clearing the active list, leading to confusion. Hence we should retire requests first for consistency with the early return. The order used to be important as the lifecycle for the object on the active list was determined by request->seqno. However, the requests themselves are now reference counted removing the constraint from the order of retirement. Fixes regression from commit 1b5a433a4dd967b125131da42b89b5cc0d5b1f57 Author: John Harrison Date: Mon Nov 24 18:49:42 2014 +0000 drm/i915: Convert 'i915_seqno_passed' calls into 'i915_gem_request_completed ' and a WARNING: CPU: 0 PID: 1383 at drivers/gpu/drm/i915/i915_gem_evict.c:279 i915_gem_evict_vm+0x10c/0x140() WARN_ON(!list_empty(&vm->active_list)) Identified by updating WATCH_LISTS: [drm:i915_verify_lists] *ERROR* blitter ring: active list not empty, but no requests WARNING: CPU: 0 PID: 681 at drivers/gpu/drm/i915/i915_gem.c:2751 i915_gem_retire_requests_ring+0x149/0x230() WARN_ON(i915_verify_lists(ring->dev)) Note that this is only a problem in evict_vm where the following happens after a retire_request has cleaned out all requests, but not all active bo: - intel_ring_idle called from i915_gpu_idle notices that no requests are outstanding and immediately returns. - i915_gem_retire_requests_ring called from i915_gem_retire_requests also immediately returns when there's no request, still leaving the bo on the active list. - evict_vm hits the WARN_ON(!list_empty(&vm->active_list)) after evicting all active objects that there's still stuff left that shouldn't be there. Signed-off-by: Chris Wilson Cc: John Harrison Cc: Daniel Vetter Reviewed-by: Daniel Vetter Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_gem.c | 38 +++++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 5b205863b659..27ea6bdebce7 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2737,24 +2737,11 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *ring) WARN_ON(i915_verify_lists(ring->dev)); - /* Move any buffers on the active list that are no longer referenced - * by the ringbuffer to the flushing/inactive lists as appropriate, - * before we free the context associated with the requests. + /* Retire requests first as we use it above for the early return. + * If we retire requests last, we may use a later seqno and so clear + * the requests lists without clearing the active list, leading to + * confusion. */ - while (!list_empty(&ring->active_list)) { - struct drm_i915_gem_object *obj; - - obj = list_first_entry(&ring->active_list, - struct drm_i915_gem_object, - ring_list); - - if (!i915_gem_request_completed(obj->last_read_req, true)) - break; - - i915_gem_object_move_to_inactive(obj); - } - - while (!list_empty(&ring->request_list)) { struct drm_i915_gem_request *request; struct intel_ringbuffer *ringbuf; @@ -2789,6 +2776,23 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *ring) i915_gem_free_request(request); } + /* Move any buffers on the active list that are no longer referenced + * by the ringbuffer to the flushing/inactive lists as appropriate, + * before we free the context associated with the requests. + */ + while (!list_empty(&ring->active_list)) { + struct drm_i915_gem_object *obj; + + obj = list_first_entry(&ring->active_list, + struct drm_i915_gem_object, + ring_list); + + if (!i915_gem_request_completed(obj->last_read_req, true)) + break; + + i915_gem_object_move_to_inactive(obj); + } + if (unlikely(ring->trace_irq_req && i915_gem_request_completed(ring->trace_irq_req, true))) { ring->irq_put(ring); -- cgit v1.2.1 From 11bc26fe372fa6da81c82c68f755d2795838a640 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Thu, 26 Mar 2015 10:27:06 +0100 Subject: clocksource/drivers: Fix various !CONFIG_HAS_IOMEM build errors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix !CONFIG_HAS_IOMEM related build failures in three clocksource drivers. The build failures have the pattern of: drivers/clocksource/sh_cmt.c: In function ‘sh_cmt_map_memory’: drivers/clocksource/sh_cmt.c:920:2: error: implicit declaration of function ‘ioremap_nocache’ [-Werror=implicit-function-declaration] cmt->mapbase = ioremap_nocache(mem->start, resource_size(mem)); Signed-off-by: Richard Weinberger Signed-off-by: Daniel Lezcano Acked-by: Geert Uytterhoeven Cc: maxime.ripard@free-electrons.com Link: http://lkml.kernel.org/r/1427362029-6511-1-git-send-email-daniel.lezcano@linaro.org Signed-off-by: Ingo Molnar --- drivers/clocksource/Kconfig | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig index 68161f7a07d6..a0b036ccb118 100644 --- a/drivers/clocksource/Kconfig +++ b/drivers/clocksource/Kconfig @@ -192,6 +192,7 @@ config SYS_SUPPORTS_EM_STI config SH_TIMER_CMT bool "Renesas CMT timer driver" if COMPILE_TEST depends on GENERIC_CLOCKEVENTS + depends on HAS_IOMEM default SYS_SUPPORTS_SH_CMT help This enables build of a clocksource and clockevent driver for @@ -201,6 +202,7 @@ config SH_TIMER_CMT config SH_TIMER_MTU2 bool "Renesas MTU2 timer driver" if COMPILE_TEST depends on GENERIC_CLOCKEVENTS + depends on HAS_IOMEM default SYS_SUPPORTS_SH_MTU2 help This enables build of a clockevent driver for the Multi-Function @@ -210,6 +212,7 @@ config SH_TIMER_MTU2 config SH_TIMER_TMU bool "Renesas TMU timer driver" if COMPILE_TEST depends on GENERIC_CLOCKEVENTS + depends on HAS_IOMEM default SYS_SUPPORTS_SH_TMU help This enables build of a clocksource and clockevent driver for -- cgit v1.2.1 From 6e206020324c50a95486f6b279a53512febed92d Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Thu, 26 Mar 2015 10:27:09 +0100 Subject: clocksource/drivers/sun5i: Fix cpufreq interaction with sched_clock() The sun5i timer is used as the sched-clock on certain systems, and ever since we started using cpufreq, the cpu clock (that is one of the timer's clock indirect parent) now changes as well, along with the actual sched_clock() rate. This is not accurate and not desirable. We can safely remove the sun5i sched-clock on those systems, since we have other reliable sched_clock() sources in the system. Tested-by: Hans de Goede Signed-off-by: Maxime Ripard Signed-off-by: Daniel Lezcano [ Improved the changelog. ] Cc: richard@nod.at Link: http://lkml.kernel.org/r/1427362029-6511-4-git-send-email-daniel.lezcano@linaro.org Signed-off-by: Ingo Molnar --- drivers/clocksource/timer-sun5i.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/clocksource/timer-sun5i.c b/drivers/clocksource/timer-sun5i.c index 5dcbf90b8015..58597fbcc046 100644 --- a/drivers/clocksource/timer-sun5i.c +++ b/drivers/clocksource/timer-sun5i.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include @@ -137,11 +136,6 @@ static struct irqaction sun5i_timer_irq = { .dev_id = &sun5i_clockevent, }; -static u64 sun5i_timer_sched_read(void) -{ - return ~readl(timer_base + TIMER_CNTVAL_LO_REG(1)); -} - static void __init sun5i_timer_init(struct device_node *node) { struct reset_control *rstc; @@ -172,7 +166,6 @@ static void __init sun5i_timer_init(struct device_node *node) writel(TIMER_CTL_ENABLE | TIMER_CTL_RELOAD, timer_base + TIMER_CTL_REG(1)); - sched_clock_register(sun5i_timer_sched_read, 32, rate); clocksource_mmio_init(timer_base + TIMER_CNTVAL_LO_REG(1), node->name, rate, 340, 32, clocksource_mmio_readl_down); -- cgit v1.2.1 From af95b41426e0b58279f8ff0ebe420df49a4e96b8 Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Thu, 26 Mar 2015 17:14:55 +0800 Subject: ALSA: hda - Add one more node in the EAPD supporting candidate list We have a HP machine which use the codec node 0x17 connecting the internal speaker, and from the node capability, we saw the EAPD, if we don't set the EAPD on for this node, the internal speaker can't output any sound. Cc: BugLink: https://bugs.launchpad.net/bugs/1436745 Signed-off-by: Hui Wang Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 7b0f72c5c6f1..74382137b9f5 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -396,7 +396,7 @@ static void alc_auto_setup_eapd(struct hda_codec *codec, bool on) { /* We currently only handle front, HP */ static hda_nid_t pins[] = { - 0x0f, 0x10, 0x14, 0x15, 0 + 0x0f, 0x10, 0x14, 0x15, 0x17, 0 }; hda_nid_t *p; for (p = pins; *p; p++) -- cgit v1.2.1 From 3164a803416832d61268b758112e8dfd7e35cdf7 Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Thu, 5 Feb 2015 19:24:25 +0000 Subject: drm/i915: Fix atomic state when reusing the firmware fb Right now, we get a warning when taking over the firmware fb: [drm:drm_atomic_plane_check] FB set but no CRTC with the following backtrace: [] drm_atomic_check_only+0x35d/0x510 [drm] [] drm_atomic_commit+0x17/0x60 [drm] [] drm_atomic_helper_plane_set_property+0x8d/0xd0 [drm_kms_helper] [] drm_mode_plane_set_obj_prop+0x2d/0x90 [drm] [] restore_fbdev_mode+0x6b/0xf0 [drm_kms_helper] [] drm_fb_helper_restore_fbdev_mode_unlocked+0x29/0x80 [drm_kms_helper] [] drm_fb_helper_set_par+0x22/0x50 [drm_kms_helper] [] intel_fbdev_set_par+0x1a/0x60 [i915] [] fbcon_init+0x4f4/0x580 That's because we update the plane state with the fb from the firmware, but we never associate the plane to that CRTC. We don't quite have the full DRM take over from HW state just yet, so fake enough of the plane atomic state to pass the checks. v2: Fix the state on which we set the CRTC in the case we're sharing the initial fb with another pipe. (Matt) Signed-off-by: Damien Lespiau Reviewed-by: Matt Roper Signed-off-by: Daniel Vetter [Jani: backported to drm-intel-fixes for v4.0-rc] Reference: http://mid.gmane.org/CA+5PVA7yXH=U757w8V=Zj2U1URG4nYNav20NpjtQ4svVueyPNw@mail.gmail.com Reference: http://lkml.kernel.org/r/CA+55aFweWR=nDzc2Y=rCtL_H8JfdprQiCimN5dwc+TgyD4Bjsg@mail.gmail.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_display.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 1c12262029fb..30faf6c262e5 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -2439,7 +2439,11 @@ intel_find_plane_obj(struct intel_crtc *intel_crtc, return; if (intel_alloc_plane_obj(intel_crtc, plane_config)) { - update_state_fb(intel_crtc->base.primary); + struct drm_plane *primary = intel_crtc->base.primary; + + primary->state->crtc = &intel_crtc->base; + update_state_fb(primary); + return; } @@ -2464,11 +2468,14 @@ intel_find_plane_obj(struct intel_crtc *intel_crtc, continue; if (i915_gem_obj_ggtt_offset(obj) == plane_config->base) { + struct drm_plane *primary = intel_crtc->base.primary; + if (obj->tiling_mode != I915_TILING_NONE) dev_priv->preserve_bios_swizzle = true; drm_framebuffer_reference(c->primary->fb); - intel_crtc->base.primary->fb = c->primary->fb; + primary->fb = c->primary->fb; + primary->state->crtc = &intel_crtc->base; obj->frontbuffer_bits |= INTEL_FRONTBUFFER_PRIMARY(intel_crtc->pipe); break; } -- cgit v1.2.1 From 5f407751b0ca9bd876fe8f15ff28153661c6ba0a Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 25 Mar 2015 18:30:38 +0100 Subject: drm/i915: Fixup legacy plane->crtc link for initial fb config This is a very similar bug in the load detect code fixed in commit 9128b040eb774e04bc23777b005ace2b66ab2a85 Author: Daniel Vetter Date: Tue Mar 3 17:31:21 2015 +0100 drm/i915: Fix modeset state confusion in the load detect code But this time around it was the initial fb code that forgot to update the plane->crtc pointer. Otherwise it's the exact same bug, with the exact same restrains (any set_config call/ioctl that doesn't disable the pipe papers over the bug for free, so fairly hard to hit in normal testing). So if you want the full explanation just go read that one over there - it's rather long ... Cc: Matt Roper Cc: Linus Torvalds Cc: Chris Wilson Cc: Josh Boyer Cc: Jani Nikula Reported-and-tested-by: Josh Boyer Signed-off-by: Daniel Vetter [Jani: backported to drm-intel-fixes for v4.0-rc] Reference: http://mid.gmane.org/CA+5PVA7ChbtJrknqws1qvZcbrg1CW2pQAFkSMURWWgyASRyGXg@mail.gmail.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_display.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 30faf6c262e5..f75173c20f47 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -2442,6 +2442,7 @@ intel_find_plane_obj(struct intel_crtc *intel_crtc, struct drm_plane *primary = intel_crtc->base.primary; primary->state->crtc = &intel_crtc->base; + primary->crtc = &intel_crtc->base; update_state_fb(primary); return; @@ -2476,6 +2477,7 @@ intel_find_plane_obj(struct intel_crtc *intel_crtc, drm_framebuffer_reference(c->primary->fb); primary->fb = c->primary->fb; primary->state->crtc = &intel_crtc->base; + primary->crtc = &intel_crtc->base; obj->frontbuffer_bits |= INTEL_FRONTBUFFER_PRIMARY(intel_crtc->pipe); break; } -- cgit v1.2.1 From 9ffd906d9a6e50c958bd99971d762a426a12a36a Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 11 Feb 2015 13:26:21 +0300 Subject: watchdog: mtk_wdt: signedness bug in mtk_wdt_start() "ret" should be signed for the error handling to work correctly. This doesn't matter much in real life since mtk_wdt_set_timeout() always succeeds. Signed-off-by: Dan Carpenter Reviewed-by: Matthias Brugger Reviewed-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/mtk_wdt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/watchdog/mtk_wdt.c b/drivers/watchdog/mtk_wdt.c index a87f6df6e85f..938b987de551 100644 --- a/drivers/watchdog/mtk_wdt.c +++ b/drivers/watchdog/mtk_wdt.c @@ -133,7 +133,7 @@ static int mtk_wdt_start(struct watchdog_device *wdt_dev) u32 reg; struct mtk_wdt_dev *mtk_wdt = watchdog_get_drvdata(wdt_dev); void __iomem *wdt_base = mtk_wdt->wdt_base; - u32 ret; + int ret; ret = mtk_wdt_set_timeout(wdt_dev, wdt_dev->timeout); if (ret < 0) -- cgit v1.2.1 From a629c08fdb98ebb184d745553af9dda4f05941bf Mon Sep 17 00:00:00 2001 From: James Hogan Date: Fri, 20 Feb 2015 23:45:44 +0000 Subject: watchdog: imgpdc: Fix probe NULL pointer dereference The IMG PDC watchdog probe function calls pdc_wdt_stop() prior to watchdog_set_drvdata(), causing a NULL pointer dereference when pdc_wdt_stop() retrieves the struct pdc_wdt_dev pointer using watchdog_get_drvdata() and reads the register base address through it. Fix by moving the watchdog_set_drvdata() call earlier, to where various other pdc_wdt->wdt_dev fields are initialised. Fixes: 93937669e9b5 ("watchdog: ImgTec PDC Watchdog Timer Driver") Signed-off-by: James Hogan Cc: Ezequiel Garcia Cc: Naidu Tellapati Cc: Jude Abraham Cc: linux-watchdog@vger.kernel.org Reviewed-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/imgpdc_wdt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/watchdog/imgpdc_wdt.c b/drivers/watchdog/imgpdc_wdt.c index c8def68d9e4c..32c35eb31e65 100644 --- a/drivers/watchdog/imgpdc_wdt.c +++ b/drivers/watchdog/imgpdc_wdt.c @@ -191,6 +191,7 @@ static int pdc_wdt_probe(struct platform_device *pdev) pdc_wdt->wdt_dev.ops = &pdc_wdt_ops; pdc_wdt->wdt_dev.max_timeout = 1 << PDC_WDT_CONFIG_DELAY_MASK; pdc_wdt->wdt_dev.parent = &pdev->dev; + watchdog_set_drvdata(&pdc_wdt->wdt_dev, pdc_wdt); ret = watchdog_init_timeout(&pdc_wdt->wdt_dev, heartbeat, &pdev->dev); if (ret < 0) { @@ -232,7 +233,6 @@ static int pdc_wdt_probe(struct platform_device *pdev) watchdog_set_nowayout(&pdc_wdt->wdt_dev, nowayout); platform_set_drvdata(pdev, pdc_wdt); - watchdog_set_drvdata(&pdc_wdt->wdt_dev, pdc_wdt); ret = watchdog_register_device(&pdc_wdt->wdt_dev); if (ret) -- cgit v1.2.1 From ae6ee2fd47f76db5a1cd02c23378057bd21c2c8d Mon Sep 17 00:00:00 2001 From: James Hogan Date: Fri, 20 Feb 2015 23:45:45 +0000 Subject: watchdog: imgpdc: Fix default heartbeat The IMG PDC watchdog driver heartbeat module parameter has no default so it is initialised to zero. This results in the following warning during probe: imgpdc-wdt 2006000.wdt: Initial timeout out of range! setting max timeout The module parameter description implies that the default value should be PDC_WDT_DEF_TIMEOUT, which isn't yet used, so initialise it to that. Also tweak the heartbeat module parameter description for consistency. Fixes: 93937669e9b5 ("watchdog: ImgTec PDC Watchdog Timer Driver") Signed-off-by: James Hogan Cc: Ezequiel Garcia Cc: Naidu Tellapati Cc: Jude Abraham Cc: linux-watchdog@vger.kernel.org Reviewed-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/imgpdc_wdt.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/watchdog/imgpdc_wdt.c b/drivers/watchdog/imgpdc_wdt.c index 32c35eb31e65..0deaa4f971f5 100644 --- a/drivers/watchdog/imgpdc_wdt.c +++ b/drivers/watchdog/imgpdc_wdt.c @@ -42,10 +42,10 @@ #define PDC_WDT_MIN_TIMEOUT 1 #define PDC_WDT_DEF_TIMEOUT 64 -static int heartbeat; +static int heartbeat = PDC_WDT_DEF_TIMEOUT; module_param(heartbeat, int, 0); -MODULE_PARM_DESC(heartbeat, "Watchdog heartbeats in seconds. " - "(default = " __MODULE_STRING(PDC_WDT_DEF_TIMEOUT) ")"); +MODULE_PARM_DESC(heartbeat, "Watchdog heartbeats in seconds " + "(default=" __MODULE_STRING(PDC_WDT_DEF_TIMEOUT) ")"); static bool nowayout = WATCHDOG_NOWAYOUT; module_param(nowayout, bool, 0); -- cgit v1.2.1 From 78d84bc3734c2566dbba09baae2414734661ed6a Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Tue, 17 Mar 2015 12:35:41 +0000 Subject: arm64: juno: Fix misleading name of UART reference clock The UART reference clock speed is 7273.8 kHz, not 72738 kHz. Dots aren't usually used in node names even though ePAPR permits them. However, this can easily be avoided by expressing the frequency in Hz, not kHz. This patch changes the name to refclk7273800hz, reflecting the actual clock speed. Signed-off-by: Dave Martin Acked-by: Liviu Dudau Signed-off-by: Olof Johansson --- arch/arm64/boot/dts/arm/juno-clocks.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/arm/juno-clocks.dtsi b/arch/arm64/boot/dts/arm/juno-clocks.dtsi index ea2b5666a16f..c9b89efe0f56 100644 --- a/arch/arm64/boot/dts/arm/juno-clocks.dtsi +++ b/arch/arm64/boot/dts/arm/juno-clocks.dtsi @@ -8,7 +8,7 @@ */ /* SoC fixed clocks */ - soc_uartclk: refclk72738khz { + soc_uartclk: refclk7273800hz { compatible = "fixed-clock"; #clock-cells = <0>; clock-frequency = <7273800>; -- cgit v1.2.1 From e42391cd048809d903291d07f86ed3934ce138e9 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 29 Mar 2015 15:26:31 -0700 Subject: Linux 4.0-rc6 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 14c722f96877..da36a3be7969 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 0 SUBLEVEL = 0 -EXTRAVERSION = -rc5 +EXTRAVERSION = -rc6 NAME = Hurr durr I'ma sheep # *DOCUMENTATION* -- cgit v1.2.1 From 7081adf3f98f4c39dc6758208775b2aa48f51f8a Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Mon, 30 Mar 2015 17:05:37 +0800 Subject: ALSA: hda/realtek - Support Dell headset mode for ALC256 Dell new platform of ALC256 audio codec. Support headset mode for Dell ALC256 platform. Signed-off-by: Kailang Yang Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 74382137b9f5..f75c1274b448 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -3607,6 +3607,7 @@ static void alc_headset_mode_unplugged(struct hda_codec *codec) switch (codec->vendor_id) { case 0x10ec0255: + case 0x10ec0256: alc_process_coef_fw(codec, coef0255); break; case 0x10ec0233: @@ -3662,6 +3663,7 @@ static void alc_headset_mode_mic_in(struct hda_codec *codec, hda_nid_t hp_pin, switch (codec->vendor_id) { case 0x10ec0255: + case 0x10ec0256: alc_write_coef_idx(codec, 0x45, 0xc489); snd_hda_set_pin_ctl_cache(codec, hp_pin, 0); alc_process_coef_fw(codec, coef0255); @@ -3731,6 +3733,7 @@ static void alc_headset_mode_default(struct hda_codec *codec) switch (codec->vendor_id) { case 0x10ec0255: + case 0x10ec0256: alc_process_coef_fw(codec, coef0255); break; case 0x10ec0233: @@ -3785,6 +3788,7 @@ static void alc_headset_mode_ctia(struct hda_codec *codec) switch (codec->vendor_id) { case 0x10ec0255: + case 0x10ec0256: alc_process_coef_fw(codec, coef0255); break; case 0x10ec0233: @@ -3839,6 +3843,7 @@ static void alc_headset_mode_omtp(struct hda_codec *codec) switch (codec->vendor_id) { case 0x10ec0255: + case 0x10ec0256: alc_process_coef_fw(codec, coef0255); break; case 0x10ec0233: @@ -3884,6 +3889,7 @@ static void alc_determine_headset_type(struct hda_codec *codec) switch (codec->vendor_id) { case 0x10ec0255: + case 0x10ec0256: alc_process_coef_fw(codec, coef0255); msleep(300); val = alc_read_coef_idx(codec, 0x46); @@ -5217,6 +5223,16 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { {0x17, 0x40000000}, {0x1d, 0x40700001}, {0x21, 0x02211050}), + SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, + {0x12, 0x90a60140}, + {0x13, 0x40000000}, + {0x14, 0x90170110}, + {0x19, 0x411111f0}, + {0x1a, 0x411111f0}, + {0x1b, 0x411111f0}, + {0x1d, 0x40700001}, + {0x1e, 0x411111f0}, + {0x21, 0x02211020}), SND_HDA_PIN_QUIRK(0x10ec0280, 0x103c, "HP", ALC280_FIXUP_HP_GPIO4, {0x12, 0x90a60130}, {0x13, 0x40000000}, -- cgit v1.2.1 From 9fc88ad6fd86ff6ce979105c633c58cc3980129c Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 31 Mar 2015 07:34:05 +0000 Subject: ALSA: usb-audio: don't try to get Benchmark DAC1 sample rate Adding this quirk allows us to avoid the noisy "cannot get freq at ep 0x1" message in dmesg output every time playback starts. This ought to affect other Benchmark DAC1 variations using the same "Microchip Technology, Inc." chip as well, but I have only tested with the "Pre" variant. Signed-off-by: Eric Wong Cc: Joe Turner Signed-off-by: Takashi Iwai --- sound/usb/quirks.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 753a47de8459..9a28365126f9 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1113,8 +1113,13 @@ void snd_usb_set_format_quirk(struct snd_usb_substream *subs, bool snd_usb_get_sample_rate_quirk(struct snd_usb_audio *chip) { - /* MS Lifecam HD-5000 doesn't support reading the sample rate. */ - return chip->usb_id == USB_ID(0x045E, 0x076D); + /* devices which do not support reading the sample rate. */ + switch (chip->usb_id) { + case USB_ID(0x045E, 0x076D): /* MS Lifecam HD-5000 */ + case USB_ID(0x04D8, 0xFEEA): /* Benchmark DAC1 Pre */ + return true; + } + return false; } /* Marantz/Denon USB DACs need a vendor cmd to switch -- cgit v1.2.1 From a59d7199f62b8336570972dcc288321d0ec999fe Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Wed, 8 Apr 2015 16:34:00 +0800 Subject: ALSA: hda/realtek - Make more stable to get pin sense for ALC283 Pin sense will active when power pin is wake up. Power pin will not wake up immediately during resume state. Add some delay to wait for power pin activated. Signed-off-by: Kailang Yang Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index f75c1274b448..a07da0bbb2b4 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -2912,6 +2912,8 @@ static void alc283_init(struct hda_codec *codec) if (!hp_pin) return; + + msleep(30); hp_pin_sense = snd_hda_jack_detect(codec, hp_pin); /* Index 0x43 Direct Drive HP AMP LPM Control 1 */ -- cgit v1.2.1