diff options
59 files changed, 839 insertions, 430 deletions
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 680fb566b928..8362860e21a7 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -144,8 +144,8 @@ prototypes: void (*kill_sb) (struct super_block *); locking rules: may block BKL -get_sb yes yes -kill_sb yes yes +get_sb yes no +kill_sb yes no ->get_sb() returns error or 0 with locked superblock attached to the vfsmount (exclusive on ->s_umount). @@ -409,12 +409,12 @@ ioctl: yes (see below) unlocked_ioctl: no (see below) compat_ioctl: no mmap: no -open: maybe (see below) +open: no flush: no release: no fsync: no (see below) aio_fsync: no -fasync: yes (see below) +fasync: no lock: yes readv: no writev: no @@ -431,13 +431,6 @@ For many filesystems, it is probably safe to acquire the inode semaphore. Note some filesystems (i.e. remote ones) provide no protection for i_size so you will need to use the BKL. -->open() locking is in-transit: big lock partially moved into the methods. -The only exception is ->open() in the instances of file_operations that never -end up in ->i_fop/->proc_fops, i.e. ones that belong to character devices -(chrdev_open() takes lock before replacing ->f_op and calling the secondary -method. As soon as we fix the handling of module reference counters all -instances of ->open() will be called without the BKL. - Note: ext2_release() was *the* source of contention on fs-intensive loads and dropping BKL on ->release() helps to get rid of that (we still grab BKL for cases when we close a file that had been opened r/w, but that diff --git a/MAINTAINERS b/MAINTAINERS index af279458b614..186be3ba5069 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -750,11 +750,13 @@ P: Ville Syrjala M: syrjala@sci.fi S: Maintained -ATL1 ETHERNET DRIVER +ATLX ETHERNET DRIVERS P: Jay Cliburn M: jcliburn@gmail.com P: Chris Snook M: csnook@redhat.com +P: Jie Yang +M: jie.yang@atheros.com L: atl1-devel@lists.sourceforge.net W: http://sourceforge.net/projects/atl1 W: http://atl1.sourceforge.net @@ -1593,7 +1595,7 @@ S: Supported EMBEDDED LINUX P: Paul Gortmaker M: paul.gortmaker@windriver.com -P David Woodhouse +P: David Woodhouse M: dwmw2@infradead.org L: linux-embedded@vger.kernel.org S: Maintained diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h index 94a95d7fafd6..71934856fc22 100644 --- a/arch/arm/include/asm/io.h +++ b/arch/arm/include/asm/io.h @@ -61,8 +61,9 @@ extern void __raw_readsl(const void __iomem *addr, void *data, int longlen); #define MT_DEVICE_NONSHARED 1 #define MT_DEVICE_CACHED 2 #define MT_DEVICE_IXP2000 3 +#define MT_DEVICE_WC 4 /* - * types 4 onwards can be found in asm/mach/map.h and are undefined + * types 5 onwards can be found in asm/mach/map.h and are undefined * for ioremap */ @@ -215,11 +216,13 @@ extern void _memset_io(volatile void __iomem *, int, size_t); #define ioremap(cookie,size) __arm_ioremap(cookie, size, MT_DEVICE) #define ioremap_nocache(cookie,size) __arm_ioremap(cookie, size, MT_DEVICE) #define ioremap_cached(cookie,size) __arm_ioremap(cookie, size, MT_DEVICE_CACHED) +#define ioremap_wc(cookie,size) __arm_ioremap(cookie, size, MT_DEVICE_WC) #define iounmap(cookie) __iounmap(cookie) #else #define ioremap(cookie,size) __arch_ioremap((cookie), (size), MT_DEVICE) #define ioremap_nocache(cookie,size) __arch_ioremap((cookie), (size), MT_DEVICE) #define ioremap_cached(cookie,size) __arch_ioremap((cookie), (size), MT_DEVICE_CACHED) +#define ioremap_wc(cookie,size) __arch_ioremap((cookie), (size), MT_DEVICE_WC) #define iounmap(cookie) __arch_iounmap(cookie) #endif diff --git a/arch/arm/include/asm/mach/map.h b/arch/arm/include/asm/mach/map.h index 06f583b13999..9eb936e49cc3 100644 --- a/arch/arm/include/asm/mach/map.h +++ b/arch/arm/include/asm/mach/map.h @@ -18,13 +18,13 @@ struct map_desc { unsigned int type; }; -/* types 0-3 are defined in asm/io.h */ -#define MT_CACHECLEAN 4 -#define MT_MINICLEAN 5 -#define MT_LOW_VECTORS 6 -#define MT_HIGH_VECTORS 7 -#define MT_MEMORY 8 -#define MT_ROM 9 +/* types 0-4 are defined in asm/io.h */ +#define MT_CACHECLEAN 5 +#define MT_MINICLEAN 6 +#define MT_LOW_VECTORS 7 +#define MT_HIGH_VECTORS 8 +#define MT_MEMORY 9 +#define MT_ROM 10 #define MT_NONSHARED_DEVICE MT_DEVICE_NONSHARED #define MT_IXP2000_DEVICE MT_DEVICE_IXP2000 diff --git a/arch/arm/mach-omap1/mcbsp.c b/arch/arm/mach-omap1/mcbsp.c index 826010d5d014..2baeaeb0c900 100644 --- a/arch/arm/mach-omap1/mcbsp.c +++ b/arch/arm/mach-omap1/mcbsp.c @@ -159,6 +159,7 @@ static struct omap_mcbsp_ops omap1_mcbsp_ops = { #ifdef CONFIG_ARCH_OMAP730 static struct omap_mcbsp_platform_data omap730_mcbsp_pdata[] = { { + .phys_base = OMAP730_MCBSP1_BASE, .virt_base = io_p2v(OMAP730_MCBSP1_BASE), .dma_rx_sync = OMAP_DMA_MCBSP1_RX, .dma_tx_sync = OMAP_DMA_MCBSP1_TX, @@ -167,6 +168,7 @@ static struct omap_mcbsp_platform_data omap730_mcbsp_pdata[] = { .ops = &omap1_mcbsp_ops, }, { + .phys_base = OMAP730_MCBSP2_BASE, .virt_base = io_p2v(OMAP730_MCBSP2_BASE), .dma_rx_sync = OMAP_DMA_MCBSP3_RX, .dma_tx_sync = OMAP_DMA_MCBSP3_TX, @@ -184,6 +186,7 @@ static struct omap_mcbsp_platform_data omap730_mcbsp_pdata[] = { #ifdef CONFIG_ARCH_OMAP15XX static struct omap_mcbsp_platform_data omap15xx_mcbsp_pdata[] = { { + .phys_base = OMAP1510_MCBSP1_BASE, .virt_base = OMAP1510_MCBSP1_BASE, .dma_rx_sync = OMAP_DMA_MCBSP1_RX, .dma_tx_sync = OMAP_DMA_MCBSP1_TX, @@ -193,6 +196,7 @@ static struct omap_mcbsp_platform_data omap15xx_mcbsp_pdata[] = { .clk_name = "mcbsp_clk", }, { + .phys_base = OMAP1510_MCBSP2_BASE, .virt_base = io_p2v(OMAP1510_MCBSP2_BASE), .dma_rx_sync = OMAP_DMA_MCBSP2_RX, .dma_tx_sync = OMAP_DMA_MCBSP2_TX, @@ -201,6 +205,7 @@ static struct omap_mcbsp_platform_data omap15xx_mcbsp_pdata[] = { .ops = &omap1_mcbsp_ops, }, { + .phys_base = OMAP1510_MCBSP3_BASE, .virt_base = OMAP1510_MCBSP3_BASE, .dma_rx_sync = OMAP_DMA_MCBSP3_RX, .dma_tx_sync = OMAP_DMA_MCBSP3_TX, @@ -219,6 +224,7 @@ static struct omap_mcbsp_platform_data omap15xx_mcbsp_pdata[] = { #ifdef CONFIG_ARCH_OMAP16XX static struct omap_mcbsp_platform_data omap16xx_mcbsp_pdata[] = { { + .phys_base = OMAP1610_MCBSP1_BASE, .virt_base = OMAP1610_MCBSP1_BASE, .dma_rx_sync = OMAP_DMA_MCBSP1_RX, .dma_tx_sync = OMAP_DMA_MCBSP1_TX, @@ -228,6 +234,7 @@ static struct omap_mcbsp_platform_data omap16xx_mcbsp_pdata[] = { .clk_name = "mcbsp_clk", }, { + .phys_base = OMAP1610_MCBSP2_BASE, .virt_base = io_p2v(OMAP1610_MCBSP2_BASE), .dma_rx_sync = OMAP_DMA_MCBSP2_RX, .dma_tx_sync = OMAP_DMA_MCBSP2_TX, @@ -236,6 +243,7 @@ static struct omap_mcbsp_platform_data omap16xx_mcbsp_pdata[] = { .ops = &omap1_mcbsp_ops, }, { + .phys_base = OMAP1610_MCBSP3_BASE, .virt_base = OMAP1610_MCBSP3_BASE, .dma_rx_sync = OMAP_DMA_MCBSP3_RX, .dma_tx_sync = OMAP_DMA_MCBSP3_TX, diff --git a/arch/arm/mach-omap2/mcbsp.c b/arch/arm/mach-omap2/mcbsp.c index 27eb6e3ca926..b261f1f80b5e 100644 --- a/arch/arm/mach-omap2/mcbsp.c +++ b/arch/arm/mach-omap2/mcbsp.c @@ -134,6 +134,7 @@ static struct omap_mcbsp_ops omap2_mcbsp_ops = { #ifdef CONFIG_ARCH_OMAP24XX static struct omap_mcbsp_platform_data omap24xx_mcbsp_pdata[] = { { + .phys_base = OMAP24XX_MCBSP1_BASE, .virt_base = IO_ADDRESS(OMAP24XX_MCBSP1_BASE), .dma_rx_sync = OMAP24XX_DMA_MCBSP1_RX, .dma_tx_sync = OMAP24XX_DMA_MCBSP1_TX, @@ -143,6 +144,7 @@ static struct omap_mcbsp_platform_data omap24xx_mcbsp_pdata[] = { .clk_name = "mcbsp_clk", }, { + .phys_base = OMAP24XX_MCBSP2_BASE, .virt_base = IO_ADDRESS(OMAP24XX_MCBSP2_BASE), .dma_rx_sync = OMAP24XX_DMA_MCBSP2_RX, .dma_tx_sync = OMAP24XX_DMA_MCBSP2_TX, @@ -161,6 +163,7 @@ static struct omap_mcbsp_platform_data omap24xx_mcbsp_pdata[] = { #ifdef CONFIG_ARCH_OMAP34XX static struct omap_mcbsp_platform_data omap34xx_mcbsp_pdata[] = { { + .phys_base = OMAP34XX_MCBSP1_BASE, .virt_base = IO_ADDRESS(OMAP34XX_MCBSP1_BASE), .dma_rx_sync = OMAP24XX_DMA_MCBSP1_RX, .dma_tx_sync = OMAP24XX_DMA_MCBSP1_TX, @@ -170,6 +173,7 @@ static struct omap_mcbsp_platform_data omap34xx_mcbsp_pdata[] = { .clk_name = "mcbsp_clk", }, { + .phys_base = OMAP34XX_MCBSP2_BASE, .virt_base = IO_ADDRESS(OMAP34XX_MCBSP2_BASE), .dma_rx_sync = OMAP24XX_DMA_MCBSP2_RX, .dma_tx_sync = OMAP24XX_DMA_MCBSP2_TX, diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 25d9a11eb617..a713e40e1f1a 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -211,6 +211,12 @@ static struct mem_type mem_types[] = { PMD_SECT_TEX(1), .domain = DOMAIN_IO, }, + [MT_DEVICE_WC] = { /* ioremap_wc */ + .prot_pte = PROT_PTE_DEVICE, + .prot_l1 = PMD_TYPE_TABLE, + .prot_sect = PROT_SECT_DEVICE, + .domain = DOMAIN_IO, + }, [MT_CACHECLEAN] = { .prot_sect = PMD_TYPE_SECT | PMD_SECT_XN, .domain = DOMAIN_KERNEL, @@ -273,6 +279,20 @@ static void __init build_mem_type_table(void) } /* + * On non-Xscale3 ARMv5-and-older systems, use CB=01 + * (Uncached/Buffered) for ioremap_wc() mappings. On XScale3 + * and ARMv6+, use TEXCB=00100 mappings (Inner/Outer Uncacheable + * in xsc3 parlance, Uncached Normal in ARMv6 parlance). + */ + if (cpu_is_xsc3() || cpu_arch >= CPU_ARCH_ARMv6) { + mem_types[MT_DEVICE_WC].prot_pte_ext |= PTE_EXT_TEX(1); + mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_TEX(1); + } else { + mem_types[MT_DEVICE_WC].prot_pte |= L_PTE_BUFFERABLE; + mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_BUFFERABLE; + } + + /* * ARMv5 and lower, bit 4 must be set for page tables. * (was: cache "update-able on write" bit on ARM610) * However, Xscale cores require this bit to be cleared. diff --git a/arch/arm/plat-mxc/clock.c b/arch/arm/plat-mxc/clock.c index 2f8627218839..0a38f0b396eb 100644 --- a/arch/arm/plat-mxc/clock.c +++ b/arch/arm/plat-mxc/clock.c @@ -37,7 +37,6 @@ #include <linux/proc_fs.h> #include <linux/semaphore.h> #include <linux/string.h> -#include <linux/version.h> #include <mach/clock.h> diff --git a/arch/arm/plat-omap/gpio.c b/arch/arm/plat-omap/gpio.c index 3e76ee2bc731..9e1341ebc14e 100644 --- a/arch/arm/plat-omap/gpio.c +++ b/arch/arm/plat-omap/gpio.c @@ -1488,7 +1488,7 @@ static int __init _omap_gpio_init(void) bank->chip.set = gpio_set; if (bank_is_mpuio(bank)) { bank->chip.label = "mpuio"; -#ifdef CONFIG_ARCH_OMAP1 +#ifdef CONFIG_ARCH_OMAP16XX bank->chip.dev = &omap_mpuio_device.dev; #endif bank->chip.base = OMAP_MPUIO(0); diff --git a/arch/arm/plat-omap/include/mach/mcbsp.h b/arch/arm/plat-omap/include/mach/mcbsp.h index 6eb44a92871d..8fdb95e26fcd 100644 --- a/arch/arm/plat-omap/include/mach/mcbsp.h +++ b/arch/arm/plat-omap/include/mach/mcbsp.h @@ -315,6 +315,7 @@ struct omap_mcbsp_ops { }; struct omap_mcbsp_platform_data { + unsigned long phys_base; u32 virt_base; u8 dma_rx_sync, dma_tx_sync; u16 rx_irq, tx_irq; @@ -324,6 +325,7 @@ struct omap_mcbsp_platform_data { struct omap_mcbsp { struct device *dev; + unsigned long phys_base; u32 io_base; u8 id; u8 free; diff --git a/arch/arm/plat-omap/mcbsp.c b/arch/arm/plat-omap/mcbsp.c index d0844050f2d2..014d26574bb6 100644 --- a/arch/arm/plat-omap/mcbsp.c +++ b/arch/arm/plat-omap/mcbsp.c @@ -651,7 +651,7 @@ int omap_mcbsp_xmit_buffer(unsigned int id, dma_addr_t buffer, omap_set_dma_dest_params(mcbsp[id].dma_tx_lch, src_port, OMAP_DMA_AMODE_CONSTANT, - mcbsp[id].io_base + OMAP_MCBSP_REG_DXR1, + mcbsp[id].phys_base + OMAP_MCBSP_REG_DXR1, 0, 0); omap_set_dma_src_params(mcbsp[id].dma_tx_lch, @@ -712,7 +712,7 @@ int omap_mcbsp_recv_buffer(unsigned int id, dma_addr_t buffer, omap_set_dma_src_params(mcbsp[id].dma_rx_lch, src_port, OMAP_DMA_AMODE_CONSTANT, - mcbsp[id].io_base + OMAP_MCBSP_REG_DRR1, + mcbsp[id].phys_base + OMAP_MCBSP_REG_DRR1, 0, 0); omap_set_dma_dest_params(mcbsp[id].dma_rx_lch, @@ -830,6 +830,7 @@ static int __init omap_mcbsp_probe(struct platform_device *pdev) mcbsp[id].dma_tx_lch = -1; mcbsp[id].dma_rx_lch = -1; + mcbsp[id].phys_base = pdata->phys_base; mcbsp[id].io_base = pdata->virt_base; /* Default I/O is IRQ based */ mcbsp[id].io_type = OMAP_MCBSP_IRQ_IO; diff --git a/arch/avr32/kernel/asm-offsets.c b/arch/avr32/kernel/asm-offsets.c index e4796c67a831..d6a8193a1d2f 100644 --- a/arch/avr32/kernel/asm-offsets.c +++ b/arch/avr32/kernel/asm-offsets.c @@ -4,6 +4,8 @@ * to extract and format the required data. */ +#include <linux/mm.h> +#include <linux/sched.h> #include <linux/thread_info.h> #include <linux/kbuild.h> @@ -17,4 +19,8 @@ void foo(void) OFFSET(TI_rar_saved, thread_info, rar_saved); OFFSET(TI_rsr_saved, thread_info, rsr_saved); OFFSET(TI_restart_block, thread_info, restart_block); + BLANK(); + OFFSET(TSK_active_mm, task_struct, active_mm); + BLANK(); + OFFSET(MM_pgd, mm_struct, pgd); } diff --git a/arch/avr32/kernel/entry-avr32b.S b/arch/avr32/kernel/entry-avr32b.S index 2b398cae110c..33d49377b8be 100644 --- a/arch/avr32/kernel/entry-avr32b.S +++ b/arch/avr32/kernel/entry-avr32b.S @@ -334,9 +334,64 @@ save_full_context_ex: /* Low-level exception handlers */ handle_critical: + /* + * AT32AP700x errata: + * + * After a Java stack overflow or underflow trap, any CPU + * memory access may cause erratic behavior. This will happen + * when the four least significant bits of the JOSP system + * register contains any value between 9 and 15 (inclusive). + * + * Possible workarounds: + * - Don't use the Java Extension Module + * - Ensure that the stack overflow and underflow trap + * handlers do not do any memory access or trigger any + * exceptions before the overflow/underflow condition is + * cleared (by incrementing or decrementing the JOSP) + * - Make sure that JOSP does not contain any problematic + * value before doing any exception or interrupt + * processing. + * - Set up a critical exception handler which writes a + * known-to-be-safe value, e.g. 4, to JOSP before doing + * any further processing. + * + * We'll use the last workaround for now since we cannot + * guarantee that user space processes don't use Java mode. + * Non-well-behaving userland will be terminated with extreme + * prejudice. + */ +#ifdef CONFIG_CPU_AT32AP700X + /* + * There's a chance we can't touch memory, so temporarily + * borrow PTBR to save the stack pointer while we fix things + * up... + */ + mtsr SYSREG_PTBR, sp + mov sp, 4 + mtsr SYSREG_JOSP, sp + mfsr sp, SYSREG_PTBR + sub pc, -2 + + /* Push most of pt_regs on stack. We'll do the rest later */ sub sp, 4 - stmts --sp, r0-lr - rcall save_full_context_ex + pushm r0-r12 + + /* PTBR mirrors current_thread_info()->task->active_mm->pgd */ + get_thread_info r0 + ld.w r1, r0[TI_task] + ld.w r2, r1[TSK_active_mm] + ld.w r3, r2[MM_pgd] + mtsr SYSREG_PTBR, r3 +#else + sub sp, 4 + pushm r0-r12 +#endif + sub r0, sp, -(14 * 4) + mov r1, lr + mfsr r2, SYSREG_RAR_EX + mfsr r3, SYSREG_RSR_EX + pushm r0-r3 + mfsr r12, SYSREG_ECR mov r11, sp rcall do_critical_exception diff --git a/arch/avr32/mach-at32ap/pm-at32ap700x.S b/arch/avr32/mach-at32ap/pm-at32ap700x.S index 5be4de65b209..17503b0ed6c9 100644 --- a/arch/avr32/mach-at32ap/pm-at32ap700x.S +++ b/arch/avr32/mach-at32ap/pm-at32ap700x.S @@ -134,7 +134,7 @@ pm_standby: mov r11, SDRAMC_LPR_LPCB_SELF_RFR bfins r10, r11, 0, 2 /* LPCB <- self Refresh */ sync 0 /* flush write buffer */ - st.w r12[SDRAMC_LPR], r11 /* put SDRAM in self-refresh mode */ + st.w r12[SDRAMC_LPR], r10 /* put SDRAM in self-refresh mode */ ld.w r11, r12[SDRAMC_LPR] unmask_interrupts sleep CPU_SLEEP_FROZEN diff --git a/arch/ia64/include/asm/sections.h b/arch/ia64/include/asm/sections.h index 7286e4a9fe84..a7acad2bc2f0 100644 --- a/arch/ia64/include/asm/sections.h +++ b/arch/ia64/include/asm/sections.h @@ -21,5 +21,8 @@ extern char __start_gate_brl_fsys_bubble_down_patchlist[], __end_gate_brl_fsys_b extern char __start_unwind[], __end_unwind[]; extern char __start_ivt_text[], __end_ivt_text[]; +#undef dereference_function_descriptor +void *dereference_function_descriptor(void *); + #endif /* _ASM_IA64_SECTIONS_H */ diff --git a/arch/ia64/kernel/module.c b/arch/ia64/kernel/module.c index 29aad349e0c4..545626f66a4c 100644 --- a/arch/ia64/kernel/module.c +++ b/arch/ia64/kernel/module.c @@ -31,9 +31,11 @@ #include <linux/elf.h> #include <linux/moduleloader.h> #include <linux/string.h> +#include <linux/uaccess.h> #include <linux/vmalloc.h> #include <asm/patch.h> +#include <asm/sections.h> #include <asm/unaligned.h> #define ARCH_MODULE_DEBUG 0 @@ -941,3 +943,13 @@ module_arch_cleanup (struct module *mod) if (mod->arch.core_unw_table) unw_remove_unwind_table(mod->arch.core_unw_table); } + +void *dereference_function_descriptor(void *ptr) +{ + struct fdesc *desc = ptr; + void *p; + + if (!probe_kernel_address(&desc->ip, p)) + ptr = p; + return ptr; +} diff --git a/arch/parisc/kernel/module.c b/arch/parisc/kernel/module.c index fdacdd4341c9..44138c3e6ea7 100644 --- a/arch/parisc/kernel/module.c +++ b/arch/parisc/kernel/module.c @@ -47,7 +47,9 @@ #include <linux/string.h> #include <linux/kernel.h> #include <linux/bug.h> +#include <linux/uaccess.h> +#include <asm/sections.h> #include <asm/unwind.h> #if 0 @@ -860,3 +862,15 @@ void module_arch_cleanup(struct module *mod) deregister_unwind_table(mod); module_bug_cleanup(mod); } + +#ifdef CONFIG_64BIT +void *dereference_function_descriptor(void *ptr) +{ + Elf64_Fdesc *desc = ptr; + void *p; + + if (!probe_kernel_address(&desc->addr, p)) + ptr = p; + return ptr; +} +#endif diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index 14174aa24074..717a3bc1352e 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -49,7 +49,7 @@ zlib := inffast.c inflate.c inftrees.c zlibheader := inffast.h inffixed.h inflate.h inftrees.h infutil.h zliblinuxheader := zlib.h zconf.h zutil.h -$(addprefix $(obj)/,$(zlib) gunzip_util.o main.o): \ +$(addprefix $(obj)/,$(zlib) cuboot-c2k.o gunzip_util.o main.o prpmc2800.o): \ $(addprefix $(obj)/,$(zliblinuxheader)) $(addprefix $(obj)/,$(zlibheader)) src-libfdt := fdt.c fdt_ro.c fdt_wip.c fdt_sw.c fdt_rw.c fdt_strerror.c diff --git a/arch/powerpc/include/asm/sections.h b/arch/powerpc/include/asm/sections.h index 916018e425c4..7710e9e6660f 100644 --- a/arch/powerpc/include/asm/sections.h +++ b/arch/powerpc/include/asm/sections.h @@ -16,6 +16,9 @@ static inline int in_kernel_text(unsigned long addr) return 0; } +#undef dereference_function_descriptor +void *dereference_function_descriptor(void *); + #endif #endif /* __KERNEL__ */ diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index ee6a2982d567..ad79de272ff3 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -21,8 +21,9 @@ #include <linux/err.h> #include <linux/vmalloc.h> #include <linux/bug.h> +#include <linux/uaccess.h> #include <asm/module.h> -#include <asm/uaccess.h> +#include <asm/sections.h> #include <asm/firmware.h> #include <asm/code-patching.h> #include <linux/sort.h> @@ -451,3 +452,13 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, return 0; } + +void *dereference_function_descriptor(void *ptr) +{ + struct ppc64_opd_entry *desc = ptr; + void *p; + + if (!probe_kernel_address(&desc->funcaddr, p)) + ptr = p; + return ptr; +} diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c index 1c1b627ee843..67595bc380dc 100644 --- a/arch/powerpc/platforms/cell/spufs/sched.c +++ b/arch/powerpc/platforms/cell/spufs/sched.c @@ -643,9 +643,10 @@ static struct spu *find_victim(struct spu_context *ctx) !(tmp->flags & SPU_CREATE_NOSCHED) && (!victim || tmp->prio > victim->prio)) { victim = spu->ctx; - get_spu_context(victim); } } + if (victim) + get_spu_context(victim); mutex_unlock(&cbe_spu_info[node].list_mutex); if (victim) { @@ -727,17 +728,33 @@ static void spu_schedule(struct spu *spu, struct spu_context *ctx) /* not a candidate for interruptible because it's called either from the scheduler thread or from spu_deactivate */ mutex_lock(&ctx->state_mutex); - __spu_schedule(spu, ctx); + if (ctx->state == SPU_STATE_SAVED) + __spu_schedule(spu, ctx); spu_release(ctx); } -static void spu_unschedule(struct spu *spu, struct spu_context *ctx) +/** + * spu_unschedule - remove a context from a spu, and possibly release it. + * @spu: The SPU to unschedule from + * @ctx: The context currently scheduled on the SPU + * @free_spu Whether to free the SPU for other contexts + * + * Unbinds the context @ctx from the SPU @spu. If @free_spu is non-zero, the + * SPU is made available for other contexts (ie, may be returned by + * spu_get_idle). If this is zero, the caller is expected to schedule another + * context to this spu. + * + * Should be called with ctx->state_mutex held. + */ +static void spu_unschedule(struct spu *spu, struct spu_context *ctx, + int free_spu) { int node = spu->node; mutex_lock(&cbe_spu_info[node].list_mutex); cbe_spu_info[node].nr_active--; - spu->alloc_state = SPU_FREE; + if (free_spu) + spu->alloc_state = SPU_FREE; spu_unbind_context(spu, ctx); ctx->stats.invol_ctx_switch++; spu->stats.invol_ctx_switch++; @@ -837,7 +854,7 @@ static int __spu_deactivate(struct spu_context *ctx, int force, int max_prio) if (spu) { new = grab_runnable_context(max_prio, spu->node); if (new || force) { - spu_unschedule(spu, ctx); + spu_unschedule(spu, ctx, new == NULL); if (new) { if (new->flags & SPU_CREATE_NOSCHED) wake_up(&new->stop_wq); @@ -910,7 +927,7 @@ static noinline void spusched_tick(struct spu_context *ctx) new = grab_runnable_context(ctx->prio + 1, spu->node); if (new) { - spu_unschedule(spu, ctx); + spu_unschedule(spu, ctx, 0); if (test_bit(SPU_SCHED_SPU_RUN, &ctx->sched_flags)) spu_add_to_rq(ctx); } else { diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c index 743ccad61c60..2be166c544ca 100644 --- a/arch/sparc64/kernel/smp.c +++ b/arch/sparc64/kernel/smp.c @@ -80,8 +80,6 @@ void smp_bogo(struct seq_file *m) i, cpu_data(i).clock_tick); } -static __cacheline_aligned_in_smp DEFINE_SPINLOCK(call_lock); - extern void setup_sparc64_timer(void); static volatile unsigned long callin_flag = 0; @@ -120,9 +118,9 @@ void __cpuinit smp_callin(void) while (!cpu_isset(cpuid, smp_commenced_mask)) rmb(); - spin_lock(&call_lock); + ipi_call_lock(); cpu_set(cpuid, cpu_online_map); - spin_unlock(&call_lock); + ipi_call_unlock(); /* idle thread is expected to have preempt disabled */ preempt_disable(); @@ -1305,10 +1303,6 @@ int __cpu_disable(void) c->core_id = 0; c->proc_id = -1; - spin_lock(&call_lock); - cpu_clear(cpu, cpu_online_map); - spin_unlock(&call_lock); - smp_wmb(); /* Make sure no interrupts point to this cpu. */ @@ -1318,6 +1312,10 @@ int __cpu_disable(void) mdelay(1); local_irq_disable(); + ipi_call_lock(); + cpu_clear(cpu, cpu_online_map); + ipi_call_unlock(); + return 0; } diff --git a/crypto/camellia.c b/crypto/camellia.c index b1cc4de6493c..493fee7e0a8b 100644 --- a/crypto/camellia.c +++ b/crypto/camellia.c @@ -35,8 +35,6 @@ #include <linux/init.h> #include <linux/kernel.h> #include <linux/module.h> -#include <linux/bitops.h> -#include <asm/unaligned.h> static const u32 camellia_sp1110[256] = { 0x70707000,0x82828200,0x2c2c2c00,0xececec00, @@ -337,6 +335,20 @@ static const u32 camellia_sp4404[256] = { /* * macros */ +#define GETU32(v, pt) \ + do { \ + /* latest breed of gcc is clever enough to use move */ \ + memcpy(&(v), (pt), 4); \ + (v) = be32_to_cpu(v); \ + } while(0) + +/* rotation right shift 1byte */ +#define ROR8(x) (((x) >> 8) + ((x) << 24)) +/* rotation left shift 1bit */ +#define ROL1(x) (((x) << 1) + ((x) >> 31)) +/* rotation left shift 1byte */ +#define ROL8(x) (((x) << 8) + ((x) >> 24)) + #define ROLDQ(ll, lr, rl, rr, w0, w1, bits) \ do { \ w0 = ll; \ @@ -371,7 +383,7 @@ static const u32 camellia_sp4404[256] = { ^ camellia_sp3033[(u8)(il >> 8)] \ ^ camellia_sp4404[(u8)(il )]; \ yl ^= yr; \ - yr = ror32(yr, 8); \ + yr = ROR8(yr); \ yr ^= yl; \ } while(0) @@ -393,7 +405,7 @@ static void camellia_setup_tail(u32 *subkey, u32 *subL, u32 *subR, int max) subL[7] ^= subL[1]; subR[7] ^= subR[1]; subL[1] ^= subR[1] & ~subR[9]; dw = subL[1] & subL[9], - subR[1] ^= rol32(dw, 1); /* modified for FLinv(kl2) */ + subR[1] ^= ROL1(dw); /* modified for FLinv(kl2) */ /* round 8 */ subL[11] ^= subL[1]; subR[11] ^= subR[1]; /* round 10 */ @@ -402,7 +414,7 @@ static void camellia_setup_tail(u32 *subkey, u32 *subL, u32 *subR, int max) subL[15] ^= subL[1]; subR[15] ^= subR[1]; subL[1] ^= subR[1] & ~subR[17]; dw = subL[1] & subL[17], - subR[1] ^= rol32(dw, 1); /* modified for FLinv(kl4) */ + subR[1] ^= ROL1(dw); /* modified for FLinv(kl4) */ /* round 14 */ subL[19] ^= subL[1]; subR[19] ^= subR[1]; /* round 16 */ @@ -418,7 +430,7 @@ static void camellia_setup_tail(u32 *subkey, u32 *subL, u32 *subR, int max) } else { subL[1] ^= subR[1] & ~subR[25]; dw = subL[1] & subL[25], - subR[1] ^= rol32(dw, 1); /* modified for FLinv(kl6) */ + subR[1] ^= ROL1(dw); /* modified for FLinv(kl6) */ /* round 20 */ subL[27] ^= subL[1]; subR[27] ^= subR[1]; /* round 22 */ @@ -438,7 +450,7 @@ static void camellia_setup_tail(u32 *subkey, u32 *subL, u32 *subR, int max) subL[26] ^= kw4l; subR[26] ^= kw4r; kw4l ^= kw4r & ~subR[24]; dw = kw4l & subL[24], - kw4r ^= rol32(dw, 1); /* modified for FL(kl5) */ + kw4r ^= ROL1(dw); /* modified for FL(kl5) */ } /* round 17 */ subL[22] ^= kw4l; subR[22] ^= kw4r; @@ -448,7 +460,7 @@ static void camellia_setup_tail(u32 *subkey, u32 *subL, u32 *subR, int max) subL[18] ^= kw4l; subR[18] ^= kw4r; kw4l ^= kw4r & ~subR[16]; dw = kw4l & subL[16], - kw4r ^= rol32(dw, 1); /* modified for FL(kl3) */ + kw4r ^= ROL1(dw); /* modified for FL(kl3) */ /* round 11 */ subL[14] ^= kw4l; subR[14] ^= kw4r; /* round 9 */ @@ -457,7 +469,7 @@ static void camellia_setup_tail(u32 *subkey, u32 *subL, u32 *subR, int max) subL[10] ^= kw4l; subR[10] ^= kw4r; kw4l ^= kw4r & ~subR[8]; dw = kw4l & subL[8], - kw4r ^= rol32(dw, 1); /* modified for FL(kl1) */ + kw4r ^= ROL1(dw); /* modified for FL(kl1) */ /* round 5 */ subL[6] ^= kw4l; subR[6] ^= kw4r; /* round 3 */ @@ -482,7 +494,7 @@ static void camellia_setup_tail(u32 *subkey, u32 *subL, u32 *subR, int max) SUBKEY_R(6) = subR[5] ^ subR[7]; tl = subL[10] ^ (subR[10] & ~subR[8]); dw = tl & subL[8], /* FL(kl1) */ - tr = subR[10] ^ rol32(dw, 1); + tr = subR[10] ^ ROL1(dw); SUBKEY_L(7) = subL[6] ^ tl; /* round 6 */ SUBKEY_R(7) = subR[6] ^ tr; SUBKEY_L(8) = subL[8]; /* FL(kl1) */ @@ -491,7 +503,7 @@ static void camellia_setup_tail(u32 *subkey, u32 *subL, u32 *subR, int max) SUBKEY_R(9) = subR[9]; tl = subL[7] ^ (subR[7] & ~subR[9]); dw = tl & subL[9], /* FLinv(kl2) */ - tr = subR[7] ^ rol32(dw, 1); + tr = subR[7] ^ ROL1(dw); SUBKEY_L(10) = tl ^ subL[11]; /* round 7 */ SUBKEY_R(10) = tr ^ subR[11]; SUBKEY_L(11) = subL[10] ^ subL[12]; /* round 8 */ @@ -504,7 +516,7 @@ static void camellia_setup_tail(u32 *subkey, u32 *subL, u32 *subR, int max) SUBKEY_R(14) = subR[13] ^ subR[15]; tl = subL[18] ^ (subR[18] & ~subR[16]); dw = tl & subL[16], /* FL(kl3) */ - tr = subR[18] ^ rol32(dw, 1); + tr = subR[18] ^ ROL1(dw); SUBKEY_L(15) = subL[14] ^ tl; /* round 12 */ SUBKEY_R(15) = subR[14] ^ tr; SUBKEY_L(16) = subL[16]; /* FL(kl3) */ @@ -513,7 +525,7 @@ static void camellia_setup_tail(u32 *subkey, u32 *subL, u32 *subR, int max) SUBKEY_R(17) = subR[17]; tl = subL[15] ^ (subR[15] & ~subR[17]); dw = tl & subL[17], /* FLinv(kl4) */ - tr = subR[15] ^ rol32(dw, 1); + tr = subR[15] ^ ROL1(dw); SUBKEY_L(18) = tl ^ subL[19]; /* round 13 */ SUBKEY_R(18) = tr ^ subR[19]; SUBKEY_L(19) = subL[18] ^ subL[20]; /* round 14 */ @@ -532,7 +544,7 @@ static void camellia_setup_tail(u32 *subkey, u32 *subL, u32 *subR, int max) } else { tl = subL[26] ^ (subR[26] & ~subR[24]); dw = tl & subL[24], /* FL(kl5) */ - tr = subR[26] ^ rol32(dw, 1); + tr = subR[26] ^ ROL1(dw); SUBKEY_L(23) = subL[22] ^ tl; /* round 18 */ SUBKEY_R(23) = subR[22] ^ tr; SUBKEY_L(24) = subL[24]; /* FL(kl5) */ @@ -541,7 +553,7 @@ static void camellia_setup_tail(u32 *subkey, u32 *subL, u32 *subR, int max) SUBKEY_R(25) = subR[25]; tl = subL[23] ^ (subR[23] & ~subR[25]); dw = tl & subL[25], /* FLinv(kl6) */ - tr = subR[23] ^ rol32(dw, 1); + tr = subR[23] ^ ROL1(dw); SUBKEY_L(26) = tl ^ subL[27]; /* round 19 */ SUBKEY_R(26) = tr ^ subR[27]; SUBKEY_L(27) = subL[26] ^ subL[28]; /* round 20 */ @@ -561,17 +573,17 @@ static void camellia_setup_tail(u32 *subkey, u32 *subL, u32 *subR, int max) /* apply the inverse of the last half of P-function */ i = 2; do { - dw = SUBKEY_L(i + 0) ^ SUBKEY_R(i + 0); dw = rol32(dw, 8);/* round 1 */ + dw = SUBKEY_L(i + 0) ^ SUBKEY_R(i + 0); dw = ROL8(dw);/* round 1 */ SUBKEY_R(i + 0) = SUBKEY_L(i + 0) ^ dw; SUBKEY_L(i + 0) = dw; - dw = SUBKEY_L(i + 1) ^ SUBKEY_R(i + 1); dw = rol32(dw, 8);/* round 2 */ + dw = SUBKEY_L(i + 1) ^ SUBKEY_R(i + 1); dw = ROL8(dw);/* round 2 */ SUBKEY_R(i + 1) = SUBKEY_L(i + 1) ^ dw; SUBKEY_L(i + 1) = dw; - dw = SUBKEY_L(i + 2) ^ SUBKEY_R(i + 2); dw = rol32(dw, 8);/* round 3 */ + dw = SUBKEY_L(i + 2) ^ SUBKEY_R(i + 2); dw = ROL8(dw);/* round 3 */ SUBKEY_R(i + 2) = SUBKEY_L(i + 2) ^ dw; SUBKEY_L(i + 2) = dw; - dw = SUBKEY_L(i + 3) ^ SUBKEY_R(i + 3); dw = rol32(dw, 8);/* round 4 */ + dw = SUBKEY_L(i + 3) ^ SUBKEY_R(i + 3); dw = ROL8(dw);/* round 4 */ SUBKEY_R(i + 3) = SUBKEY_L(i + 3) ^ dw; SUBKEY_L(i + 3) = dw; - dw = SUBKEY_L(i + 4) ^ SUBKEY_R(i + 4); dw = rol32(dw, 9);/* round 5 */ + dw = SUBKEY_L(i + 4) ^ SUBKEY_R(i + 4); dw = ROL8(dw);/* round 5 */ SUBKEY_R(i + 4) = SUBKEY_L(i + 4) ^ dw; SUBKEY_L(i + 4) = dw; - dw = SUBKEY_L(i + 5) ^ SUBKEY_R(i + 5); dw = rol32(dw, 8);/* round 6 */ + dw = SUBKEY_L(i + 5) ^ SUBKEY_R(i + 5); dw = ROL8(dw);/* round 6 */ SUBKEY_R(i + 5) = SUBKEY_L(i + 5) ^ dw; SUBKEY_L(i + 5) = dw; i += 8; } while (i < max); @@ -587,10 +599,10 @@ static void camellia_setup128(const unsigned char *key, u32 *subkey) /** * k == kll || klr || krl || krr (|| is concatenation) */ - kll = get_unaligned_be32(key); - klr = get_unaligned_be32(key + 4); - krl = get_unaligned_be32(key + 8); - krr = get_unaligned_be32(key + 12); + GETU32(kll, key ); + GETU32(klr, key + 4); + GETU32(krl, key + 8); + GETU32(krr, key + 12); /* generate KL dependent subkeys */ /* kw1 */ @@ -695,14 +707,14 @@ static void camellia_setup256(const unsigned char *key, u32 *subkey) * key = (kll || klr || krl || krr || krll || krlr || krrl || krrr) * (|| is concatenation) */ - kll = get_unaligned_be32(key); - klr = get_unaligned_be32(key + 4); - krl = get_unaligned_be32(key + 8); - krr = get_unaligned_be32(key + 12); - krll = get_unaligned_be32(key + 16); - krlr = get_unaligned_be32(key + 20); - krrl = get_unaligned_be32(key + 24); - krrr = get_unaligned_be32(key + 28); + GETU32(kll, key ); + GETU32(klr, key + 4); + GETU32(krl, key + 8); + GETU32(krr, key + 12); + GETU32(krll, key + 16); + GETU32(krlr, key + 20); + GETU32(krrl, key + 24); + GETU32(krrr, key + 28); /* generate KL dependent subkeys */ /* kw1 */ @@ -858,13 +870,13 @@ static void camellia_setup192(const unsigned char *key, u32 *subkey) t0 &= ll; \ t2 |= rr; \ rl ^= t2; \ - lr ^= rol32(t0, 1); \ + lr ^= ROL1(t0); \ t3 = krl; \ t1 = klr; \ t3 &= rl; \ t1 |= lr; \ ll ^= t1; \ - rr ^= rol32(t3, 1); \ + rr ^= ROL1(t3); \ } while(0) #define CAMELLIA_ROUNDSM(xl, xr, kl, kr, yl, yr, il, ir) \ @@ -880,7 +892,7 @@ static void camellia_setup192(const unsigned char *key, u32 *subkey) il ^= kl; \ ir ^= il ^ kr; \ yl ^= ir; \ - yr ^= ror32(il, 8) ^ ir; \ + yr ^= ROR8(il) ^ ir; \ } while(0) /* max = 24: 128bit encrypt, max = 32: 256bit encrypt */ diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig index ae8494944c45..11c8c19f0fb7 100644 --- a/drivers/ata/Kconfig +++ b/drivers/ata/Kconfig @@ -448,8 +448,10 @@ config PATA_MARVELL tristate "Marvell PATA support via legacy mode" depends on PCI help - This option enables limited support for the Marvell 88SE6145 ATA - controller. + This option enables limited support for the Marvell 88SE61xx ATA + controllers. If you wish to use only the SATA ports then select + the AHCI driver alone. If you wish to the use the PATA port or + both SATA and PATA include this driver. If unsure, say N. diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index c729e6988bbb..2e1a7cb2ed5f 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -420,7 +420,7 @@ static const struct ata_port_info ahci_port_info[] = { /* board_ahci_mv */ { AHCI_HFLAGS (AHCI_HFLAG_NO_NCQ | AHCI_HFLAG_NO_MSI | - AHCI_HFLAG_MV_PATA), + AHCI_HFLAG_MV_PATA | AHCI_HFLAG_NO_PMP), .flags = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY | ATA_FLAG_MMIO | ATA_FLAG_PIO_DMA, .pio_mask = 0x1f, /* pio0-4 */ @@ -487,7 +487,9 @@ static const struct pci_device_id ahci_pci_tbl[] = { { PCI_VDEVICE(INTEL, 0x3a05), board_ahci }, /* ICH10 */ { PCI_VDEVICE(INTEL, 0x3a25), board_ahci }, /* ICH10 */ { PCI_VDEVICE(INTEL, 0x3b24), board_ahci }, /* PCH RAID */ + { PCI_VDEVICE(INTEL, 0x3b25), board_ahci }, /* PCH RAID */ { PCI_VDEVICE(INTEL, 0x3b2b), board_ahci }, /* PCH RAID */ + { PCI_VDEVICE(INTEL, 0x3b2c), board_ahci }, /* PCH RAID */ /* JMicron 360/1/3/5/6, match class to avoid IDE function */ { PCI_VENDOR_ID_JMICRON, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, @@ -610,6 +612,15 @@ module_param(ahci_em_messages, int, 0444); MODULE_PARM_DESC(ahci_em_messages, "Set AHCI Enclosure Management Message type (0 = disabled, 1 = LED"); +#if defined(CONFIG_PATA_MARVELL) || defined(CONFIG_PATA_MARVELL_MODULE) +static int marvell_enable; +#else +static int marvell_enable = 1; +#endif +module_param(marvell_enable, int, 0644); +MODULE_PARM_DESC(marvell_enable, "Marvell SATA via AHCI (1 = enabled)"); + + static inline int ahci_nr_ports(u32 cap) { return (cap & 0x1f) + 1; @@ -732,6 +743,8 @@ static void ahci_save_initial_config(struct pci_dev *pdev, "MV_AHCI HACK: port_map %x -> %x\n", port_map, port_map & mv); + dev_printk(KERN_ERR, &pdev->dev, + "Disabling your PATA port. Use the boot option 'ahci.marvell_enable=0' to avoid this.\n"); port_map &= mv; } @@ -2533,6 +2546,12 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) if (!printed_version++) dev_printk(KERN_DEBUG, &pdev->dev, "version " DRV_VERSION "\n"); + /* The AHCI driver can only drive the SATA ports, the PATA driver + can drive them all so if both drivers are selected make sure + AHCI stays out of the way */ + if (pdev->vendor == PCI_VENDOR_ID_MARVELL && !marvell_enable) + return -ENODEV; + /* acquire resources */ rc = pcim_enable_device(pdev); if (rc) diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c index 304fdc6f1dc2..2a4c516894f0 100644 --- a/drivers/ata/libata-sff.c +++ b/drivers/ata/libata-sff.c @@ -1315,11 +1315,6 @@ fsm_start: break; case HSM_ST_ERR: - /* make sure qc->err_mask is available to - * know what's wrong and recover - */ - WARN_ON(!(qc->err_mask & (AC_ERR_DEV | AC_ERR_HSM))); - ap->hsm_task_state = HSM_ST_IDLE; /* complete taskfile transaction */ diff --git a/drivers/ata/pata_marvell.c b/drivers/ata/pata_marvell.c index 24a011b25024..0d87eec84966 100644 --- a/drivers/ata/pata_marvell.c +++ b/drivers/ata/pata_marvell.c @@ -20,29 +20,30 @@ #include <linux/ata.h> #define DRV_NAME "pata_marvell" -#define DRV_VERSION "0.1.4" +#define DRV_VERSION "0.1.6" /** - * marvell_pre_reset - check for 40/80 pin - * @link: link - * @deadline: deadline jiffies for the operation + * marvell_pata_active - check if PATA is active + * @pdev: PCI device * - * Perform the PATA port setup we need. + * Returns 1 if the PATA port may be active. We know how to check this + * for the 6145 but not the other devices */ -static int marvell_pre_reset(struct ata_link *link, unsigned long deadline) +static int marvell_pata_active(struct pci_dev *pdev) { - struct ata_port *ap = link->ap; - struct pci_dev *pdev = to_pci_dev(ap->host->dev); + int i; u32 devices; void __iomem *barp; - int i; - /* Check if our port is enabled */ + /* We don't yet know how to do this for other devices */ + if (pdev->device != 0x6145) + return 1; barp = pci_iomap(pdev, 5, 0x10); if (barp == NULL) return -ENOMEM; + printk("BAR5:"); for(i = 0; i <= 0x0F; i++) printk("%02X:%02X ", i, ioread8(barp + i)); @@ -51,9 +52,27 @@ static int marvell_pre_reset(struct ata_link *link, unsigned long deadline) devices = ioread32(barp + 0x0C); pci_iounmap(pdev, barp); - if ((pdev->device == 0x6145) && (ap->port_no == 0) && - (!(devices & 0x10))) /* PATA enable ? */ - return -ENOENT; + if (devices & 0x10) + return 1; + return 0; +} + +/** + * marvell_pre_reset - check for 40/80 pin + * @link: link + * @deadline: deadline jiffies for the operation + * + * Perform the PATA port setup we need. + */ + +static int marvell_pre_reset(struct ata_link *link, unsigned long deadline) +{ + struct ata_port *ap = link->ap; + struct pci_dev *pdev = to_pci_dev(ap->host->dev); + + if (pdev->device == 0x6145 && ap->port_no == 0 && + !marvell_pata_active(pdev)) /* PATA enable ? */ + return -ENOENT; return ata_sff_prereset(link, deadline); } @@ -128,6 +147,12 @@ static int marvell_init_one (struct pci_dev *pdev, const struct pci_device_id *i if (pdev->device == 0x6101) ppi[1] = &ata_dummy_port_info; +#if defined(CONFIG_AHCI) || defined(CONFIG_AHCI_MODULE) + if (!marvell_pata_active(pdev)) { + printk(KERN_INFO DRV_NAME ": PATA port not active, deferring to AHCI driver.\n"); + return -ENODEV; + } +#endif return ata_pci_sff_init_one(pdev, ppi, &marvell_sht, NULL); } diff --git a/drivers/ata/pata_sil680.c b/drivers/ata/pata_sil680.c index 720b8645f58a..e970b227fbce 100644 --- a/drivers/ata/pata_sil680.c +++ b/drivers/ata/pata_sil680.c @@ -322,9 +322,6 @@ static int __devinit sil680_init_one(struct pci_dev *pdev, /* Try to acquire MMIO resources and fallback to PIO if * that fails */ - rc = pcim_enable_device(pdev); - if (rc) - return rc; rc = pcim_iomap_regions(pdev, 1 << SIL680_MMIO_BAR, DRV_NAME); if (rc) goto use_ioports; diff --git a/drivers/ata/sata_mv.c b/drivers/ata/sata_mv.c index 13c1d2af18ac..c815f8ecf6e6 100644 --- a/drivers/ata/sata_mv.c +++ b/drivers/ata/sata_mv.c @@ -667,7 +667,8 @@ static const struct pci_device_id mv_pci_tbl[] = { { PCI_VDEVICE(MARVELL, 0x5041), chip_504x }, { PCI_VDEVICE(MARVELL, 0x5080), chip_5080 }, { PCI_VDEVICE(MARVELL, 0x5081), chip_508x }, - /* RocketRAID 1740/174x have different identifiers */ + /* RocketRAID 1720/174x have different identifiers */ + { PCI_VDEVICE(TTI, 0x1720), chip_6042 }, { PCI_VDEVICE(TTI, 0x1740), chip_508x }, { PCI_VDEVICE(TTI, 0x1742), chip_508x }, diff --git a/drivers/ata/sata_nv.c b/drivers/ata/sata_nv.c index 858f70610eda..1e1f3f3757ae 100644 --- a/drivers/ata/sata_nv.c +++ b/drivers/ata/sata_nv.c @@ -309,8 +309,6 @@ static void nv_nf2_freeze(struct ata_port *ap); static void nv_nf2_thaw(struct ata_port *ap); static void nv_ck804_freeze(struct ata_port *ap); static void nv_ck804_thaw(struct ata_port *ap); -static int nv_hardreset(struct ata_link *link, unsigned int *class, - unsigned long deadline); static int nv_adma_slave_config(struct scsi_device *sdev); static int nv_adma_check_atapi_dma(struct ata_queued_cmd *qc); static void nv_adma_qc_prep(struct ata_queued_cmd *qc); @@ -407,7 +405,7 @@ static struct scsi_host_template nv_swncq_sht = { static struct ata_port_operations nv_generic_ops = { .inherits = &ata_bmdma_port_ops, - .hardreset = nv_hardreset, + .hardreset = ATA_OP_NULL, .scr_read = nv_scr_read, .scr_write = nv_scr_write, }; @@ -1588,21 +1586,6 @@ static void nv_mcp55_thaw(struct ata_port *ap) ata_sff_thaw(ap); } -static int nv_hardreset(struct ata_link *link, unsigned int *class, - unsigned long deadline) -{ - int rc; - - /* SATA hardreset fails to retrieve proper device signature on - * some controllers. Request follow up SRST. For more info, - * see http://bugzilla.kernel.org/show_bug.cgi?id=3352 - */ - rc = sata_sff_hardreset(link, class, deadline); - if (rc) - return rc; - return -EAGAIN; -} - static void nv_adma_error_handler(struct ata_port *ap) { struct nv_adma_port_priv *pp = ap->private_data; diff --git a/drivers/usb/host/ohci-omap.c b/drivers/usb/host/ohci-omap.c index 1eb64d08b60a..95b3ec89c126 100644 --- a/drivers/usb/host/ohci-omap.c +++ b/drivers/usb/host/ohci-omap.c @@ -208,7 +208,7 @@ static int ohci_omap_init(struct usb_hcd *hcd) if (cpu_is_omap16xx()) ocpi_enable(); -#ifdef CONFIG_ARCH_OMAP_OTG +#ifdef CONFIG_USB_OTG if (need_transceiver) { ohci->transceiver = otg_get_transceiver(); if (ohci->transceiver) { diff --git a/drivers/usb/serial/console.c b/drivers/usb/serial/console.c index 7b74238ad1c7..e980766bb84b 100644 --- a/drivers/usb/serial/console.c +++ b/drivers/usb/serial/console.c @@ -161,7 +161,7 @@ static int usb_console_setup(struct console *co, char *options) if (serial->type->set_termios) { termios->c_cflag = cflag; tty_termios_encode_baud_rate(termios, baud, baud); - serial->type->set_termios(NULL, port, &dummy); + serial->type->set_termios(tty, port, &dummy); port->port.tty = NULL; kfree(termios); diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 9abcd2b329f7..e9b20173fef3 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1279,6 +1279,12 @@ static int nfs_parse_mount_options(char *raw, } } + if (errors > 0) { + dfprintk(MOUNT, "NFS: parsing encountered %d error%s\n", + errors, (errors == 1 ? "" : "s")); + if (!sloppy) + return 0; + } return 1; out_nomem: diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c index 154098157473..73db464cd08b 100644 --- a/fs/ubifs/budget.c +++ b/fs/ubifs/budget.c @@ -302,18 +302,6 @@ long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs) int subtract_lebs; long long available; - /* - * Force the amount available to the total size reported if the used - * space is zero. - */ - if (c->lst.total_used <= UBIFS_INO_NODE_SZ && - c->budg_data_growth + c->budg_dd_growth == 0) { - /* Do the same calculation as for c->block_cnt */ - available = c->main_lebs - 2; - available *= c->leb_size - c->dark_wm; - return available; - } - available = c->main_bytes - c->lst.total_used; /* @@ -714,34 +702,106 @@ void ubifs_release_dirty_inode_budget(struct ubifs_info *c, } /** - * ubifs_budg_get_free_space - return amount of free space. + * ubifs_reported_space - calculate reported free space. + * @c: the UBIFS file-system description object + * @free: amount of free space + * + * This function calculates amount of free space which will be reported to + * user-space. User-space application tend to expect that if the file-system + * (e.g., via the 'statfs()' call) reports that it has N bytes available, they + * are able to write a file of size N. UBIFS attaches node headers to each data + * node and it has to write indexind nodes as well. This introduces additional + * overhead, and UBIFS it has to report sligtly less free space to meet the + * above expectetion. + * + * This function assumes free space is made up of uncompressed data nodes and + * full index nodes (one per data node, tripled because we always allow enough + * space to write the index thrice). + * + * Note, the calculation is pessimistic, which means that most of the time + * UBIFS reports less space than it actually has. + */ +long long ubifs_reported_space(const struct ubifs_info *c, uint64_t free) +{ + int divisor, factor, f; + + /* + * Reported space size is @free * X, where X is UBIFS block size + * divided by UBIFS block size + all overhead one data block + * introduces. The overhead is the node header + indexing overhead. + * + * Indexing overhead calculations are based on the following formula: + * I = N/(f - 1) + 1, where I - number of indexing nodes, N - number + * of data nodes, f - fanout. Because effective UBIFS fanout is twice + * as less than maximum fanout, we assume that each data node + * introduces 3 * @c->max_idx_node_sz / (@c->fanout/2 - 1) bytes. + * Note, the multiplier 3 is because UBIFS reseves thrice as more space + * for the index. + */ + f = c->fanout > 3 ? c->fanout >> 1 : 2; + factor = UBIFS_BLOCK_SIZE; + divisor = UBIFS_MAX_DATA_NODE_SZ; + divisor += (c->max_idx_node_sz * 3) / (f - 1); + free *= factor; + do_div(free, divisor); + return free; +} + +/** + * ubifs_get_free_space - return amount of free space. * @c: UBIFS file-system description object * - * This function returns amount of free space on the file-system. + * This function calculates amount of free space to report to user-space. + * + * Because UBIFS may introduce substantial overhead (the index, node headers, + * alighment, wastage at the end of eraseblocks, etc), it cannot report real + * amount of free flash space it has (well, because not all dirty space is + * reclamable, UBIFS does not actually know the real amount). If UBIFS did so, + * it would bread user expectetion about what free space is. Users seem to + * accustomed to assume that if the file-system reports N bytes of free space, + * they would be able to fit a file of N bytes to the FS. This almost works for + * traditional file-systems, because they have way less overhead than UBIFS. + * So, to keep users happy, UBIFS tries to take the overhead into account. */ -long long ubifs_budg_get_free_space(struct ubifs_info *c) +long long ubifs_get_free_space(struct ubifs_info *c) { - int min_idx_lebs, rsvd_idx_lebs; + int min_idx_lebs, rsvd_idx_lebs, lebs; long long available, outstanding, free; - /* Do exactly the same calculations as in 'do_budget_space()' */ spin_lock(&c->space_lock); min_idx_lebs = ubifs_calc_min_idx_lebs(c); + outstanding = c->budg_data_growth + c->budg_dd_growth; - if (min_idx_lebs > c->lst.idx_lebs) - rsvd_idx_lebs = min_idx_lebs - c->lst.idx_lebs; - else - rsvd_idx_lebs = 0; - - if (rsvd_idx_lebs > c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt - - c->lst.taken_empty_lebs) { + /* + * Force the amount available to the total size reported if the used + * space is zero. + */ + if (c->lst.total_used <= UBIFS_INO_NODE_SZ && !outstanding) { spin_unlock(&c->space_lock); - return 0; + return (long long)c->block_cnt << UBIFS_BLOCK_SHIFT; } available = ubifs_calc_available(c, min_idx_lebs); - outstanding = c->budg_data_growth + c->budg_dd_growth; - c->min_idx_lebs = min_idx_lebs; + + /* + * When reporting free space to user-space, UBIFS guarantees that it is + * possible to write a file of free space size. This means that for + * empty LEBs we may use more precise calculations than + * 'ubifs_calc_available()' is using. Namely, we know that in empty + * LEBs we would waste only @c->leb_overhead bytes, not @c->dark_wm. + * Thus, amend the available space. + * + * Note, the calculations below are similar to what we have in + * 'do_budget_space()', so refer there for comments. + */ + if (min_idx_lebs > c->lst.idx_lebs) + rsvd_idx_lebs = min_idx_lebs - c->lst.idx_lebs; + else + rsvd_idx_lebs = 0; + lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt - + c->lst.taken_empty_lebs; + lebs -= rsvd_idx_lebs; + available += lebs * (c->dark_wm - c->leb_overhead); spin_unlock(&c->space_lock); if (available > outstanding) diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 5c96f1fb7016..2b267c9a1806 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -587,7 +587,6 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry) if (err) { if (err != -ENOSPC) return err; - err = 0; budgeted = 0; } diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 4071d1cae29f..3d698e2022b1 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -793,7 +793,7 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode, int err; struct ubifs_budget_req req; loff_t old_size = inode->i_size, new_size = attr->ia_size; - int offset = new_size & (UBIFS_BLOCK_SIZE - 1); + int offset = new_size & (UBIFS_BLOCK_SIZE - 1), budgeted = 1; struct ubifs_inode *ui = ubifs_inode(inode); dbg_gen("ino %lu, size %lld -> %lld", inode->i_ino, old_size, new_size); @@ -811,8 +811,15 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode, /* A funny way to budget for truncation node */ req.dirtied_ino_d = UBIFS_TRUN_NODE_SZ; err = ubifs_budget_space(c, &req); - if (err) - return err; + if (err) { + /* + * Treat truncations to zero as deletion and always allow them, + * just like we do for '->unlink()'. + */ + if (new_size || err != -ENOSPC) + return err; + budgeted = 0; + } err = vmtruncate(inode, new_size); if (err) @@ -869,7 +876,12 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode, err = ubifs_jnl_truncate(c, inode, old_size, new_size); mutex_unlock(&ui->ui_mutex); out_budg: - ubifs_release_budget(c, &req); + if (budgeted) + ubifs_release_budget(c, &req); + else { + c->nospace = c->nospace_rp = 0; + smp_wmb(); + } return err; } diff --git a/fs/ubifs/find.c b/fs/ubifs/find.c index adee7b5ddeab..e045c8b55423 100644 --- a/fs/ubifs/find.c +++ b/fs/ubifs/find.c @@ -211,14 +211,8 @@ static const struct ubifs_lprops *scan_for_dirty(struct ubifs_info *c, * dirty index heap, and it falls-back to LPT scanning if the heaps are empty * or do not have an LEB which satisfies the @min_space criteria. * - * Note: - * o LEBs which have less than dead watermark of dirty space are never picked - * by this function; - * - * Returns zero and the LEB properties of - * found dirty LEB in case of success, %-ENOSPC if no dirty LEB was found and a - * negative error code in case of other failures. The returned LEB is marked as - * "taken". + * Note, LEBs which have less than dead watermark of free + dirty space are + * never picked by this function. * * The additional @pick_free argument controls if this function has to return a * free or freeable LEB if one is present. For example, GC must to set it to %1, @@ -231,6 +225,10 @@ static const struct ubifs_lprops *scan_for_dirty(struct ubifs_info *c, * * In addition @pick_free is set to %2 by the recovery process in order to * recover gc_lnum in which case an index LEB must not be returned. + * + * This function returns zero and the LEB properties of found dirty LEB in case + * of success, %-ENOSPC if no dirty LEB was found and a negative error code in + * case of other failures. The returned LEB is marked as "taken". */ int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp, int min_space, int pick_free) @@ -245,7 +243,7 @@ int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp, int lebs, rsvd_idx_lebs = 0; spin_lock(&c->space_lock); - lebs = c->lst.empty_lebs; + lebs = c->lst.empty_lebs + c->idx_gc_cnt; lebs += c->freeable_cnt - c->lst.taken_empty_lebs; /* @@ -317,7 +315,7 @@ int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp, lp = idx_lp; if (lp) { - ubifs_assert(lp->dirty >= c->dead_wm); + ubifs_assert(lp->free + lp->dirty >= c->dead_wm); goto found; } diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c index d0f3dac29081..13f1019c859f 100644 --- a/fs/ubifs/gc.c +++ b/fs/ubifs/gc.c @@ -344,6 +344,12 @@ int ubifs_garbage_collect_leb(struct ubifs_info *c, struct ubifs_lprops *lp) if (err) goto out; + /* Allow for races with TNC */ + c->gced_lnum = lnum; + smp_wmb(); + c->gc_seq += 1; + smp_wmb(); + if (c->gc_lnum == -1) { c->gc_lnum = lnum; err = LEB_RETAINED; diff --git a/fs/ubifs/misc.h b/fs/ubifs/misc.h index 87dabf9fe742..4c12a9215d7f 100644 --- a/fs/ubifs/misc.h +++ b/fs/ubifs/misc.h @@ -284,38 +284,6 @@ static inline void *ubifs_idx_key(const struct ubifs_info *c, } /** - * ubifs_reported_space - calculate reported free space. - * @c: the UBIFS file-system description object - * @free: amount of free space - * - * This function calculates amount of free space which will be reported to - * user-space. User-space application tend to expect that if the file-system - * (e.g., via the 'statfs()' call) reports that it has N bytes available, they - * are able to write a file of size N. UBIFS attaches node headers to each data - * node and it has to write indexind nodes as well. This introduces additional - * overhead, and UBIFS it has to report sligtly less free space to meet the - * above expectetion. - * - * This function assumes free space is made up of uncompressed data nodes and - * full index nodes (one per data node, doubled because we always allow enough - * space to write the index twice). - * - * Note, the calculation is pessimistic, which means that most of the time - * UBIFS reports less space than it actually has. - */ -static inline long long ubifs_reported_space(const struct ubifs_info *c, - uint64_t free) -{ - int divisor, factor; - - divisor = UBIFS_MAX_DATA_NODE_SZ + (c->max_idx_node_sz * 3); - factor = UBIFS_MAX_DATA_NODE_SZ - UBIFS_DATA_NODE_SZ; - do_div(free, divisor); - - return free * factor; -} - -/** * ubifs_current_time - round current time to time granularity. * @inode: inode */ @@ -325,4 +293,21 @@ static inline struct timespec ubifs_current_time(struct inode *inode) current_fs_time(inode->i_sb) : CURRENT_TIME_SEC; } +/** + * ubifs_tnc_lookup - look up a file-system node. + * @c: UBIFS file-system description object + * @key: node key to lookup + * @node: the node is returned here + * + * This function look up and reads node with key @key. The caller has to make + * sure the @node buffer is large enough to fit the node. Returns zero in case + * of success, %-ENOENT if the node was not found, and a negative error code in + * case of failure. + */ +static inline int ubifs_tnc_lookup(struct ubifs_info *c, + const union ubifs_key *key, void *node) +{ + return ubifs_tnc_locate(c, key, node, NULL, NULL); +} + #endif /* __UBIFS_MISC_H__ */ diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index f71e6b8822c4..7562464ac83f 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -370,8 +370,9 @@ static int ubifs_statfs(struct dentry *dentry, struct kstatfs *buf) { struct ubifs_info *c = dentry->d_sb->s_fs_info; unsigned long long free; + __le32 *uuid = (__le32 *)c->uuid; - free = ubifs_budg_get_free_space(c); + free = ubifs_get_free_space(c); dbg_gen("free space %lld bytes (%lld blocks)", free, free >> UBIFS_BLOCK_SHIFT); @@ -386,7 +387,8 @@ static int ubifs_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_files = 0; buf->f_ffree = 0; buf->f_namelen = UBIFS_MAX_NLEN; - + buf->f_fsid.val[0] = le32_to_cpu(uuid[0]) ^ le32_to_cpu(uuid[2]); + buf->f_fsid.val[1] = le32_to_cpu(uuid[1]) ^ le32_to_cpu(uuid[3]); return 0; } @@ -530,6 +532,12 @@ static int init_constants_early(struct ubifs_info *c) c->dead_wm = ALIGN(MIN_WRITE_SZ, c->min_io_size); c->dark_wm = ALIGN(UBIFS_MAX_NODE_SZ, c->min_io_size); + /* + * Calculate how many bytes would be wasted at the end of LEB if it was + * fully filled with data nodes of maximum size. This is used in + * calculations when reporting free space. + */ + c->leb_overhead = c->leb_size % UBIFS_MAX_DATA_NODE_SZ; return 0; } @@ -647,13 +655,11 @@ static int init_constants_late(struct ubifs_info *c) * internally because it does not make much sense for UBIFS, but it is * necessary to report something for the 'statfs()' call. * - * Subtract the LEB reserved for GC and the LEB which is reserved for - * deletions. - * - * Review 'ubifs_calc_available()' if changing this calculation. + * Subtract the LEB reserved for GC, the LEB which is reserved for + * deletions, and assume only one journal head is available. */ - tmp64 = c->main_lebs - 2; - tmp64 *= (uint64_t)c->leb_size - c->dark_wm; + tmp64 = c->main_lebs - 2 - c->jhead_cnt + 1; + tmp64 *= (uint64_t)c->leb_size - c->leb_overhead; tmp64 = ubifs_reported_space(c, tmp64); c->block_cnt = tmp64 >> UBIFS_BLOCK_SHIFT; diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c index e909f4a96443..7da209ab9378 100644 --- a/fs/ubifs/tnc.c +++ b/fs/ubifs/tnc.c @@ -506,7 +506,7 @@ static int fallible_read_node(struct ubifs_info *c, const union ubifs_key *key, if (keys_cmp(c, key, &node_key) != 0) ret = 0; } - if (ret == 0) + if (ret == 0 && c->replaying) dbg_mnt("dangling branch LEB %d:%d len %d, key %s", zbr->lnum, zbr->offs, zbr->len, DBGKEY(key)); return ret; @@ -1382,50 +1382,39 @@ static int lookup_level0_dirty(struct ubifs_info *c, const union ubifs_key *key, } /** - * ubifs_tnc_lookup - look up a file-system node. + * maybe_leb_gced - determine if a LEB may have been garbage collected. * @c: UBIFS file-system description object - * @key: node key to lookup - * @node: the node is returned here + * @lnum: LEB number + * @gc_seq1: garbage collection sequence number * - * This function look up and reads node with key @key. The caller has to make - * sure the @node buffer is large enough to fit the node. Returns zero in case - * of success, %-ENOENT if the node was not found, and a negative error code in - * case of failure. + * This function determines if @lnum may have been garbage collected since + * sequence number @gc_seq1. If it may have been then %1 is returned, otherwise + * %0 is returned. */ -int ubifs_tnc_lookup(struct ubifs_info *c, const union ubifs_key *key, - void *node) +static int maybe_leb_gced(struct ubifs_info *c, int lnum, int gc_seq1) { - int found, n, err; - struct ubifs_znode *znode; - struct ubifs_zbranch zbr, *zt; + int gc_seq2, gced_lnum; - mutex_lock(&c->tnc_mutex); - found = ubifs_lookup_level0(c, key, &znode, &n); - if (!found) { - err = -ENOENT; - goto out; - } else if (found < 0) { - err = found; - goto out; - } - zt = &znode->zbranch[n]; - if (is_hash_key(c, key)) { - /* - * In this case the leaf node cache gets used, so we pass the - * address of the zbranch and keep the mutex locked - */ - err = tnc_read_node_nm(c, zt, node); - goto out; - } - zbr = znode->zbranch[n]; - mutex_unlock(&c->tnc_mutex); - - err = ubifs_tnc_read_node(c, &zbr, node); - return err; - -out: - mutex_unlock(&c->tnc_mutex); - return err; + gced_lnum = c->gced_lnum; + smp_rmb(); + gc_seq2 = c->gc_seq; + /* Same seq means no GC */ + if (gc_seq1 == gc_seq2) + return 0; + /* Different by more than 1 means we don't know */ + if (gc_seq1 + 1 != gc_seq2) + return 1; + /* + * We have seen the sequence number has increased by 1. Now we need to + * be sure we read the right LEB number, so read it again. + */ + smp_rmb(); + if (gced_lnum != c->gced_lnum) + return 1; + /* Finally we can check lnum */ + if (gced_lnum == lnum) + return 1; + return 0; } /** @@ -1436,16 +1425,19 @@ out: * @lnum: LEB number is returned here * @offs: offset is returned here * - * This function is the same as 'ubifs_tnc_lookup()' but it returns the node - * location also. See 'ubifs_tnc_lookup()'. + * This function look up and reads node with key @key. The caller has to make + * sure the @node buffer is large enough to fit the node. Returns zero in case + * of success, %-ENOENT if the node was not found, and a negative error code in + * case of failure. The node location can be returned in @lnum and @offs. */ int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key, void *node, int *lnum, int *offs) { - int found, n, err; + int found, n, err, safely = 0, gc_seq1; struct ubifs_znode *znode; struct ubifs_zbranch zbr, *zt; +again: mutex_lock(&c->tnc_mutex); found = ubifs_lookup_level0(c, key, &znode, &n); if (!found) { @@ -1456,24 +1448,43 @@ int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key, goto out; } zt = &znode->zbranch[n]; + if (lnum) { + *lnum = zt->lnum; + *offs = zt->offs; + } if (is_hash_key(c, key)) { /* * In this case the leaf node cache gets used, so we pass the * address of the zbranch and keep the mutex locked */ - *lnum = zt->lnum; - *offs = zt->offs; err = tnc_read_node_nm(c, zt, node); goto out; } + if (safely) { + err = ubifs_tnc_read_node(c, zt, node); + goto out; + } + /* Drop the TNC mutex prematurely and race with garbage collection */ zbr = znode->zbranch[n]; + gc_seq1 = c->gc_seq; mutex_unlock(&c->tnc_mutex); - *lnum = zbr.lnum; - *offs = zbr.offs; + if (ubifs_get_wbuf(c, zbr.lnum)) { + /* We do not GC journal heads */ + err = ubifs_tnc_read_node(c, &zbr, node); + return err; + } - err = ubifs_tnc_read_node(c, &zbr, node); - return err; + err = fallible_read_node(c, key, &zbr, node); + if (maybe_leb_gced(c, zbr.lnum, gc_seq1)) { + /* + * The node may have been GC'ed out from under us so try again + * while keeping the TNC mutex locked. + */ + safely = 1; + goto again; + } + return 0; out: mutex_unlock(&c->tnc_mutex); @@ -1498,7 +1509,6 @@ static int do_lookup_nm(struct ubifs_info *c, const union ubifs_key *key, { int found, n, err; struct ubifs_znode *znode; - struct ubifs_zbranch zbr; dbg_tnc("name '%.*s' key %s", nm->len, nm->name, DBGKEY(key)); mutex_lock(&c->tnc_mutex); @@ -1522,11 +1532,7 @@ static int do_lookup_nm(struct ubifs_info *c, const union ubifs_key *key, goto out_unlock; } - zbr = znode->zbranch[n]; - mutex_unlock(&c->tnc_mutex); - - err = tnc_read_node_nm(c, &zbr, node); - return err; + err = tnc_read_node_nm(c, &znode->zbranch[n], node); out_unlock: mutex_unlock(&c->tnc_mutex); diff --git a/fs/ubifs/ubifs-media.h b/fs/ubifs/ubifs-media.h index bd2121f3426e..a9ecbd9af20d 100644 --- a/fs/ubifs/ubifs-media.h +++ b/fs/ubifs/ubifs-media.h @@ -87,7 +87,7 @@ #define UBIFS_SK_LEN 8 /* Minimum index tree fanout */ -#define UBIFS_MIN_FANOUT 2 +#define UBIFS_MIN_FANOUT 3 /* Maximum number of levels in UBIFS indexing B-tree */ #define UBIFS_MAX_LEVELS 512 diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index d7f706f7a302..17c620b93eec 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -995,6 +995,9 @@ struct ubifs_mount_opts { * @max_idx_node_sz: maximum indexing node aligned on 8-bytes boundary * @max_inode_sz: maximum possible inode size in bytes * @max_znode_sz: size of znode in bytes + * + * @leb_overhead: how many bytes are wasted in an LEB when it is filled with + * data nodes of maximum size - used in free space reporting * @dead_wm: LEB dead space watermark * @dark_wm: LEB dark space watermark * @block_cnt: count of 4KiB blocks on the FS @@ -1028,6 +1031,8 @@ struct ubifs_mount_opts { * @sbuf: a buffer of LEB size used by GC and replay for scanning * @idx_gc: list of index LEBs that have been garbage collected * @idx_gc_cnt: number of elements on the idx_gc list + * @gc_seq: incremented for every non-index LEB garbage collected + * @gced_lnum: last non-index LEB that was garbage collected * * @infos_list: links all 'ubifs_info' objects * @umount_mutex: serializes shrinker and un-mount @@ -1224,6 +1229,8 @@ struct ubifs_info { int max_idx_node_sz; long long max_inode_sz; int max_znode_sz; + + int leb_overhead; int dead_wm; int dark_wm; int block_cnt; @@ -1257,6 +1264,8 @@ struct ubifs_info { void *sbuf; struct list_head idx_gc; int idx_gc_cnt; + volatile int gc_seq; + volatile int gced_lnum; struct list_head infos_list; struct mutex umount_mutex; @@ -1434,9 +1443,10 @@ void ubifs_release_ino_dirty(struct ubifs_info *c, struct inode *inode, struct ubifs_budget_req *req); void ubifs_cancel_ino_op(struct ubifs_info *c, struct inode *inode, struct ubifs_budget_req *req); -long long ubifs_budg_get_free_space(struct ubifs_info *c); +long long ubifs_get_free_space(struct ubifs_info *c); int ubifs_calc_min_idx_lebs(struct ubifs_info *c); void ubifs_convert_page_budget(struct ubifs_info *c); +long long ubifs_reported_space(const struct ubifs_info *c, uint64_t free); long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs); /* find.c */ @@ -1451,8 +1461,6 @@ int ubifs_save_dirty_idx_lnums(struct ubifs_info *c); /* tnc.c */ int ubifs_lookup_level0(struct ubifs_info *c, const union ubifs_key *key, struct ubifs_znode **zn, int *n); -int ubifs_tnc_lookup(struct ubifs_info *c, const union ubifs_key *key, - void *node); int ubifs_tnc_lookup_nm(struct ubifs_info *c, const union ubifs_key *key, void *node, const struct qstr *nm); int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key, diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h index 8feeae1f2369..79a7ff925bf8 100644 --- a/include/asm-generic/sections.h +++ b/include/asm-generic/sections.h @@ -14,4 +14,10 @@ extern char __kprobes_text_start[], __kprobes_text_end[]; extern char __initdata_begin[], __initdata_end[]; extern char __start_rodata[], __end_rodata[]; +/* function descriptor handling (if any). Override + * in asm/sections.h */ +#ifndef dereference_function_descriptor +#define dereference_function_descriptor(p) (p) +#endif + #endif /* _ASM_GENERIC_SECTIONS_H_ */ diff --git a/include/asm-parisc/sections.h b/include/asm-parisc/sections.h index fdd43ec42ec5..9d13c3507ad6 100644 --- a/include/asm-parisc/sections.h +++ b/include/asm-parisc/sections.h @@ -4,4 +4,9 @@ /* nothing to see, move along */ #include <asm-generic/sections.h> +#ifdef CONFIG_64BIT +#undef dereference_function_descriptor +void *dereference_function_descriptor(void *); +#endif + #endif diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index e8f450c499b0..2691926fb506 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -160,7 +160,7 @@ static inline int current_cpuset_is_being_rebound(void) static inline void rebuild_sched_domains(void) { - partition_sched_domains(0, NULL, NULL); + partition_sched_domains(1, NULL, NULL); } #endif /* !CONFIG_CPUSETS */ diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index 95c660c9719b..91324908fccd 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -208,6 +208,9 @@ extern void inet_twsk_schedule(struct inet_timewait_sock *tw, extern void inet_twsk_deschedule(struct inet_timewait_sock *tw, struct inet_timewait_death_row *twdr); +extern void inet_twsk_purge(struct net *net, struct inet_hashinfo *hashinfo, + struct inet_timewait_death_row *twdr, int family); + static inline struct net *twsk_net(const struct inet_timewait_sock *twsk) { diff --git a/kernel/cpuset.c b/kernel/cpuset.c index d5ab79cf516d..f227bc172690 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -14,6 +14,8 @@ * 2003-10-22 Updates by Stephen Hemminger. * 2004 May-July Rework by Paul Jackson. * 2006 Rework by Paul Menage to use generic cgroups + * 2008 Rework of the scheduler domains and CPU hotplug handling + * by Max Krasnyansky * * This file is subject to the terms and conditions of the GNU General Public * License. See the file COPYING in the main directory of the Linux @@ -236,9 +238,11 @@ static struct cpuset top_cpuset = { static DEFINE_MUTEX(callback_mutex); -/* This is ugly, but preserves the userspace API for existing cpuset +/* + * This is ugly, but preserves the userspace API for existing cpuset * users. If someone tries to mount the "cpuset" filesystem, we - * silently switch it to mount "cgroup" instead */ + * silently switch it to mount "cgroup" instead + */ static int cpuset_get_sb(struct file_system_type *fs_type, int flags, const char *unused_dev_name, void *data, struct vfsmount *mnt) @@ -473,10 +477,9 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial) } /* - * Helper routine for rebuild_sched_domains(). + * Helper routine for generate_sched_domains(). * Do cpusets a, b have overlapping cpus_allowed masks? */ - static int cpusets_overlap(struct cpuset *a, struct cpuset *b) { return cpus_intersects(a->cpus_allowed, b->cpus_allowed); @@ -518,26 +521,15 @@ update_domain_attr_tree(struct sched_domain_attr *dattr, struct cpuset *c) } /* - * rebuild_sched_domains() - * - * This routine will be called to rebuild the scheduler's dynamic - * sched domains: - * - if the flag 'sched_load_balance' of any cpuset with non-empty - * 'cpus' changes, - * - or if the 'cpus' allowed changes in any cpuset which has that - * flag enabled, - * - or if the 'sched_relax_domain_level' of any cpuset which has - * that flag enabled and with non-empty 'cpus' changes, - * - or if any cpuset with non-empty 'cpus' is removed, - * - or if a cpu gets offlined. - * - * This routine builds a partial partition of the systems CPUs - * (the set of non-overlappping cpumask_t's in the array 'part' - * below), and passes that partial partition to the kernel/sched.c - * partition_sched_domains() routine, which will rebuild the - * schedulers load balancing domains (sched domains) as specified - * by that partial partition. A 'partial partition' is a set of - * non-overlapping subsets whose union is a subset of that set. + * generate_sched_domains() + * + * This function builds a partial partition of the systems CPUs + * A 'partial partition' is a set of non-overlapping subsets whose + * union is a subset of that set. + * The output of this function needs to be passed to kernel/sched.c + * partition_sched_domains() routine, which will rebuild the scheduler's + * load balancing domains (sched domains) as specified by that partial + * partition. * * See "What is sched_load_balance" in Documentation/cpusets.txt * for a background explanation of this. @@ -547,13 +539,7 @@ update_domain_attr_tree(struct sched_domain_attr *dattr, struct cpuset *c) * domains when operating in the severe memory shortage situations * that could cause allocation failures below. * - * Call with cgroup_mutex held. May take callback_mutex during - * call due to the kfifo_alloc() and kmalloc() calls. May nest - * a call to the get_online_cpus()/put_online_cpus() pair. - * Must not be called holding callback_mutex, because we must not - * call get_online_cpus() while holding callback_mutex. Elsewhere - * the kernel nests callback_mutex inside get_online_cpus() calls. - * So the reverse nesting would risk an ABBA deadlock. + * Must be called with cgroup_lock held. * * The three key local variables below are: * q - a linked-list queue of cpuset pointers, used to implement a @@ -588,10 +574,10 @@ update_domain_attr_tree(struct sched_domain_attr *dattr, struct cpuset *c) * element of the partition (one sched domain) to be passed to * partition_sched_domains(). */ - -void rebuild_sched_domains(void) +static int generate_sched_domains(cpumask_t **domains, + struct sched_domain_attr **attributes) { - LIST_HEAD(q); /* queue of cpusets to be scanned*/ + LIST_HEAD(q); /* queue of cpusets to be scanned */ struct cpuset *cp; /* scans q */ struct cpuset **csa; /* array of all cpuset ptrs */ int csn; /* how many cpuset ptrs in csa so far */ @@ -601,23 +587,26 @@ void rebuild_sched_domains(void) int ndoms; /* number of sched domains in result */ int nslot; /* next empty doms[] cpumask_t slot */ - csa = NULL; + ndoms = 0; doms = NULL; dattr = NULL; + csa = NULL; /* Special case for the 99% of systems with one, full, sched domain */ if (is_sched_load_balance(&top_cpuset)) { - ndoms = 1; doms = kmalloc(sizeof(cpumask_t), GFP_KERNEL); if (!doms) - goto rebuild; + goto done; + dattr = kmalloc(sizeof(struct sched_domain_attr), GFP_KERNEL); if (dattr) { *dattr = SD_ATTR_INIT; update_domain_attr_tree(dattr, &top_cpuset); } *doms = top_cpuset.cpus_allowed; - goto rebuild; + + ndoms = 1; + goto done; } csa = kmalloc(number_of_cpusets * sizeof(cp), GFP_KERNEL); @@ -680,61 +669,141 @@ restart: } } - /* Convert <csn, csa> to <ndoms, doms> */ + /* + * Now we know how many domains to create. + * Convert <csn, csa> to <ndoms, doms> and populate cpu masks. + */ doms = kmalloc(ndoms * sizeof(cpumask_t), GFP_KERNEL); - if (!doms) - goto rebuild; + if (!doms) { + ndoms = 0; + goto done; + } + + /* + * The rest of the code, including the scheduler, can deal with + * dattr==NULL case. No need to abort if alloc fails. + */ dattr = kmalloc(ndoms * sizeof(struct sched_domain_attr), GFP_KERNEL); for (nslot = 0, i = 0; i < csn; i++) { struct cpuset *a = csa[i]; + cpumask_t *dp; int apn = a->pn; - if (apn >= 0) { - cpumask_t *dp = doms + nslot; - - if (nslot == ndoms) { - static int warnings = 10; - if (warnings) { - printk(KERN_WARNING - "rebuild_sched_domains confused:" - " nslot %d, ndoms %d, csn %d, i %d," - " apn %d\n", - nslot, ndoms, csn, i, apn); - warnings--; - } - continue; + if (apn < 0) { + /* Skip completed partitions */ + continue; + } + + dp = doms + nslot; + + if (nslot == ndoms) { + static int warnings = 10; + if (warnings) { + printk(KERN_WARNING + "rebuild_sched_domains confused:" + " nslot %d, ndoms %d, csn %d, i %d," + " apn %d\n", + nslot, ndoms, csn, i, apn); + warnings--; } + continue; + } - cpus_clear(*dp); - if (dattr) - *(dattr + nslot) = SD_ATTR_INIT; - for (j = i; j < csn; j++) { - struct cpuset *b = csa[j]; - - if (apn == b->pn) { - cpus_or(*dp, *dp, b->cpus_allowed); - b->pn = -1; - if (dattr) - update_domain_attr_tree(dattr - + nslot, b); - } + cpus_clear(*dp); + if (dattr) + *(dattr + nslot) = SD_ATTR_INIT; + for (j = i; j < csn; j++) { + struct cpuset *b = csa[j]; + + if (apn == b->pn) { + cpus_or(*dp, *dp, b->cpus_allowed); + if (dattr) + update_domain_attr_tree(dattr + nslot, b); + + /* Done with this partition */ + b->pn = -1; } - nslot++; } + nslot++; } BUG_ON(nslot != ndoms); -rebuild: - /* Have scheduler rebuild sched domains */ +done: + kfree(csa); + + *domains = doms; + *attributes = dattr; + return ndoms; +} + +/* + * Rebuild scheduler domains. + * + * Call with neither cgroup_mutex held nor within get_online_cpus(). + * Takes both cgroup_mutex and get_online_cpus(). + * + * Cannot be directly called from cpuset code handling changes + * to the cpuset pseudo-filesystem, because it cannot be called + * from code that already holds cgroup_mutex. + */ +static void do_rebuild_sched_domains(struct work_struct *unused) +{ + struct sched_domain_attr *attr; + cpumask_t *doms; + int ndoms; + get_online_cpus(); - partition_sched_domains(ndoms, doms, dattr); + + /* Generate domain masks and attrs */ + cgroup_lock(); + ndoms = generate_sched_domains(&doms, &attr); + cgroup_unlock(); + + /* Have scheduler rebuild the domains */ + partition_sched_domains(ndoms, doms, attr); + put_online_cpus(); +} -done: - kfree(csa); - /* Don't kfree(doms) -- partition_sched_domains() does that. */ - /* Don't kfree(dattr) -- partition_sched_domains() does that. */ +static DECLARE_WORK(rebuild_sched_domains_work, do_rebuild_sched_domains); + +/* + * Rebuild scheduler domains, asynchronously via workqueue. + * + * If the flag 'sched_load_balance' of any cpuset with non-empty + * 'cpus' changes, or if the 'cpus' allowed changes in any cpuset + * which has that flag enabled, or if any cpuset with a non-empty + * 'cpus' is removed, then call this routine to rebuild the + * scheduler's dynamic sched domains. + * + * The rebuild_sched_domains() and partition_sched_domains() + * routines must nest cgroup_lock() inside get_online_cpus(), + * but such cpuset changes as these must nest that locking the + * other way, holding cgroup_lock() for much of the code. + * + * So in order to avoid an ABBA deadlock, the cpuset code handling + * these user changes delegates the actual sched domain rebuilding + * to a separate workqueue thread, which ends up processing the + * above do_rebuild_sched_domains() function. + */ +static void async_rebuild_sched_domains(void) +{ + schedule_work(&rebuild_sched_domains_work); +} + +/* + * Accomplishes the same scheduler domain rebuild as the above + * async_rebuild_sched_domains(), however it directly calls the + * rebuild routine synchronously rather than calling it via an + * asynchronous work thread. + * + * This can only be called from code that is not holding + * cgroup_mutex (not nested in a cgroup_lock() call.) + */ +void rebuild_sched_domains(void) +{ + do_rebuild_sched_domains(NULL); } /** @@ -863,7 +932,7 @@ static int update_cpumask(struct cpuset *cs, const char *buf) return retval; if (is_load_balanced) - rebuild_sched_domains(); + async_rebuild_sched_domains(); return 0; } @@ -1090,7 +1159,7 @@ static int update_relax_domain_level(struct cpuset *cs, s64 val) if (val != cs->relax_domain_level) { cs->relax_domain_level = val; if (!cpus_empty(cs->cpus_allowed) && is_sched_load_balance(cs)) - rebuild_sched_domains(); + async_rebuild_sched_domains(); } return 0; @@ -1131,7 +1200,7 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, mutex_unlock(&callback_mutex); if (cpus_nonempty && balance_flag_changed) - rebuild_sched_domains(); + async_rebuild_sched_domains(); return 0; } @@ -1492,6 +1561,9 @@ static u64 cpuset_read_u64(struct cgroup *cont, struct cftype *cft) default: BUG(); } + + /* Unreachable but makes gcc happy */ + return 0; } static s64 cpuset_read_s64(struct cgroup *cont, struct cftype *cft) @@ -1504,6 +1576,9 @@ static s64 cpuset_read_s64(struct cgroup *cont, struct cftype *cft) default: BUG(); } + + /* Unrechable but makes gcc happy */ + return 0; } @@ -1692,15 +1767,9 @@ static struct cgroup_subsys_state *cpuset_create( } /* - * Locking note on the strange update_flag() call below: - * * If the cpuset being removed has its flag 'sched_load_balance' * enabled, then simulate turning sched_load_balance off, which - * will call rebuild_sched_domains(). The get_online_cpus() - * call in rebuild_sched_domains() must not be made while holding - * callback_mutex. Elsewhere the kernel nests callback_mutex inside - * get_online_cpus() calls. So the reverse nesting would risk an - * ABBA deadlock. + * will call async_rebuild_sched_domains(). */ static void cpuset_destroy(struct cgroup_subsys *ss, struct cgroup *cont) @@ -1719,7 +1788,7 @@ static void cpuset_destroy(struct cgroup_subsys *ss, struct cgroup *cont) struct cgroup_subsys cpuset_subsys = { .name = "cpuset", .create = cpuset_create, - .destroy = cpuset_destroy, + .destroy = cpuset_destroy, .can_attach = cpuset_can_attach, .attach = cpuset_attach, .populate = cpuset_populate, @@ -1811,7 +1880,7 @@ static void move_member_tasks_to_cpuset(struct cpuset *from, struct cpuset *to) } /* - * If common_cpu_mem_hotplug_unplug(), below, unplugs any CPUs + * If CPU and/or memory hotplug handlers, below, unplug any CPUs * or memory nodes, we need to walk over the cpuset hierarchy, * removing that CPU or node from all cpusets. If this removes the * last CPU or node from a cpuset, then move the tasks in the empty @@ -1903,35 +1972,6 @@ static void scan_for_empty_cpusets(const struct cpuset *root) } /* - * The cpus_allowed and mems_allowed nodemasks in the top_cpuset track - * cpu_online_map and node_states[N_HIGH_MEMORY]. Force the top cpuset to - * track what's online after any CPU or memory node hotplug or unplug event. - * - * Since there are two callers of this routine, one for CPU hotplug - * events and one for memory node hotplug events, we could have coded - * two separate routines here. We code it as a single common routine - * in order to minimize text size. - */ - -static void common_cpu_mem_hotplug_unplug(int rebuild_sd) -{ - cgroup_lock(); - - top_cpuset.cpus_allowed = cpu_online_map; - top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY]; - scan_for_empty_cpusets(&top_cpuset); - - /* - * Scheduler destroys domains on hotplug events. - * Rebuild them based on the current settings. - */ - if (rebuild_sd) - rebuild_sched_domains(); - - cgroup_unlock(); -} - -/* * The top_cpuset tracks what CPUs and Memory Nodes are online, * period. This is necessary in order to make cpusets transparent * (of no affect) on systems that are actively using CPU hotplug @@ -1939,40 +1979,52 @@ static void common_cpu_mem_hotplug_unplug(int rebuild_sd) * * This routine ensures that top_cpuset.cpus_allowed tracks * cpu_online_map on each CPU hotplug (cpuhp) event. + * + * Called within get_online_cpus(). Needs to call cgroup_lock() + * before calling generate_sched_domains(). */ - -static int cpuset_handle_cpuhp(struct notifier_block *unused_nb, +static int cpuset_track_online_cpus(struct notifier_block *unused_nb, unsigned long phase, void *unused_cpu) { + struct sched_domain_attr *attr; + cpumask_t *doms; + int ndoms; + switch (phase) { - case CPU_UP_CANCELED: - case CPU_UP_CANCELED_FROZEN: - case CPU_DOWN_FAILED: - case CPU_DOWN_FAILED_FROZEN: case CPU_ONLINE: case CPU_ONLINE_FROZEN: case CPU_DEAD: case CPU_DEAD_FROZEN: - common_cpu_mem_hotplug_unplug(1); break; + default: return NOTIFY_DONE; } + cgroup_lock(); + top_cpuset.cpus_allowed = cpu_online_map; + scan_for_empty_cpusets(&top_cpuset); + ndoms = generate_sched_domains(&doms, &attr); + cgroup_unlock(); + + /* Have scheduler rebuild the domains */ + partition_sched_domains(ndoms, doms, attr); + return NOTIFY_OK; } #ifdef CONFIG_MEMORY_HOTPLUG /* * Keep top_cpuset.mems_allowed tracking node_states[N_HIGH_MEMORY]. - * Call this routine anytime after you change - * node_states[N_HIGH_MEMORY]. - * See also the previous routine cpuset_handle_cpuhp(). + * Call this routine anytime after node_states[N_HIGH_MEMORY] changes. + * See also the previous routine cpuset_track_online_cpus(). */ - void cpuset_track_online_nodes(void) { - common_cpu_mem_hotplug_unplug(0); + cgroup_lock(); + top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY]; + scan_for_empty_cpusets(&top_cpuset); + cgroup_unlock(); } #endif @@ -1987,7 +2039,7 @@ void __init cpuset_init_smp(void) top_cpuset.cpus_allowed = cpu_online_map; top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY]; - hotcpu_notifier(cpuset_handle_cpuhp, 0); + hotcpu_notifier(cpuset_track_online_cpus, 0); } /** diff --git a/kernel/sched.c b/kernel/sched.c index 1a5f73c1fcdc..cc1f81b50b82 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -7696,24 +7696,27 @@ static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur, * and partition_sched_domains() will fallback to the single partition * 'fallback_doms', it also forces the domains to be rebuilt. * + * If doms_new==NULL it will be replaced with cpu_online_map. + * ndoms_new==0 is a special case for destroying existing domains. + * It will not create the default domain. + * * Call with hotplug lock held */ void partition_sched_domains(int ndoms_new, cpumask_t *doms_new, struct sched_domain_attr *dattr_new) { - int i, j; + int i, j, n; mutex_lock(&sched_domains_mutex); /* always unregister in case we don't destroy any domains */ unregister_sched_domain_sysctl(); - if (doms_new == NULL) - ndoms_new = 0; + n = doms_new ? ndoms_new : 0; /* Destroy deleted domains */ for (i = 0; i < ndoms_cur; i++) { - for (j = 0; j < ndoms_new; j++) { + for (j = 0; j < n; j++) { if (cpus_equal(doms_cur[i], doms_new[j]) && dattrs_equal(dattr_cur, i, dattr_new, j)) goto match1; @@ -7726,7 +7729,6 @@ match1: if (doms_new == NULL) { ndoms_cur = 0; - ndoms_new = 1; doms_new = &fallback_doms; cpus_andnot(doms_new[0], cpu_online_map, cpu_isolated_map); dattr_new = NULL; @@ -7763,8 +7765,13 @@ match2: int arch_reinit_sched_domains(void) { get_online_cpus(); + + /* Destroy domains first to force the rebuild */ + partition_sched_domains(0, NULL, NULL); + rebuild_sched_domains(); put_online_cpus(); + return 0; } @@ -7848,7 +7855,7 @@ static int update_sched_domains(struct notifier_block *nfb, case CPU_ONLINE_FROZEN: case CPU_DEAD: case CPU_DEAD_FROZEN: - partition_sched_domains(0, NULL, NULL); + partition_sched_domains(1, NULL, NULL); return NOTIFY_OK; default: diff --git a/lib/vsprintf.c b/lib/vsprintf.c index d8d1d1142248..c399bc1093cb 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -27,6 +27,7 @@ #include <asm/page.h> /* for PAGE_SIZE */ #include <asm/div64.h> +#include <asm/sections.h> /* for dereference_function_descriptor() */ /* Works only for digits and letters, but small and fast */ #define TOLOWER(x) ((x) | 0x20) @@ -513,16 +514,6 @@ static char *string(char *buf, char *end, char *s, int field_width, int precisio return buf; } -static inline void *dereference_function_descriptor(void *ptr) -{ -#if defined(CONFIG_IA64) || defined(CONFIG_PPC64) - void *p; - if (!probe_kernel_address(ptr, p)) - ptr = p; -#endif - return ptr; -} - static char *symbol_string(char *buf, char *end, void *ptr, int field_width, int precision, int flags) { unsigned long value = (unsigned long) ptr; diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c index eeee218eed80..5bbf07362172 100644 --- a/net/bridge/br_ioctl.c +++ b/net/bridge/br_ioctl.c @@ -188,15 +188,21 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) return 0; case BRCTL_SET_BRIDGE_HELLO_TIME: + { + unsigned long t = clock_t_to_jiffies(args[1]); if (!capable(CAP_NET_ADMIN)) return -EPERM; + if (t < HZ) + return -EINVAL; + spin_lock_bh(&br->lock); - br->bridge_hello_time = clock_t_to_jiffies(args[1]); + br->bridge_hello_time = t; if (br_is_root_bridge(br)) br->hello_time = br->bridge_hello_time; spin_unlock_bh(&br->lock); return 0; + } case BRCTL_SET_BRIDGE_MAX_AGE: if (!capable(CAP_NET_ADMIN)) diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index 27d6a511c8c1..158dee8b4965 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -29,11 +29,12 @@ */ static ssize_t store_bridge_parm(struct device *d, const char *buf, size_t len, - void (*set)(struct net_bridge *, unsigned long)) + int (*set)(struct net_bridge *, unsigned long)) { struct net_bridge *br = to_bridge(d); char *endp; unsigned long val; + int err; if (!capable(CAP_NET_ADMIN)) return -EPERM; @@ -43,9 +44,9 @@ static ssize_t store_bridge_parm(struct device *d, return -EINVAL; spin_lock_bh(&br->lock); - (*set)(br, val); + err = (*set)(br, val); spin_unlock_bh(&br->lock); - return len; + return err ? err : len; } @@ -56,12 +57,13 @@ static ssize_t show_forward_delay(struct device *d, return sprintf(buf, "%lu\n", jiffies_to_clock_t(br->forward_delay)); } -static void set_forward_delay(struct net_bridge *br, unsigned long val) +static int set_forward_delay(struct net_bridge *br, unsigned long val) { unsigned long delay = clock_t_to_jiffies(val); br->forward_delay = delay; if (br_is_root_bridge(br)) br->bridge_forward_delay = delay; + return 0; } static ssize_t store_forward_delay(struct device *d, @@ -80,12 +82,17 @@ static ssize_t show_hello_time(struct device *d, struct device_attribute *attr, jiffies_to_clock_t(to_bridge(d)->hello_time)); } -static void set_hello_time(struct net_bridge *br, unsigned long val) +static int set_hello_time(struct net_bridge *br, unsigned long val) { unsigned long t = clock_t_to_jiffies(val); + + if (t < HZ) + return -EINVAL; + br->hello_time = t; if (br_is_root_bridge(br)) br->bridge_hello_time = t; + return 0; } static ssize_t store_hello_time(struct device *d, @@ -104,12 +111,13 @@ static ssize_t show_max_age(struct device *d, struct device_attribute *attr, jiffies_to_clock_t(to_bridge(d)->max_age)); } -static void set_max_age(struct net_bridge *br, unsigned long val) +static int set_max_age(struct net_bridge *br, unsigned long val) { unsigned long t = clock_t_to_jiffies(val); br->max_age = t; if (br_is_root_bridge(br)) br->bridge_max_age = t; + return 0; } static ssize_t store_max_age(struct device *d, struct device_attribute *attr, @@ -126,9 +134,10 @@ static ssize_t show_ageing_time(struct device *d, return sprintf(buf, "%lu\n", jiffies_to_clock_t(br->ageing_time)); } -static void set_ageing_time(struct net_bridge *br, unsigned long val) +static int set_ageing_time(struct net_bridge *br, unsigned long val) { br->ageing_time = clock_t_to_jiffies(val); + return 0; } static ssize_t store_ageing_time(struct device *d, @@ -180,9 +189,10 @@ static ssize_t show_priority(struct device *d, struct device_attribute *attr, (br->bridge_id.prio[0] << 8) | br->bridge_id.prio[1]); } -static void set_priority(struct net_bridge *br, unsigned long val) +static int set_priority(struct net_bridge *br, unsigned long val) { br_stp_set_bridge_priority(br, (u16) val); + return 0; } static ssize_t store_priority(struct device *d, struct device_attribute *attr, diff --git a/net/core/dev.c b/net/core/dev.c index 60c51f765887..e719ed29310f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1991,8 +1991,13 @@ static void net_tx_action(struct softirq_action *h) spin_unlock(root_lock); } else { if (!test_bit(__QDISC_STATE_DEACTIVATED, - &q->state)) + &q->state)) { __netif_reschedule(q); + } else { + smp_mb__before_clear_bit(); + clear_bit(__QDISC_STATE_SCHED, + &q->state); + } } } } diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index d985bd613d25..743f011b9a84 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -409,3 +409,38 @@ out: } EXPORT_SYMBOL_GPL(inet_twdr_twcal_tick); + +void inet_twsk_purge(struct net *net, struct inet_hashinfo *hashinfo, + struct inet_timewait_death_row *twdr, int family) +{ + struct inet_timewait_sock *tw; + struct sock *sk; + struct hlist_node *node; + int h; + + local_bh_disable(); + for (h = 0; h < (hashinfo->ehash_size); h++) { + struct inet_ehash_bucket *head = + inet_ehash_bucket(hashinfo, h); + rwlock_t *lock = inet_ehash_lockp(hashinfo, h); +restart: + write_lock(lock); + sk_for_each(sk, node, &head->twchain) { + + tw = inet_twsk(sk); + if (!net_eq(twsk_net(tw), net) || + tw->tw_family != family) + continue; + + atomic_inc(&tw->tw_refcnt); + write_unlock(lock); + inet_twsk_deschedule(tw, twdr); + inet_twsk_put(tw); + + goto restart; + } + write_unlock(lock); + } + local_bh_enable(); +} +EXPORT_SYMBOL_GPL(inet_twsk_purge); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 44c1e934824b..1b4fee20fc93 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2376,6 +2376,7 @@ static int __net_init tcp_sk_init(struct net *net) static void __net_exit tcp_sk_exit(struct net *net) { inet_ctl_sock_destroy(net->ipv4.tcp_sock); + inet_twsk_purge(net, &tcp_hashinfo, &tcp_death_row, AF_INET); } static struct pernet_operations __net_initdata tcp_sk_ops = { diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 5b90b369ccb2..b585c850a89a 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -2148,6 +2148,7 @@ static int tcpv6_net_init(struct net *net) static void tcpv6_net_exit(struct net *net) { inet_ctl_sock_destroy(net->ipv6.tcp_sk); + inet_twsk_purge(net, &tcp_hashinfo, &tcp_death_row, AF_INET6); } static struct pernet_operations tcpv6_net_ops = { diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c index 1b1226d6653f..20633fdf7e6b 100644 --- a/net/netfilter/nf_conntrack_irc.c +++ b/net/netfilter/nf_conntrack_irc.c @@ -68,11 +68,21 @@ static const char *const dccprotos[] = { static int parse_dcc(char *data, const char *data_end, u_int32_t *ip, u_int16_t *port, char **ad_beg_p, char **ad_end_p) { + char *tmp; + /* at least 12: "AAAAAAAA P\1\n" */ while (*data++ != ' ') if (data > data_end - 12) return -1; + /* Make sure we have a newline character within the packet boundaries + * because simple_strtoul parses until the first invalid character. */ + for (tmp = data; tmp <= data_end; tmp++) + if (*tmp == '\n') + break; + if (tmp > data_end || *tmp != '\n') + return -1; + *ad_beg_p = data; *ip = simple_strtoul(data, &data, 10); diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index 654a4f7f12c6..9bd03967fea4 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -45,12 +45,12 @@ static LIST_HEAD(gre_keymap_list); void nf_ct_gre_keymap_flush(void) { - struct list_head *pos, *n; + struct nf_ct_gre_keymap *km, *tmp; write_lock_bh(&nf_ct_gre_lock); - list_for_each_safe(pos, n, &gre_keymap_list) { - list_del(pos); - kfree(pos); + list_for_each_entry_safe(km, tmp, &gre_keymap_list, list) { + list_del(&km->list); + kfree(km); } write_unlock_bh(&nf_ct_gre_lock); } @@ -97,10 +97,14 @@ int nf_ct_gre_keymap_add(struct nf_conn *ct, enum ip_conntrack_dir dir, kmp = &help->help.ct_pptp_info.keymap[dir]; if (*kmp) { /* check whether it's a retransmission */ + read_lock_bh(&nf_ct_gre_lock); list_for_each_entry(km, &gre_keymap_list, list) { - if (gre_key_cmpfn(km, t) && km == *kmp) + if (gre_key_cmpfn(km, t) && km == *kmp) { + read_unlock_bh(&nf_ct_gre_lock); return 0; + } } + read_unlock_bh(&nf_ct_gre_lock); pr_debug("trying to override keymap_%s for ct %p\n", dir == IP_CT_DIR_REPLY ? "reply" : "orig", ct); return -EEXIST; diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 2f9bbc058b48..1fa306be60fb 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -1193,7 +1193,6 @@ static const struct sip_handler sip_handlers[] = { static int process_sip_response(struct sk_buff *skb, const char **dptr, unsigned int *datalen) { - static const struct sip_handler *handler; enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); unsigned int matchoff, matchlen; @@ -1214,6 +1213,8 @@ static int process_sip_response(struct sk_buff *skb, dataoff = matchoff + matchlen + 1; for (i = 0; i < ARRAY_SIZE(sip_handlers); i++) { + const struct sip_handler *handler; + handler = &sip_handlers[i]; if (handler->response == NULL) continue; @@ -1228,13 +1229,14 @@ static int process_sip_response(struct sk_buff *skb, static int process_sip_request(struct sk_buff *skb, const char **dptr, unsigned int *datalen) { - static const struct sip_handler *handler; enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); unsigned int matchoff, matchlen; unsigned int cseq, i; for (i = 0; i < ARRAY_SIZE(sip_handlers); i++) { + const struct sip_handler *handler; + handler = &sip_handlers[i]; if (handler->request == NULL) continue; |