89 files changed, 1585 insertions, 1140 deletions
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index 5ddbe350487a..20d3b94703a4 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -335,3 +335,12 @@ Why: In 2.6.18 the Secmark concept was introduced to replace the "compat_net"
 	Secmark, it is time to deprecate the older mechanism and start the
 	process of removing the old code.
 Who: Paul Moore <paul.moore@hp.com>
+---------------------------
+
+What:	sysfs ui for changing p4-clockmod parameters
+When:	September 2009
+Why:	See commits 129f8ae9b1b5be94517da76009ea956e89104ce8 and
+	e088e4c9cdb618675874becb91b2fd581ee707e6.
+	Removal is subject to fixing any remaining bugs in ACPI which may
+	cause the thermal throttling not to happen at the right time.
+Who:	Dave Jones <davej@redhat.com>, Matthew Garrett <mjg@redhat.com>
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 454f42b21f16..7643483bdd6a 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2344,6 +2344,8 @@ and is between 256 and 4096 characters. It is defined in the file
 
 	tp720=		[HW,PS2]
 
+	trace_buf_size=nn[KMG] [ftrace] will set tracing buffer size.
+
 	trix=		[HW,OSS] MediaTrix AudioTrix Pro
 			Format:
 			<io>,<irq>,<dma>,<dma2>,<sb_io>,<sb_irq>,<sb_dma>,<mpu_io>,<mpu_irq>
diff --git a/Documentation/networking/ipv6.txt b/Documentation/networking/ipv6.txt
new file mode 100644
index 000000000000..268e5c103dd8
--- /dev/null
+++ b/Documentation/networking/ipv6.txt
@@ -0,0 +1,35 @@
+
+Options for the ipv6 module are supplied as parameters at load time.
+
+Module options may be given as command line arguments to the insmod
+or modprobe command, but are usually specified in either the
+/etc/modules.conf or /etc/modprobe.conf configuration file, or in a
+distro-specific configuration file.
+
+The available ipv6 module parameters are listed below.  If a parameter
+is not specified the default value is used.
+
+The parameters are as follows:
+
+disable
+
+	Specifies whether to load the IPv6 module, but disable all
+	its functionality.  This might be used when another module
+	has a dependency on the IPv6 module being loaded, but no
+	IPv6 addresses or operations are desired.
+
+	The possible values and their effects are:
+
+	0
+		IPv6 is enabled.
+
+		This is the default value.
+
+	1
+		IPv6 is disabled.
+
+		No IPv6 addresses will be added to interfaces, and
+		it will not be possible to open an IPv6 socket.
+
+		A reboot is required to enable IPv6.
+
diff --git a/Documentation/tracepoints.txt b/Documentation/tracepoints.txt
index 6f0a044f5b5e..4ff43c6de299 100644
--- a/Documentation/tracepoints.txt
+++ b/Documentation/tracepoints.txt
@@ -45,8 +45,8 @@ In include/trace/subsys.h :
 #include <linux/tracepoint.h>
 
 DECLARE_TRACE(subsys_eventname,
-	TPPROTO(int firstarg, struct task_struct *p),
-	TPARGS(firstarg, p));
+	TP_PROTO(int firstarg, struct task_struct *p),
+	TP_ARGS(firstarg, p));
 
 In subsys/file.c (where the tracing statement must be added) :
 
@@ -66,10 +66,10 @@ Where :
 - subsys is the name of your subsystem.
 - eventname is the name of the event to trace.
 
-- TPPROTO(int firstarg, struct task_struct *p) is the prototype of the
+- TP_PROTO(int firstarg, struct task_struct *p) is the prototype of the
   function called by this tracepoint.
 
-- TPARGS(firstarg, p) are the parameters names, same as found in the
+- TP_ARGS(firstarg, p) are the parameters names, same as found in the
   prototype.
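Usage note for the ipv6 "disable" parameter documented in the new ipv6.txt above: the option is normally set from a module configuration file rather than on the insmod/modprobe command line. A minimal illustrative entry (the exact file, /etc/modprobe.conf or a distro-specific one, varies as the text says) would be:

	options ipv6 disable=1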
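To complement the renamed TP_PROTO/TP_ARGS macros above, a minimal sketch of the probe side, following the subsys_eventname declaration used in this document; the probe name and the module boilerplate are illustrative, not part of the patch:

	#include <linux/kernel.h>
	#include <linux/module.h>
	#include <linux/sched.h>
	#include <trace/subsys.h>

	/* The probe must have the same signature as the TP_PROTO() declaration. */
	static void probe_subsys_eventname(int firstarg, struct task_struct *p)
	{
		printk(KERN_INFO "subsys_eventname: %d from %s\n", firstarg, p->comm);
	}

	static int __init probe_init(void)
	{
		/* register_trace_<name>() is generated by DECLARE_TRACE(). */
		return register_trace_subsys_eventname(probe_subsys_eventname);
	}

	static void __exit probe_exit(void)
	{
		unregister_trace_subsys_eventname(probe_subsys_eventname);
		/* Wait for in-flight probes to finish before the module text goes away. */
		tracepoint_synchronize_unregister();
	}

	module_init(probe_init);
	module_exit(probe_exit);
	MODULE_LICENSE("GPL");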
Connecting a function (probe) to a tracepoint is done by providing a diff --git a/arch/arm/mach-omap2/board-ldp.c b/arch/arm/mach-omap2/board-ldp.c index f6a13451d1fd..6031e179926b 100644 --- a/arch/arm/mach-omap2/board-ldp.c +++ b/arch/arm/mach-omap2/board-ldp.c @@ -81,7 +81,7 @@ static inline void __init ldp_init_smc911x(void) } ldp_smc911x_resources[0].start = cs_mem_base + 0x0; - ldp_smc911x_resources[0].end = cs_mem_base + 0xf; + ldp_smc911x_resources[0].end = cs_mem_base + 0xff; udelay(100); eth_gpio = LDP_SMC911X_GPIO; diff --git a/arch/blackfin/include/asm/percpu.h b/arch/blackfin/include/asm/percpu.h index 797c0c165069..c94c7bc88c71 100644 --- a/arch/blackfin/include/asm/percpu.h +++ b/arch/blackfin/include/asm/percpu.h @@ -3,14 +3,4 @@ #include <asm-generic/percpu.h> -#ifdef CONFIG_MODULES -#define PERCPU_MODULE_RESERVE 8192 -#else -#define PERCPU_MODULE_RESERVE 0 -#endif - -#define PERCPU_ENOUGH_ROOM \ - (ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES) + \ - PERCPU_MODULE_RESERVE) - #endif /* __ARCH_BLACKFIN_PERCPU__ */ diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h index 9320e2a8a26a..a0d70b46c27c 100644 --- a/arch/x86/include/asm/linkage.h +++ b/arch/x86/include/asm/linkage.h @@ -4,11 +4,6 @@ #undef notrace #define notrace __attribute__((no_instrument_function)) -#ifdef CONFIG_X86_64 -#define __ALIGN .p2align 4,,15 -#define __ALIGN_STR ".p2align 4,,15" -#endif - #ifdef CONFIG_X86_32 #define asmlinkage CPP_ASMLINKAGE __attribute__((regparm(0))) /* @@ -50,16 +45,25 @@ __asmlinkage_protect_n(ret, "g" (arg1), "g" (arg2), "g" (arg3), \ "g" (arg4), "g" (arg5), "g" (arg6)) -#endif +#endif /* CONFIG_X86_32 */ + +#ifdef __ASSEMBLY__ #define GLOBAL(name) \ .globl name; \ name: +#ifdef CONFIG_X86_64 +#define __ALIGN .p2align 4,,15 +#define __ALIGN_STR ".p2align 4,,15" +#endif + #ifdef CONFIG_X86_ALIGNMENT_16 #define __ALIGN .align 16,0x90 #define __ALIGN_STR ".align 16,0x90" #endif +#endif /* __ASSEMBLY__ */ + #endif /* _ASM_X86_LINKAGE_H */ diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 25423a5b80ed..f47df59016c5 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -5,6 +5,7 @@ #include <asm/io.h> #include <asm/processor.h> #include <asm/apic.h> +#include <asm/cpu.h> #ifdef CONFIG_X86_64 # include <asm/numa_64.h> @@ -141,6 +142,55 @@ static void __cpuinit init_amd_k6(struct cpuinfo_x86 *c) } } +static void __cpuinit amd_k7_smp_check(struct cpuinfo_x86 *c) +{ +#ifdef CONFIG_SMP + /* calling is from identify_secondary_cpu() ? */ + if (c->cpu_index == boot_cpu_id) + return; + + /* + * Certain Athlons might work (for various values of 'work') in SMP + * but they are not certified as MP capable. + */ + /* Athlon 660/661 is valid. */ + if ((c->x86_model == 6) && ((c->x86_mask == 0) || + (c->x86_mask == 1))) + goto valid_k7; + + /* Duron 670 is valid */ + if ((c->x86_model == 7) && (c->x86_mask == 0)) + goto valid_k7; + + /* + * Athlon 662, Duron 671, and Athlon >model 7 have capability + * bit. It's worth noting that the A5 stepping (662) of some + * Athlon XP's have the MP bit set. + * See http://www.heise.de/newsticker/data/jow-18.10.01-000 for + * more. + */ + if (((c->x86_model == 6) && (c->x86_mask >= 2)) || + ((c->x86_model == 7) && (c->x86_mask >= 1)) || + (c->x86_model > 7)) + if (cpu_has_mp) + goto valid_k7; + + /* If we get here, not a certified SMP capable AMD system. 
*/ + + /* + * Don't taint if we are running SMP kernel on a single non-MP + * approved Athlon + */ + WARN_ONCE(1, "WARNING: This combination of AMD" + "processors is not suitable for SMP.\n"); + if (!test_taint(TAINT_UNSAFE_SMP)) + add_taint(TAINT_UNSAFE_SMP); + +valid_k7: + ; +#endif +} + static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c) { u32 l, h; @@ -175,6 +225,8 @@ static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c) } set_cpu_cap(c, X86_FEATURE_K7); + + amd_k7_smp_check(c); } #endif diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c index b585e04cbc9e..3178c3acd97e 100644 --- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c +++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c @@ -277,7 +277,6 @@ static struct cpufreq_driver p4clockmod_driver = { .name = "p4-clockmod", .owner = THIS_MODULE, .attr = p4clockmod_attr, - .hide_interface = 1, }; diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 1a89a2b68d15..c1c04bf0df77 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -14,6 +14,7 @@ #include <asm/uaccess.h> #include <asm/ds.h> #include <asm/bugs.h> +#include <asm/cpu.h> #ifdef CONFIG_X86_64 #include <asm/topology.h> @@ -116,6 +117,28 @@ static void __cpuinit trap_init_f00f_bug(void) } #endif +static void __cpuinit intel_smp_check(struct cpuinfo_x86 *c) +{ +#ifdef CONFIG_SMP + /* calling is from identify_secondary_cpu() ? */ + if (c->cpu_index == boot_cpu_id) + return; + + /* + * Mask B, Pentium, but not Pentium MMX + */ + if (c->x86 == 5 && + c->x86_mask >= 1 && c->x86_mask <= 4 && + c->x86_model <= 3) { + /* + * Remember we have B step Pentia with bugs + */ + WARN_ONCE(1, "WARNING: SMP operation may be unreliable" + "with B stepping processors.\n"); + } +#endif +} + static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c) { unsigned long lo, hi; @@ -192,6 +215,8 @@ static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c) #ifdef CONFIG_X86_NUMAQ numaq_tsc_disable(); #endif + + intel_smp_check(c); } #else static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index c29f301d3885..efa615f2bf43 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -42,6 +42,19 @@ unsigned long __per_cpu_offset[NR_CPUS] __read_mostly = { }; EXPORT_SYMBOL(__per_cpu_offset); +/* + * On x86_64 symbols referenced from code should be reachable using + * 32bit relocations. Reserve space for static percpu variables in + * modules so that they are always served from the first chunk which + * is located at the percpu segment base. On x86_32, anything can + * address anywhere. No need to reserve space in the first chunk. + */ +#ifdef CONFIG_X86_64 +#define PERCPU_FIRST_CHUNK_RESERVE PERCPU_MODULE_RESERVE +#else +#define PERCPU_FIRST_CHUNK_RESERVE 0 +#endif + /** * pcpu_need_numa - determine percpu allocation needs to consider NUMA * @@ -141,7 +154,7 @@ static ssize_t __init setup_pcpu_remap(size_t static_size) { static struct vm_struct vm; pg_data_t *last; - size_t ptrs_size; + size_t ptrs_size, dyn_size; unsigned int cpu; ssize_t ret; @@ -169,12 +182,14 @@ proceed: * Currently supports only single page. Supporting multiple * pages won't be too difficult if it ever becomes necessary. 
*/ - pcpur_size = PFN_ALIGN(static_size + PERCPU_DYNAMIC_RESERVE); + pcpur_size = PFN_ALIGN(static_size + PERCPU_MODULE_RESERVE + + PERCPU_DYNAMIC_RESERVE); if (pcpur_size > PMD_SIZE) { pr_warning("PERCPU: static data is larger than large page, " "can't use large page\n"); return -EINVAL; } + dyn_size = pcpur_size - static_size - PERCPU_FIRST_CHUNK_RESERVE; /* allocate pointer array and alloc large pages */ ptrs_size = PFN_ALIGN(num_possible_cpus() * sizeof(pcpur_ptrs[0])); @@ -217,8 +232,9 @@ proceed: pr_info("PERCPU: Remapped at %p with large pages, static data " "%zu bytes\n", vm.addr, static_size); - ret = pcpu_setup_first_chunk(pcpur_get_page, static_size, PMD_SIZE, - pcpur_size - static_size, vm.addr, NULL); + ret = pcpu_setup_first_chunk(pcpur_get_page, static_size, + PERCPU_FIRST_CHUNK_RESERVE, + PMD_SIZE, dyn_size, vm.addr, NULL); goto out_free_ar; enomem: @@ -241,24 +257,31 @@ static ssize_t __init setup_pcpu_remap(size_t static_size) * Embedding allocator * * The first chunk is sized to just contain the static area plus - * PERCPU_DYNAMIC_RESERVE and allocated as a contiguous area using - * bootmem allocator and used as-is without being mapped into vmalloc - * area. This enables the first chunk to piggy back on the linear - * physical PMD mapping and doesn't add any additional pressure to - * TLB. + * module and dynamic reserves, and allocated as a contiguous area + * using bootmem allocator and used as-is without being mapped into + * vmalloc area. This enables the first chunk to piggy back on the + * linear physical PMD mapping and doesn't add any additional pressure + * to TLB. Note that if the needed size is smaller than the minimum + * unit size, the leftover is returned to the bootmem allocator. */ static void *pcpue_ptr __initdata; +static size_t pcpue_size __initdata; static size_t pcpue_unit_size __initdata; static struct page * __init pcpue_get_page(unsigned int cpu, int pageno) { - return virt_to_page(pcpue_ptr + cpu * pcpue_unit_size - + ((size_t)pageno << PAGE_SHIFT)); + size_t off = (size_t)pageno << PAGE_SHIFT; + + if (off >= pcpue_size) + return NULL; + + return virt_to_page(pcpue_ptr + cpu * pcpue_unit_size + off); } static ssize_t __init setup_pcpu_embed(size_t static_size) { unsigned int cpu; + size_t dyn_size; /* * If large page isn't supported, there's no benefit in doing @@ -269,25 +292,32 @@ static ssize_t __init setup_pcpu_embed(size_t static_size) return -EINVAL; /* allocate and copy */ - pcpue_unit_size = PFN_ALIGN(static_size + PERCPU_DYNAMIC_RESERVE); - pcpue_unit_size = max_t(size_t, pcpue_unit_size, PCPU_MIN_UNIT_SIZE); + pcpue_size = PFN_ALIGN(static_size + PERCPU_MODULE_RESERVE + + PERCPU_DYNAMIC_RESERVE); + pcpue_unit_size = max_t(size_t, pcpue_size, PCPU_MIN_UNIT_SIZE); + dyn_size = pcpue_size - static_size - PERCPU_FIRST_CHUNK_RESERVE; + pcpue_ptr = pcpu_alloc_bootmem(0, num_possible_cpus() * pcpue_unit_size, PAGE_SIZE); if (!pcpue_ptr) return -ENOMEM; - for_each_possible_cpu(cpu) - memcpy(pcpue_ptr + cpu * pcpue_unit_size, __per_cpu_load, - static_size); + for_each_possible_cpu(cpu) { + void *ptr = pcpue_ptr + cpu * pcpue_unit_size; + + free_bootmem(__pa(ptr + pcpue_size), + pcpue_unit_size - pcpue_size); + memcpy(ptr, __per_cpu_load, static_size); + } /* we're ready, commit */ pr_info("PERCPU: Embedded %zu pages at %p, static data %zu bytes\n", - pcpue_unit_size >> PAGE_SHIFT, pcpue_ptr, static_size); + pcpue_size >> PAGE_SHIFT, pcpue_ptr, static_size); return pcpu_setup_first_chunk(pcpue_get_page, static_size, - pcpue_unit_size, - 
pcpue_unit_size - static_size, pcpue_ptr, - NULL); + PERCPU_FIRST_CHUNK_RESERVE, + pcpue_unit_size, dyn_size, + pcpue_ptr, NULL); } /* @@ -344,7 +374,8 @@ static ssize_t __init setup_pcpu_4k(size_t static_size) pr_info("PERCPU: Allocated %d 4k pages, static data %zu bytes\n", pcpu4k_nr_static_pages, static_size); - ret = pcpu_setup_first_chunk(pcpu4k_get_page, static_size, 0, 0, NULL, + ret = pcpu_setup_first_chunk(pcpu4k_get_page, static_size, + PERCPU_FIRST_CHUNK_RESERVE, -1, -1, NULL, pcpu4k_populate_pte); goto out_free_ar; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 249334f5080a..ef7d10170c30 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -114,10 +114,6 @@ EXPORT_PER_CPU_SYMBOL(cpu_info); atomic_t init_deasserted; - -/* Set if we find a B stepping CPU */ -static int __cpuinitdata smp_b_stepping; - #if defined(CONFIG_NUMA) && defined(CONFIG_X86_32) /* which logical CPUs are on which nodes */ @@ -271,8 +267,6 @@ static void __cpuinit smp_callin(void) cpumask_set_cpu(cpuid, cpu_callin_mask); } -static int __cpuinitdata unsafe_smp; - /* * Activate a secondary processor. */ @@ -340,76 +334,6 @@ notrace static void __cpuinit start_secondary(void *unused) cpu_idle(); } -static void __cpuinit smp_apply_quirks(struct cpuinfo_x86 *c) -{ - /* - * Mask B, Pentium, but not Pentium MMX - */ - if (c->x86_vendor == X86_VENDOR_INTEL && - c->x86 == 5 && - c->x86_mask >= 1 && c->x86_mask <= 4 && - c->x86_model <= 3) - /* - * Remember we have B step Pentia with bugs - */ - smp_b_stepping = 1; - - /* - * Certain Athlons might work (for various values of 'work') in SMP - * but they are not certified as MP capable. - */ - if ((c->x86_vendor == X86_VENDOR_AMD) && (c->x86 == 6)) { - - if (num_possible_cpus() == 1) - goto valid_k7; - - /* Athlon 660/661 is valid. */ - if ((c->x86_model == 6) && ((c->x86_mask == 0) || - (c->x86_mask == 1))) - goto valid_k7; - - /* Duron 670 is valid */ - if ((c->x86_model == 7) && (c->x86_mask == 0)) - goto valid_k7; - - /* - * Athlon 662, Duron 671, and Athlon >model 7 have capability - * bit. It's worth noting that the A5 stepping (662) of some - * Athlon XP's have the MP bit set. - * See http://www.heise.de/newsticker/data/jow-18.10.01-000 for - * more. - */ - if (((c->x86_model == 6) && (c->x86_mask >= 2)) || - ((c->x86_model == 7) && (c->x86_mask >= 1)) || - (c->x86_model > 7)) - if (cpu_has_mp) - goto valid_k7; - - /* If we get here, not a certified SMP capable AMD system. */ - unsafe_smp = 1; - } - -valid_k7: - ; -} - -static void __cpuinit smp_checks(void) -{ - if (smp_b_stepping) - printk(KERN_WARNING "WARNING: SMP operation may be unreliable" - "with B stepping processors.\n"); - - /* - * Don't taint if we are running SMP kernel on a single non-MP - * approved Athlon - */ - if (unsafe_smp && num_online_cpus() > 1) { - printk(KERN_INFO "WARNING: This combination of AMD" - "processors is not suitable for SMP.\n"); - add_taint(TAINT_UNSAFE_SMP); - } -} - /* * The bootstrap kernel entry code has set these up. 
Save them for * a given CPU @@ -423,7 +347,6 @@ void __cpuinit smp_store_cpu_info(int id) c->cpu_index = id; if (id != 0) identify_secondary_cpu(c); - smp_apply_quirks(c); } @@ -1193,7 +1116,6 @@ void __init native_smp_cpus_done(unsigned int max_cpus) pr_debug("Boot done.\n"); impress_friends(); - smp_checks(); #ifdef CONFIG_X86_IO_APIC setup_ioapic_dest(); #endif diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c index f04549afcfe9..d038b9c45cf8 100644 --- a/arch/x86/kernel/tlb_uv.c +++ b/arch/x86/kernel/tlb_uv.c @@ -314,8 +314,6 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, int locals = 0; struct bau_desc *bau_desc; - WARN_ON(!in_atomic()); - cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu)); uv_cpu = uv_blade_processor_id(); diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index f3a5305b8adf..9fe4ddaa8f6f 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -348,6 +348,11 @@ static void lguest_cpuid(unsigned int *ax, unsigned int *bx, * flush_tlb_user() for both user and kernel mappings unless * the Page Global Enable (PGE) feature bit is set. */ *dx |= 0x00002000; + /* We also lie, and say we're family id 5. 6 or greater + * leads to a rdmsr in early_init_intel which we can't handle. + * Family ID is returned as bits 8-12 in ax. */ + *ax &= 0xFFFFF0FF; + *ax |= 0x00000500; break; case 0x80000000: /* Futureproof this a little: if they ask how much extended @@ -594,19 +599,21 @@ static void __init lguest_init_IRQ(void) /* Some systems map "vectors" to interrupts weirdly. Lguest has * a straightforward 1 to 1 mapping, so force that here. */ __get_cpu_var(vector_irq)[vector] = i; - if (vector != SYSCALL_VECTOR) { - set_intr_gate(vector, - interrupt[vector-FIRST_EXTERNAL_VECTOR]); - set_irq_chip_and_handler_name(i, &lguest_irq_controller, - handle_level_irq, - "level"); - } + if (vector != SYSCALL_VECTOR) + set_intr_gate(vector, interrupt[i]); } /* This call is required to set up for 4k stacks, where we have * separate stacks for hard and soft interrupts. */ irq_ctx_init(smp_processor_id()); } +void lguest_setup_irq(unsigned int irq) +{ + irq_to_desc_alloc_cpu(irq, 0); + set_irq_chip_and_handler_name(irq, &lguest_irq_controller, + handle_level_irq, "level"); +} + /* * Time. 
* diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 6d63e3d1253d..15219e0d1243 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -134,8 +134,8 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, { unsigned long page_size_mask = 0; unsigned long start_pfn, end_pfn; + unsigned long ret = 0; unsigned long pos; - unsigned long ret; struct map_range mr[NR_RANGE_MR]; int nr_range, i; diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index d7f5060ab21c..749559ed80f5 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -806,11 +806,6 @@ static unsigned long __init setup_node_bootmem(int nodeid, { unsigned long bootmap_size; - if (start_pfn > max_low_pfn) - return bootmap; - if (end_pfn > max_low_pfn) - end_pfn = max_low_pfn; - /* don't touch min_low_pfn */ bootmap_size = init_bootmem_node(NODE_DATA(nodeid), bootmap >> PAGE_SHIFT, @@ -843,13 +838,23 @@ void __init setup_bootmem_allocator(void) max_pfn_mapped<<PAGE_SHIFT); printk(KERN_INFO " low ram: 0 - %08lx\n", max_low_pfn<<PAGE_SHIFT); + for_each_online_node(nodeid) { + unsigned long start_pfn, end_pfn; + #ifdef CONFIG_NEED_MULTIPLE_NODES - for_each_online_node(nodeid) - bootmap = setup_node_bootmem(nodeid, node_start_pfn[nodeid], - node_end_pfn[nodeid], bootmap); + start_pfn = node_start_pfn[nodeid]; + end_pfn = node_end_pfn[nodeid]; + if (start_pfn > max_low_pfn) + continue; + if (end_pfn > max_low_pfn) + end_pfn = max_low_pfn; #else - bootmap = setup_node_bootmem(0, 0, max_low_pfn, bootmap); + start_pfn = 0; + end_pfn = max_low_pfn; #endif + bootmap = setup_node_bootmem(nodeid, start_pfn, end_pfn, + bootmap); + } after_bootmem = 1; } diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 66d6be85df82..1753e8020df6 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -85,7 +85,7 @@ early_param("gbpages", parse_direct_gbpages_on); pteval_t __supported_pte_mask __read_mostly = ~_PAGE_IOMAP; EXPORT_SYMBOL_GPL(__supported_pte_mask); -static int do_not_nx __cpuinitdata; +static int disable_nx __cpuinitdata; /* * noexec=on|off @@ -100,9 +100,9 @@ static int __init nonx_setup(char *str) return -EINVAL; if (!strncmp(str, "on", 2)) { __supported_pte_mask |= _PAGE_NX; - do_not_nx = 0; + disable_nx = 0; } else if (!strncmp(str, "off", 3)) { - do_not_nx = 1; + disable_nx = 1; __supported_pte_mask &= ~_PAGE_NX; } return 0; @@ -114,7 +114,7 @@ void __cpuinit check_efer(void) unsigned long efer; rdmsrl(MSR_EFER, efer); - if (!(efer & EFER_NX) || do_not_nx) + if (!(efer & EFER_NX) || disable_nx) __supported_pte_mask &= ~_PAGE_NX; } diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 62773abdf088..aca924a30ee6 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -87,6 +87,8 @@ bool __virt_addr_valid(unsigned long x) return false; if (__vmalloc_start_set && is_vmalloc_addr((void *) x)) return false; + if (x >= FIXADDR_START) + return false; return pfn_valid((x - PAGE_OFFSET) >> PAGE_SHIFT); } EXPORT_SYMBOL(__virt_addr_valid); @@ -504,13 +506,19 @@ static inline pte_t * __init early_ioremap_pte(unsigned long addr) return &bm_pte[pte_index(addr)]; } +static unsigned long slot_virt[FIX_BTMAPS_SLOTS] __initdata; + void __init early_ioremap_init(void) { pmd_t *pmd; + int i; if (early_ioremap_debug) printk(KERN_INFO "early_ioremap_init()\n"); + for (i = 0; i < FIX_BTMAPS_SLOTS; i++) + slot_virt[i] = fix_to_virt(FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*i); + pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)); memset(bm_pte, 0, sizeof(bm_pte)); 
pmd_populate_kernel(&init_mm, pmd, bm_pte); @@ -577,6 +585,7 @@ static inline void __init early_clear_fixmap(enum fixed_addresses idx) static void __iomem *prev_map[FIX_BTMAPS_SLOTS] __initdata; static unsigned long prev_size[FIX_BTMAPS_SLOTS] __initdata; + static int __init check_early_ioremap_leak(void) { int count = 0; @@ -598,7 +607,8 @@ static int __init check_early_ioremap_leak(void) } late_initcall(check_early_ioremap_leak); -static void __init __iomem *__early_ioremap(unsigned long phys_addr, unsigned long size, pgprot_t prot) +static void __init __iomem * +__early_ioremap(unsigned long phys_addr, unsigned long size, pgprot_t prot) { unsigned long offset, last_addr; unsigned int nrpages; @@ -664,9 +674,9 @@ static void __init __iomem *__early_ioremap(unsigned long phys_addr, unsigned lo --nrpages; } if (early_ioremap_debug) - printk(KERN_CONT "%08lx + %08lx\n", offset, fix_to_virt(idx0)); + printk(KERN_CONT "%08lx + %08lx\n", offset, slot_virt[slot]); - prev_map[slot] = (void __iomem *)(offset + fix_to_virt(idx0)); + prev_map[slot] = (void __iomem *)(offset + slot_virt[slot]); return prev_map[slot]; } @@ -734,8 +744,3 @@ void __init early_iounmap(void __iomem *addr, unsigned long size) } prev_map[slot] = NULL; } - -void __this_fixmap_does_not_exist(void) -{ - WARN_ON(1); -} diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index 9f205030d9aa..6a518dd08a36 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c @@ -451,23 +451,24 @@ static void rcu_free_kmmio_fault_pages(struct rcu_head *head) static void remove_kmmio_fault_pages(struct rcu_head *head) { - struct kmmio_delayed_release *dr = container_of( - head, - struct kmmio_delayed_release, - rcu); + struct kmmio_delayed_release *dr = + container_of(head, struct kmmio_delayed_release, rcu); struct kmmio_fault_page *p = dr->release_list; struct kmmio_fault_page **prevp = &dr->release_list; unsigned long flags; + spin_lock_irqsave(&kmmio_lock, flags); while (p) { - if (!p->count) + if (!p->count) { list_del_rcu(&p->list); - else + prevp = &p->release_next; + } else { *prevp = p->release_next; - prevp = &p->release_next; + } p = p->release_next; } spin_unlock_irqrestore(&kmmio_lock, flags); + /* This is the real RCU destroy call. */ call_rcu(&dr->rcu, rcu_free_kmmio_fault_pages); } diff --git a/arch/x86/mm/memtest.c b/arch/x86/mm/memtest.c index 0bcd7883d036..605c8be06217 100644 --- a/arch/x86/mm/memtest.c +++ b/arch/x86/mm/memtest.c @@ -100,6 +100,9 @@ static int __init parse_memtest(char *arg) { if (arg) memtest_pattern = simple_strtoul(arg, NULL, 0); + else + memtest_pattern = ARRAY_SIZE(patterns); + return 0; } diff --git a/drivers/block/aoe/aoedev.c b/drivers/block/aoe/aoedev.c index cc250577d405..eeea477d9601 100644 --- a/drivers/block/aoe/aoedev.c +++ b/drivers/block/aoe/aoedev.c @@ -173,7 +173,7 @@ skbfree(struct sk_buff *skb) return; while (atomic_read(&skb_shinfo(skb)->dataref) != 1 && i-- > 0) msleep(Sms); - if (i <= 0) { + if (i < 0) { printk(KERN_ERR "aoe: %s holds ref: %s\n", skb->dev ? 
skb->dev->name : "netif", diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index b55cb67435bd..d6daf3c507d3 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -754,11 +754,6 @@ static struct kobj_type ktype_cpufreq = { .release = cpufreq_sysfs_release, }; -static struct kobj_type ktype_empty_cpufreq = { - .sysfs_ops = &sysfs_ops, - .release = cpufreq_sysfs_release, -}; - /** * cpufreq_add_dev - add a CPU device @@ -892,36 +887,26 @@ static int cpufreq_add_dev(struct sys_device *sys_dev) memcpy(&new_policy, policy, sizeof(struct cpufreq_policy)); /* prepare interface data */ - if (!cpufreq_driver->hide_interface) { - ret = kobject_init_and_add(&policy->kobj, &ktype_cpufreq, - &sys_dev->kobj, "cpufreq"); + ret = kobject_init_and_add(&policy->kobj, &ktype_cpufreq, &sys_dev->kobj, + "cpufreq"); + if (ret) + goto err_out_driver_exit; + + /* set up files for this cpu device */ + drv_attr = cpufreq_driver->attr; + while ((drv_attr) && (*drv_attr)) { + ret = sysfs_create_file(&policy->kobj, &((*drv_attr)->attr)); if (ret) goto err_out_driver_exit; - - /* set up files for this cpu device */ - drv_attr = cpufreq_driver->attr; - while ((drv_attr) && (*drv_attr)) { - ret = sysfs_create_file(&policy->kobj, - &((*drv_attr)->attr)); - if (ret) - goto err_out_driver_exit; - drv_attr++; - } - if (cpufreq_driver->get) { - ret = sysfs_create_file(&policy->kobj, - &cpuinfo_cur_freq.attr); - if (ret) - goto err_out_driver_exit; - } - if (cpufreq_driver->target) { - ret = sysfs_create_file(&policy->kobj, - &scaling_cur_freq.attr); - if (ret) - goto err_out_driver_exit; - } - } else { - ret = kobject_init_and_add(&policy->kobj, &ktype_empty_cpufreq, - &sys_dev->kobj, "cpufreq"); + drv_attr++; + } + if (cpufreq_driver->get) { + ret = sysfs_create_file(&policy->kobj, &cpuinfo_cur_freq.attr); + if (ret) + goto err_out_driver_exit; + } + if (cpufreq_driver->target) { + ret = sysfs_create_file(&policy->kobj, &scaling_cur_freq.attr); if (ret) goto err_out_driver_exit; } diff --git a/drivers/lguest/lguest_device.c b/drivers/lguest/lguest_device.c index b4d44e571d76..8132533d71f9 100644 --- a/drivers/lguest/lguest_device.c +++ b/drivers/lguest/lguest_device.c @@ -212,6 +212,9 @@ static void lg_notify(struct virtqueue *vq) hcall(LHCALL_NOTIFY, lvq->config.pfn << PAGE_SHIFT, 0, 0); } +/* An extern declaration inside a C file is bad form. Don't do it. */ +extern void lguest_setup_irq(unsigned int irq); + /* This routine finds the first virtqueue described in the configuration of * this device and sets it up. * @@ -266,6 +269,9 @@ static struct virtqueue *lg_find_vq(struct virtio_device *vdev, goto unmap; } + /* Make sure the interrupt is allocated. */ + lguest_setup_irq(lvq->config.irq); + /* Tell the interrupt for this virtqueue to go to the virtio_ring * interrupt handler. 
*/ /* FIXME: We used to have a flag for the Host to tell us we could use diff --git a/drivers/net/arm/ks8695net.c b/drivers/net/arm/ks8695net.c index 1cf2f949c0b4..f3a127434897 100644 --- a/drivers/net/arm/ks8695net.c +++ b/drivers/net/arm/ks8695net.c @@ -560,7 +560,7 @@ ks8695_reset(struct ks8695_priv *ksp) msleep(1); } - if (reset_timeout == 0) { + if (reset_timeout < 0) { dev_crit(ksp->dev, "Timeout waiting for DMA engines to reset\n"); /* And blithely carry on */ diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 9fb388388fb7..e0578fe8c0db 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -4113,7 +4113,7 @@ static int bond_neigh_setup(struct net_device *dev, struct neigh_parms *parms) const struct net_device_ops *slave_ops = slave->dev->netdev_ops; if (slave_ops->ndo_neigh_setup) - return slave_ops->ndo_neigh_setup(dev, parms); + return slave_ops->ndo_neigh_setup(slave->dev, parms); } return 0; } diff --git a/drivers/net/jme.c b/drivers/net/jme.c index 08b34051c646..a6e1a35a13cb 100644 --- a/drivers/net/jme.c +++ b/drivers/net/jme.c @@ -957,13 +957,14 @@ jme_process_receive(struct jme_adapter *jme, int limit) goto out_inc; i = atomic_read(&rxring->next_to_clean); - while (limit-- > 0) { + while (limit > 0) { rxdesc = rxring->desc; rxdesc += i; if ((rxdesc->descwb.flags & cpu_to_le16(RXWBFLAG_OWN)) || !(rxdesc->descwb.desccnt & RXWBDCNT_WBCPL)) goto out; + --limit; desccnt = rxdesc->descwb.desccnt & RXWBDCNT_DCNT; diff --git a/drivers/net/pcmcia/3c574_cs.c b/drivers/net/pcmcia/3c574_cs.c index e5cb6b1f0ebd..2404a838b1fe 100644 --- a/drivers/net/pcmcia/3c574_cs.c +++ b/drivers/net/pcmcia/3c574_cs.c @@ -1035,7 +1035,8 @@ static int el3_rx(struct net_device *dev, int worklimit) DEBUG(3, "%s: in rx_packet(), status %4.4x, rx_status %4.4x.\n", dev->name, inw(ioaddr+EL3_STATUS), inw(ioaddr+RxStatus)); while (!((rx_status = inw(ioaddr + RxStatus)) & 0x8000) && - (--worklimit >= 0)) { + worklimit > 0) { + worklimit--; if (rx_status & 0x4000) { /* Error, update stats. */ short error = rx_status & 0x3800; dev->stats.rx_errors++; diff --git a/drivers/net/pcmcia/3c589_cs.c b/drivers/net/pcmcia/3c589_cs.c index 73ecc657999d..1e01b8a6dbf3 100644 --- a/drivers/net/pcmcia/3c589_cs.c +++ b/drivers/net/pcmcia/3c589_cs.c @@ -857,7 +857,8 @@ static int el3_rx(struct net_device *dev) DEBUG(3, "%s: in rx_packet(), status %4.4x, rx_status %4.4x.\n", dev->name, inw(ioaddr+EL3_STATUS), inw(ioaddr+RX_STATUS)); while (!((rx_status = inw(ioaddr + RX_STATUS)) & 0x8000) && - (--worklimit >= 0)) { + worklimit > 0) { + worklimit--; if (rx_status & 0x4000) { /* Error, update stats. 
*/ short error = rx_status & 0x3800; dev->stats.rx_errors++; diff --git a/drivers/net/smc911x.h b/drivers/net/smc911x.h index 870b4c33f108..a45952e72018 100644 --- a/drivers/net/smc911x.h +++ b/drivers/net/smc911x.h @@ -42,6 +42,16 @@ #define SMC_USE_16BIT 0 #define SMC_USE_32BIT 1 #define SMC_IRQ_SENSE IRQF_TRIGGER_LOW +#elif defined(CONFIG_ARCH_OMAP34XX) + #define SMC_USE_16BIT 0 + #define SMC_USE_32BIT 1 + #define SMC_IRQ_SENSE IRQF_TRIGGER_LOW + #define SMC_MEM_RESERVED 1 +#elif defined(CONFIG_ARCH_OMAP24XX) + #define SMC_USE_16BIT 0 + #define SMC_USE_32BIT 1 + #define SMC_IRQ_SENSE IRQF_TRIGGER_LOW + #define SMC_MEM_RESERVED 1 #else /* * Default configuration @@ -675,6 +685,7 @@ smc_pxa_dma_outsl(struct smc911x_local *lp, u_long physaddr, #define CHIP_9116 0x0116 #define CHIP_9117 0x0117 #define CHIP_9118 0x0118 +#define CHIP_9211 0x9211 #define CHIP_9215 0x115A #define CHIP_9217 0x117A #define CHIP_9218 0x118A @@ -689,6 +700,7 @@ static const struct chip_id chip_ids[] = { { CHIP_9116, "LAN9116" }, { CHIP_9117, "LAN9117" }, { CHIP_9118, "LAN9118" }, + { CHIP_9211, "LAN9211" }, { CHIP_9215, "LAN9215" }, { CHIP_9217, "LAN9217" }, { CHIP_9218, "LAN9218" }, diff --git a/drivers/net/sungem.c b/drivers/net/sungem.c index 8d64b1da0465..0fcb7503363d 100644 --- a/drivers/net/sungem.c +++ b/drivers/net/sungem.c @@ -1229,7 +1229,7 @@ static void gem_reset(struct gem *gp) break; } while (val & (GREG_SWRST_TXRST | GREG_SWRST_RXRST)); - if (limit <= 0) + if (limit < 0) printk(KERN_ERR "%s: SW reset is ghetto.\n", gp->dev->name); if (gp->phy_type == phy_serialink || gp->phy_type == phy_serdes) diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index b080f9493d83..dabdf59f8016 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c @@ -1473,7 +1473,8 @@ static void tg3_phy_toggle_apd(struct tg3 *tp, bool enable) { u32 reg; - if (!(tp->tg3_flags2 & TG3_FLG2_5705_PLUS)) + if (!(tp->tg3_flags2 & TG3_FLG2_5705_PLUS) || + GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5906) return; reg = MII_TG3_MISC_SHDW_WREN | diff --git a/drivers/net/tokenring/tmspci.c b/drivers/net/tokenring/tmspci.c index 5f601773c260..e2150b3c83d9 100644 --- a/drivers/net/tokenring/tmspci.c +++ b/drivers/net/tokenring/tmspci.c @@ -121,11 +121,6 @@ static int __devinit tms_pci_attach(struct pci_dev *pdev, const struct pci_devic goto err_out_trdev; } - ret = request_irq(pdev->irq, tms380tr_interrupt, IRQF_SHARED, - dev->name, dev); - if (ret) - goto err_out_region; - dev->base_addr = pci_ioaddr; dev->irq = pci_irq_line; dev->dma = 0; @@ -142,7 +137,7 @@ static int __devinit tms_pci_attach(struct pci_dev *pdev, const struct pci_devic ret = tmsdev_init(dev, &pdev->dev); if (ret) { printk("%s: unable to get memory for dev->priv.\n", dev->name); - goto err_out_irq; + goto err_out_region; } tp = netdev_priv(dev); @@ -157,6 +152,11 @@ static int __devinit tms_pci_attach(struct pci_dev *pdev, const struct pci_devic tp->tmspriv = cardinfo; + ret = request_irq(pdev->irq, tms380tr_interrupt, IRQF_SHARED, + dev->name, dev); + if (ret) + goto err_out_tmsdev; + dev->open = tms380tr_open; dev->stop = tms380tr_close; pci_set_drvdata(pdev, dev); @@ -164,15 +164,15 @@ static int __devinit tms_pci_attach(struct pci_dev *pdev, const struct pci_devic ret = register_netdev(dev); if (ret) - goto err_out_tmsdev; + goto err_out_irq; return 0; +err_out_irq: + free_irq(pdev->irq, dev); err_out_tmsdev: pci_set_drvdata(pdev, NULL); tmsdev_term(dev); -err_out_irq: - free_irq(pdev->irq, dev); err_out_region: release_region(pci_ioaddr, TMS_PCI_IO_EXTENT); 
err_out_trdev: diff --git a/drivers/net/ucc_geth_mii.c b/drivers/net/ucc_geth_mii.c index 54635911305c..0ada4edd56eb 100644 --- a/drivers/net/ucc_geth_mii.c +++ b/drivers/net/ucc_geth_mii.c @@ -107,7 +107,7 @@ int uec_mdio_read(struct mii_bus *bus, int mii_id, int regnum) static int uec_mdio_reset(struct mii_bus *bus) { struct ucc_mii_mng __iomem *regs = (void __iomem *)bus->priv; - unsigned int timeout = PHY_INIT_TIMEOUT; + int timeout = PHY_INIT_TIMEOUT; mutex_lock(&bus->mdio_lock); @@ -123,7 +123,7 @@ static int uec_mdio_reset(struct mii_bus *bus) mutex_unlock(&bus->mdio_lock); - if (timeout <= 0) { + if (timeout < 0) { printk(KERN_ERR "%s: The MII Bus is stuck!\n", bus->name); return -EBUSY; } diff --git a/drivers/net/usb/dm9601.c b/drivers/net/usb/dm9601.c index 5b67bbf1987e..81682c6defa0 100644 --- a/drivers/net/usb/dm9601.c +++ b/drivers/net/usb/dm9601.c @@ -635,6 +635,10 @@ static const struct usb_device_id products[] = { USB_DEVICE(0x0a47, 0x9601), /* Hirose USB-100 */ .driver_info = (unsigned long)&dm9601_info, }, + { + USB_DEVICE(0x0fe6, 0x8101), /* DM9601 USB to Fast Ethernet Adapter */ + .driver_info = (unsigned long)&dm9601_info, + }, {}, // END }; diff --git a/drivers/net/wireless/iwlwifi/iwl-agn.c b/drivers/net/wireless/iwlwifi/iwl-agn.c index 36bafeb353ce..129e2d330abb 100644 --- a/drivers/net/wireless/iwlwifi/iwl-agn.c +++ b/drivers/net/wireless/iwlwifi/iwl-agn.c @@ -3868,7 +3868,7 @@ static int iwl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } err = iwl_eeprom_check_version(priv); if (err) - goto out_iounmap; + goto out_free_eeprom; /* extract MAC Address */ iwl_eeprom_get_mac(priv, priv->mac_addr); @@ -3945,6 +3945,8 @@ static int iwl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) return 0; out_remove_sysfs: + destroy_workqueue(priv->workqueue); + priv->workqueue = NULL; sysfs_remove_group(&pdev->dev.kobj, &iwl_attribute_group); out_uninit_drv: iwl_uninit_drv(priv); @@ -3953,8 +3955,8 @@ static int iwl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) out_iounmap: pci_iounmap(pdev, priv->hw_base); out_pci_release_regions: - pci_release_regions(pdev); pci_set_drvdata(pdev, NULL); + pci_release_regions(pdev); out_pci_disable_device: pci_disable_device(pdev); out_ieee80211_free_hw: diff --git a/drivers/net/wireless/iwlwifi/iwl3945-base.c b/drivers/net/wireless/iwlwifi/iwl3945-base.c index 93be74a1f139..57dd34e256d8 100644 --- a/drivers/net/wireless/iwlwifi/iwl3945-base.c +++ b/drivers/net/wireless/iwlwifi/iwl3945-base.c @@ -7911,7 +7911,7 @@ static int iwl3945_pci_probe(struct pci_dev *pdev, const struct pci_device_id *e CSR_GP_CNTRL_REG_FLAG_MAC_CLOCK_READY, 25000); if (err < 0) { IWL_DEBUG_INFO("Failed to init the card\n"); - goto out_remove_sysfs; + goto out_iounmap; } /*********************** @@ -7921,7 +7921,7 @@ static int iwl3945_pci_probe(struct pci_dev *pdev, const struct pci_device_id *e err = iwl3945_eeprom_init(priv); if (err) { IWL_ERROR("Unable to init EEPROM\n"); - goto out_remove_sysfs; + goto out_iounmap; } /* MAC Address location in EEPROM same for 3945/4965 */ get_eeprom_mac(priv, priv->mac_addr); @@ -7975,7 +7975,7 @@ static int iwl3945_pci_probe(struct pci_dev *pdev, const struct pci_device_id *e err = iwl3945_init_channel_map(priv); if (err) { IWL_ERROR("initializing regulatory failed: %d\n", err); - goto out_release_irq; + goto out_unset_hw_setting; } err = iwl3945_init_geos(priv); @@ -8045,25 +8045,22 @@ static int iwl3945_pci_probe(struct pci_dev *pdev, const struct pci_device_id *e return 0; 
out_remove_sysfs: + destroy_workqueue(priv->workqueue); + priv->workqueue = NULL; sysfs_remove_group(&pdev->dev.kobj, &iwl3945_attribute_group); out_free_geos: iwl3945_free_geos(priv); out_free_channel_map: iwl3945_free_channel_map(priv); - - - out_release_irq: - destroy_workqueue(priv->workqueue); - priv->workqueue = NULL; + out_unset_hw_setting: iwl3945_unset_hw_setting(priv); - out_iounmap: pci_iounmap(pdev, priv->hw_base); out_pci_release_regions: pci_release_regions(pdev); out_pci_disable_device: - pci_disable_device(pdev); pci_set_drvdata(pdev, NULL); + pci_disable_device(pdev); out_ieee80211_free_hw: ieee80211_free_hw(priv->hw); out: diff --git a/drivers/net/wireless/p54/p54common.c b/drivers/net/wireless/p54/p54common.c index 34561e6e816b..f170106bf0ae 100644 --- a/drivers/net/wireless/p54/p54common.c +++ b/drivers/net/wireless/p54/p54common.c @@ -710,10 +710,11 @@ static struct sk_buff *p54_find_tx_entry(struct ieee80211_hw *dev, __le32 req_id) { struct p54_common *priv = dev->priv; - struct sk_buff *entry = priv->tx_queue.next; + struct sk_buff *entry; unsigned long flags; spin_lock_irqsave(&priv->tx_queue.lock, flags); + entry = priv->tx_queue.next; while (entry != (struct sk_buff *)&priv->tx_queue) { struct p54_hdr *hdr = (struct p54_hdr *) entry->data; @@ -732,7 +733,7 @@ static void p54_rx_frame_sent(struct ieee80211_hw *dev, struct sk_buff *skb) struct p54_common *priv = dev->priv; struct p54_hdr *hdr = (struct p54_hdr *) skb->data; struct p54_frame_sent *payload = (struct p54_frame_sent *) hdr->data; - struct sk_buff *entry = (struct sk_buff *) priv->tx_queue.next; + struct sk_buff *entry; u32 addr = le32_to_cpu(hdr->req_id) - priv->headroom; struct memrecord *range = NULL; u32 freed = 0; @@ -741,6 +742,7 @@ static void p54_rx_frame_sent(struct ieee80211_hw *dev, struct sk_buff *skb) int count, idx; spin_lock_irqsave(&priv->tx_queue.lock, flags); + entry = (struct sk_buff *) priv->tx_queue.next; while (entry != (struct sk_buff *)&priv->tx_queue) { struct ieee80211_tx_info *info = IEEE80211_SKB_CB(entry); struct p54_hdr *entry_hdr; @@ -976,7 +978,7 @@ static int p54_assign_address(struct ieee80211_hw *dev, struct sk_buff *skb, struct p54_hdr *data, u32 len) { struct p54_common *priv = dev->priv; - struct sk_buff *entry = priv->tx_queue.next; + struct sk_buff *entry; struct sk_buff *target_skb = NULL; struct ieee80211_tx_info *info; struct memrecord *range; @@ -1014,6 +1016,7 @@ static int p54_assign_address(struct ieee80211_hw *dev, struct sk_buff *skb, } } + entry = priv->tx_queue.next; while (left--) { u32 hole_size; info = IEEE80211_SKB_CB(entry); diff --git a/drivers/net/wireless/rt2x00/rt2500usb.c b/drivers/net/wireless/rt2x00/rt2500usb.c index af6b5847be5c..3e2ac2bbb12f 100644 --- a/drivers/net/wireless/rt2x00/rt2500usb.c +++ b/drivers/net/wireless/rt2x00/rt2500usb.c @@ -1952,6 +1952,8 @@ static struct usb_device_id rt2500usb_device_table[] = { { USB_DEVICE(0x13b1, 0x000d), USB_DEVICE_DATA(&rt2500usb_ops) }, { USB_DEVICE(0x13b1, 0x0011), USB_DEVICE_DATA(&rt2500usb_ops) }, { USB_DEVICE(0x13b1, 0x001a), USB_DEVICE_DATA(&rt2500usb_ops) }, + /* CNet */ + { USB_DEVICE(0x1371, 0x9022), USB_DEVICE_DATA(&rt2500usb_ops) }, /* Conceptronic */ { USB_DEVICE(0x14b2, 0x3c02), USB_DEVICE_DATA(&rt2500usb_ops) }, /* D-LINK */ @@ -1976,14 +1978,20 @@ static struct usb_device_id rt2500usb_device_table[] = { { USB_DEVICE(0x148f, 0x2570), USB_DEVICE_DATA(&rt2500usb_ops) }, { USB_DEVICE(0x148f, 0x2573), USB_DEVICE_DATA(&rt2500usb_ops) }, { USB_DEVICE(0x148f, 0x9020), 
USB_DEVICE_DATA(&rt2500usb_ops) }, + /* Sagem */ + { USB_DEVICE(0x079b, 0x004b), USB_DEVICE_DATA(&rt2500usb_ops) }, /* Siemens */ { USB_DEVICE(0x0681, 0x3c06), USB_DEVICE_DATA(&rt2500usb_ops) }, /* SMC */ { USB_DEVICE(0x0707, 0xee13), USB_DEVICE_DATA(&rt2500usb_ops) }, /* Spairon */ { USB_DEVICE(0x114b, 0x0110), USB_DEVICE_DATA(&rt2500usb_ops) }, + /* SURECOM */ + { USB_DEVICE(0x0769, 0x11f3), USB_DEVICE_DATA(&rt2500usb_ops) }, /* Trust */ { USB_DEVICE(0x0eb0, 0x9020), USB_DEVICE_DATA(&rt2500usb_ops) }, + /* VTech */ + { USB_DEVICE(0x0f88, 0x3012), USB_DEVICE_DATA(&rt2500usb_ops) }, /* Zinwell */ { USB_DEVICE(0x5a57, 0x0260), USB_DEVICE_DATA(&rt2500usb_ops) }, { 0, } diff --git a/drivers/net/wireless/rt2x00/rt73usb.c b/drivers/net/wireless/rt2x00/rt73usb.c index 96a8d69f8790..cefee1b26cd8 100644 --- a/drivers/net/wireless/rt2x00/rt73usb.c +++ b/drivers/net/wireless/rt2x00/rt73usb.c @@ -2281,7 +2281,18 @@ static const struct rt2x00_ops rt73usb_ops = { */ static struct usb_device_id rt73usb_device_table[] = { /* AboCom */ + { USB_DEVICE(0x07b8, 0xb21b), USB_DEVICE_DATA(&rt73usb_ops) }, + { USB_DEVICE(0x07b8, 0xb21c), USB_DEVICE_DATA(&rt73usb_ops) }, { USB_DEVICE(0x07b8, 0xb21d), USB_DEVICE_DATA(&rt73usb_ops) }, + { USB_DEVICE(0x07b8, 0xb21e), USB_DEVICE_DATA(&rt73usb_ops) }, + { USB_DEVICE(0x07b8, 0xb21f), USB_DEVICE_DATA(&rt73usb_ops) }, + /* AL */ + { USB_DEVICE(0x14b2, 0x3c10), USB_DEVICE_DATA(&rt73usb_ops) }, + /* Amigo */ + { USB_DEVICE(0x148f, 0x9021), USB_DEVICE_DATA(&rt73usb_ops) }, + { USB_DEVICE(0x0eb0, 0x9021), USB_DEVICE_DATA(&rt73usb_ops) }, + /* AMIT */ + { USB_DEVICE(0x18c5, 0x0002), USB_DEVICE_DATA(&rt73usb_ops) }, /* Askey */ { USB_DEVICE(0x1690, 0x0722), USB_DEVICE_DATA(&rt73usb_ops) }, /* ASUS */ @@ -2294,7 +2305,9 @@ static struct usb_device_id rt73usb_device_table[] = { { USB_DEVICE(0x050d, 0x905c), USB_DEVICE_DATA(&rt73usb_ops) }, /* Billionton */ { USB_DEVICE(0x1631, 0xc019), USB_DEVICE_DATA(&rt73usb_ops) }, + { USB_DEVICE(0x08dd, 0x0120), USB_DEVICE_DATA(&rt73usb_ops) }, /* Buffalo */ + { USB_DEVICE(0x0411, 0x00d8), USB_DEVICE_DATA(&rt73usb_ops) }, { USB_DEVICE(0x0411, 0x00f4), USB_DEVICE_DATA(&rt73usb_ops) }, /* CNet */ { USB_DEVICE(0x1371, 0x9022), USB_DEVICE_DATA(&rt73usb_ops) }, @@ -2308,6 +2321,11 @@ static struct usb_device_id rt73usb_device_table[] = { { USB_DEVICE(0x07d1, 0x3c04), USB_DEVICE_DATA(&rt73usb_ops) }, { USB_DEVICE(0x07d1, 0x3c06), USB_DEVICE_DATA(&rt73usb_ops) }, { USB_DEVICE(0x07d1, 0x3c07), USB_DEVICE_DATA(&rt73usb_ops) }, + /* Edimax */ + { USB_DEVICE(0x7392, 0x7318), USB_DEVICE_DATA(&rt73usb_ops) }, + { USB_DEVICE(0x7392, 0x7618), USB_DEVICE_DATA(&rt73usb_ops) }, + /* EnGenius */ + { USB_DEVICE(0x1740, 0x3701), USB_DEVICE_DATA(&rt73usb_ops) }, /* Gemtek */ { USB_DEVICE(0x15a9, 0x0004), USB_DEVICE_DATA(&rt73usb_ops) }, /* Gigabyte */ @@ -2328,22 +2346,34 @@ static struct usb_device_id rt73usb_device_table[] = { { USB_DEVICE(0x0db0, 0xa861), USB_DEVICE_DATA(&rt73usb_ops) }, { USB_DEVICE(0x0db0, 0xa874), USB_DEVICE_DATA(&rt73usb_ops) }, /* Ralink */ + { USB_DEVICE(0x04bb, 0x093d), USB_DEVICE_DATA(&rt73usb_ops) }, { USB_DEVICE(0x148f, 0x2573), USB_DEVICE_DATA(&rt73usb_ops) }, { USB_DEVICE(0x148f, 0x2671), USB_DEVICE_DATA(&rt73usb_ops) }, /* Qcom */ { USB_DEVICE(0x18e8, 0x6196), USB_DEVICE_DATA(&rt73usb_ops) }, { USB_DEVICE(0x18e8, 0x6229), USB_DEVICE_DATA(&rt73usb_ops) }, { USB_DEVICE(0x18e8, 0x6238), USB_DEVICE_DATA(&rt73usb_ops) }, + /* Samsung */ + { USB_DEVICE(0x04e8, 0x4471), USB_DEVICE_DATA(&rt73usb_ops) }, /* Senao */ { USB_DEVICE(0x1740, 
0x7100), USB_DEVICE_DATA(&rt73usb_ops) }, /* Sitecom */ - { USB_DEVICE(0x0df6, 0x9712), USB_DEVICE_DATA(&rt73usb_ops) }, + { USB_DEVICE(0x0df6, 0x0024), USB_DEVICE_DATA(&rt73usb_ops) }, + { USB_DEVICE(0x0df6, 0x0027), USB_DEVICE_DATA(&rt73usb_ops) }, + { USB_DEVICE(0x0df6, 0x002f), USB_DEVICE_DATA(&rt73usb_ops) }, { USB_DEVICE(0x0df6, 0x90ac), USB_DEVICE_DATA(&rt73usb_ops) }, + { USB_DEVICE(0x0df6, 0x9712), USB_DEVICE_DATA(&rt73usb_ops) }, /* Surecom */ { USB_DEVICE(0x0769, 0x31f3), USB_DEVICE_DATA(&rt73usb_ops) }, + /* Philips */ + { USB_DEVICE(0x0471, 0x200a), USB_DEVICE_DATA(&rt73usb_ops) }, /* Planex */ { USB_DEVICE(0x2019, 0xab01), USB_DEVICE_DATA(&rt73usb_ops) }, { USB_DEVICE(0x2019, 0xab50), USB_DEVICE_DATA(&rt73usb_ops) }, + /* Zcom */ + { USB_DEVICE(0x0cde, 0x001c), USB_DEVICE_DATA(&rt73usb_ops) }, + /* ZyXEL */ + { USB_DEVICE(0x0586, 0x3415), USB_DEVICE_DATA(&rt73usb_ops) }, { 0, } }; diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 42491d728e99..37f31b5529aa 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -277,7 +277,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, if (*cow_ret == buf) unlock_orig = 1; - WARN_ON(!btrfs_tree_locked(buf)); + btrfs_assert_tree_locked(buf); if (parent) parent_start = parent->start; @@ -2365,7 +2365,7 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root if (slot >= btrfs_header_nritems(upper) - 1) return 1; - WARN_ON(!btrfs_tree_locked(path->nodes[1])); + btrfs_assert_tree_locked(path->nodes[1]); right = read_node_slot(root, upper, slot + 1); btrfs_tree_lock(right); @@ -2562,7 +2562,7 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root if (right_nritems == 0) return 1; - WARN_ON(!btrfs_tree_locked(path->nodes[1])); + btrfs_assert_tree_locked(path->nodes[1]); left = read_node_slot(root, path->nodes[1], slot - 1); btrfs_tree_lock(left); @@ -4101,7 +4101,7 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) next = read_node_slot(root, c, slot); if (!path->skip_locking) { - WARN_ON(!btrfs_tree_locked(c)); + btrfs_assert_tree_locked(c); btrfs_tree_lock(next); btrfs_set_lock_blocking(next); } @@ -4126,7 +4126,7 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) reada_for_search(root, path, level, slot, 0); next = read_node_slot(root, next, 0); if (!path->skip_locking) { - WARN_ON(!btrfs_tree_locked(path->nodes[level])); + btrfs_assert_tree_locked(path->nodes[level]); btrfs_tree_lock(next); btrfs_set_lock_blocking(next); } diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index adda739a0215..3e18175248e0 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -857,7 +857,7 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *btree_inode = root->fs_info->btree_inode; if (btrfs_header_generation(buf) == root->fs_info->running_transaction->transid) { - WARN_ON(!btrfs_tree_locked(buf)); + btrfs_assert_tree_locked(buf); /* ugh, clear_extent_buffer_dirty can be expensive */ btrfs_set_lock_blocking(buf); @@ -2361,7 +2361,7 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) btrfs_set_lock_blocking(buf); - WARN_ON(!btrfs_tree_locked(buf)); + btrfs_assert_tree_locked(buf); if (transid != root->fs_info->generation) { printk(KERN_CRIT "btrfs transid mismatch buffer %llu, " "found %llu running %llu\n", diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 6b5966aacf44..9abf81f71c46 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -4418,13 
+4418,13 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans, path = btrfs_alloc_path(); BUG_ON(!path); - BUG_ON(!btrfs_tree_locked(parent)); + btrfs_assert_tree_locked(parent); parent_level = btrfs_header_level(parent); extent_buffer_get(parent); path->nodes[parent_level] = parent; path->slots[parent_level] = btrfs_header_nritems(parent); - BUG_ON(!btrfs_tree_locked(node)); + btrfs_assert_tree_locked(node); level = btrfs_header_level(node); extent_buffer_get(node); path->nodes[level] = node; diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c index 85506c4a3af7..47b0a88c12a2 100644 --- a/fs/btrfs/locking.c +++ b/fs/btrfs/locking.c @@ -220,8 +220,8 @@ int btrfs_tree_unlock(struct extent_buffer *eb) return 0; } -int btrfs_tree_locked(struct extent_buffer *eb) +void btrfs_assert_tree_locked(struct extent_buffer *eb) { - return test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags) || - spin_is_locked(&eb->lock); + if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) + assert_spin_locked(&eb->lock); } diff --git a/fs/btrfs/locking.h b/fs/btrfs/locking.h index 6bb0afbff928..6c4ce457168c 100644 --- a/fs/btrfs/locking.h +++ b/fs/btrfs/locking.h @@ -21,11 +21,11 @@ int btrfs_tree_lock(struct extent_buffer *eb); int btrfs_tree_unlock(struct extent_buffer *eb); -int btrfs_tree_locked(struct extent_buffer *eb); int btrfs_try_tree_lock(struct extent_buffer *eb); int btrfs_try_spin_lock(struct extent_buffer *eb); void btrfs_set_lock_blocking(struct extent_buffer *eb); void btrfs_clear_lock_blocking(struct extent_buffer *eb); +void btrfs_assert_tree_locked(struct extent_buffer *eb); #endif diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 384b38d3e8e2..161042746afc 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -234,7 +234,6 @@ struct cpufreq_driver { int (*suspend) (struct cpufreq_policy *policy, pm_message_t pmsg); int (*resume) (struct cpufreq_policy *policy); struct freq_attr **attr; - bool hide_interface; }; /* flags */ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ec54785d34f9..659366734f3f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1079,6 +1079,7 @@ extern void synchronize_net(void); extern int register_netdevice_notifier(struct notifier_block *nb); extern int unregister_netdevice_notifier(struct notifier_block *nb); extern int init_dummy_netdev(struct net_device *dev); +extern void netdev_resync_ops(struct net_device *dev); extern int call_netdevice_notifiers(unsigned long val, struct net_device *dev); extern struct net_device *dev_get_by_index(struct net *net, int ifindex); diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 545b068bcb70..54a968b4b924 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -5,6 +5,7 @@ #include <linux/slab.h> /* For kmalloc() */ #include <linux/smp.h> #include <linux/cpumask.h> +#include <linux/pfn.h> #include <asm/percpu.h> @@ -52,17 +53,18 @@ #define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var) #define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var) -/* Enough to cover all DEFINE_PER_CPUs in kernel, including modules. 
*/ -#ifndef PERCPU_ENOUGH_ROOM +/* enough to cover all DEFINE_PER_CPUs in modules */ #ifdef CONFIG_MODULES -#define PERCPU_MODULE_RESERVE 8192 +#define PERCPU_MODULE_RESERVE (8 << 10) #else -#define PERCPU_MODULE_RESERVE 0 +#define PERCPU_MODULE_RESERVE 0 #endif +#ifndef PERCPU_ENOUGH_ROOM #define PERCPU_ENOUGH_ROOM \ - (__per_cpu_end - __per_cpu_start + PERCPU_MODULE_RESERVE) -#endif /* PERCPU_ENOUGH_ROOM */ + (ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES) + \ + PERCPU_MODULE_RESERVE) +#endif /* * Must be an lvalue. Since @var must be a simple identifier, @@ -79,35 +81,24 @@ #ifdef CONFIG_HAVE_DYNAMIC_PER_CPU_AREA /* minimum unit size, also is the maximum supported allocation size */ -#define PCPU_MIN_UNIT_SIZE (16UL << PAGE_SHIFT) +#define PCPU_MIN_UNIT_SIZE PFN_ALIGN(64 << 10) /* * PERCPU_DYNAMIC_RESERVE indicates the amount of free area to piggy - * back on the first chunk if arch is manually allocating and mapping - * it for faster access (as a part of large page mapping for example). - * Note that dynamic percpu allocator covers both static and dynamic - * areas, so these values are bigger than PERCPU_MODULE_RESERVE. + * back on the first chunk for dynamic percpu allocation if arch is + * manually allocating and mapping it for faster access (as a part of + * large page mapping for example). * - * On typical configuration with modules, the following values leave - * about 8k of free space on the first chunk after boot on both x86_32 - * and 64 when module support is enabled. When module support is - * disabled, it's much tighter. + * The following values give between one and two pages of free space + * after typical minimal boot (2-way SMP, single disk and NIC) with + * both defconfig and a distro config on x86_64 and 32. More + * intelligent way to determine this would be nice. */ -#ifndef PERCPU_DYNAMIC_RESERVE -# if BITS_PER_LONG > 32 -# ifdef CONFIG_MODULES -# define PERCPU_DYNAMIC_RESERVE (6 << PAGE_SHIFT) -# else -# define PERCPU_DYNAMIC_RESERVE (4 << PAGE_SHIFT) -# endif -# else -# ifdef CONFIG_MODULES -# define PERCPU_DYNAMIC_RESERVE (4 << PAGE_SHIFT) -# else -# define PERCPU_DYNAMIC_RESERVE (2 << PAGE_SHIFT) -# endif -# endif -#endif /* PERCPU_DYNAMIC_RESERVE */ +#if BITS_PER_LONG > 32 +#define PERCPU_DYNAMIC_RESERVE (20 << 10) +#else +#define PERCPU_DYNAMIC_RESERVE (12 << 10) +#endif extern void *pcpu_base_addr; @@ -115,9 +106,10 @@ typedef struct page * (*pcpu_get_page_fn_t)(unsigned int cpu, int pageno); typedef void (*pcpu_populate_pte_fn_t)(unsigned long addr); extern size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, - size_t static_size, size_t unit_size, - size_t free_size, void *base_addr, - pcpu_populate_pte_fn_t populate_pte_fn); + size_t static_size, size_t reserved_size, + ssize_t unit_size, ssize_t dyn_size, + void *base_addr, + pcpu_populate_pte_fn_t populate_pte_fn); /* * Use this to get to a cpu's version of the per-cpu object @@ -126,6 +118,8 @@ extern size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, */ #define per_cpu_ptr(ptr, cpu) SHIFT_PERCPU_PTR((ptr), per_cpu_offset((cpu))) +extern void *__alloc_reserved_percpu(size_t size, size_t align); + #else /* CONFIG_HAVE_DYNAMIC_PER_CPU_AREA */ struct percpu_data { diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 152b2f03fb86..69b56988813d 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -31,8 +31,8 @@ struct tracepoint { * Keep in sync with vmlinux.lds.h. */ -#define TPPROTO(args...) args -#define TPARGS(args...) 
args +#define TP_PROTO(args...) args +#define TP_ARGS(args...) args #ifdef CONFIG_TRACEPOINTS @@ -65,7 +65,7 @@ struct tracepoint { { \ if (unlikely(__tracepoint_##name.state)) \ __DO_TRACE(&__tracepoint_##name, \ - TPPROTO(proto), TPARGS(args)); \ + TP_PROTO(proto), TP_ARGS(args)); \ } \ static inline int register_trace_##name(void (*probe)(proto)) \ { \ @@ -157,7 +157,7 @@ static inline void tracepoint_synchronize_unregister(void) #define TRACE_FORMAT(name, proto, args, fmt) \ DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) -#define TRACE_EVENT_FORMAT(name, proto, args, fmt, struct, tpfmt) \ - TRACE_FORMAT(name, PARAMS(proto), PARAMS(args), PARAMS(fmt)) +#define TRACE_EVENT(name, proto, args, struct, print, assign) \ + DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) #endif diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 6fc13d905c5f..ded434b032a4 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -109,11 +109,6 @@ extern struct list_head net_namespace_list; #ifdef CONFIG_NET_NS extern void __put_net(struct net *net); -static inline int net_alive(struct net *net) -{ - return net && atomic_read(&net->count); -} - static inline struct net *get_net(struct net *net) { atomic_inc(&net->count); @@ -145,11 +140,6 @@ int net_eq(const struct net *net1, const struct net *net2) } #else -static inline int net_alive(struct net *net) -{ - return 1; -} - static inline struct net *get_net(struct net *net) { return net; @@ -234,6 +224,23 @@ struct pernet_operations { void (*exit)(struct net *net); }; +/* + * Use these carefully. If you implement a network device and it + * needs per network namespace operations use device pernet operations, + * otherwise use pernet subsys operations. + * + * This is critically important. Most of the network code cleanup + * runs with the assumption that dev_remove_pack has been called so no + * new packets will arrive during and after the cleanup functions have + * been called. dev_remove_pack is not per namespace so instead the + * guarantee of no more packets arriving in a network namespace is + * provided by ensuring that all network devices and all sockets have + * left the network namespace before the cleanup methods are called. + * + * For the longest time the ipv4 icmp code was registered as a pernet + * device which caused kernel oops, and panics during network + * namespace cleanup. So please don't get this wrong. 
+ */ extern int register_pernet_subsys(struct pernet_operations *); extern void unregister_pernet_subsys(struct pernet_operations *); extern int register_pernet_gen_subsys(int *id, struct pernet_operations *); diff --git a/include/trace/block.h b/include/trace/block.h index 25c6a1fd5b77..25b7068b819e 100644 --- a/include/trace/block.h +++ b/include/trace/block.h @@ -5,72 +5,72 @@ #include <linux/tracepoint.h> DECLARE_TRACE(block_rq_abort, - TPPROTO(struct request_queue *q, struct request *rq), - TPARGS(q, rq)); + TP_PROTO(struct request_queue *q, struct request *rq), + TP_ARGS(q, rq)); DECLARE_TRACE(block_rq_insert, - TPPROTO(struct request_queue *q, struct request *rq), - TPARGS(q, rq)); + TP_PROTO(struct request_queue *q, struct request *rq), + TP_ARGS(q, rq)); DECLARE_TRACE(block_rq_issue, - TPPROTO(struct request_queue *q, struct request *rq), - TPARGS(q, rq)); + TP_PROTO(struct request_queue *q, struct request *rq), + TP_ARGS(q, rq)); DECLARE_TRACE(block_rq_requeue, - TPPROTO(struct request_queue *q, struct request *rq), - TPARGS(q, rq)); + TP_PROTO(struct request_queue *q, struct request *rq), + TP_ARGS(q, rq)); DECLARE_TRACE(block_rq_complete, - TPPROTO(struct request_queue *q, struct request *rq), - TPARGS(q, rq)); + TP_PROTO(struct request_queue *q, struct request *rq), + TP_ARGS(q, rq)); DECLARE_TRACE(block_bio_bounce, - TPPROTO(struct request_queue *q, struct bio *bio), - TPARGS(q, bio)); + TP_PROTO(struct request_queue *q, struct bio *bio), + TP_ARGS(q, bio)); DECLARE_TRACE(block_bio_complete, - TPPROTO(struct request_queue *q, struct bio *bio), - TPARGS(q, bio)); + TP_PROTO(struct request_queue *q, struct bio *bio), + TP_ARGS(q, bio)); DECLARE_TRACE(block_bio_backmerge, - TPPROTO(struct request_queue *q, struct bio *bio), - TPARGS(q, bio)); + TP_PROTO(struct request_queue *q, struct bio *bio), + TP_ARGS(q, bio)); DECLARE_TRACE(block_bio_frontmerge, - TPPROTO(struct request_queue *q, struct bio *bio), - TPARGS(q, bio)); + TP_PROTO(struct request_queue *q, struct bio *bio), + TP_ARGS(q, bio)); DECLARE_TRACE(block_bio_queue, - TPPROTO(struct request_queue *q, struct bio *bio), - TPARGS(q, bio)); + TP_PROTO(struct request_queue *q, struct bio *bio), + TP_ARGS(q, bio)); DECLARE_TRACE(block_getrq, - TPPROTO(struct request_queue *q, struct bio *bio, int rw), - TPARGS(q, bio, rw)); + TP_PROTO(struct request_queue *q, struct bio *bio, int rw), + TP_ARGS(q, bio, rw)); DECLARE_TRACE(block_sleeprq, - TPPROTO(struct request_queue *q, struct bio *bio, int rw), - TPARGS(q, bio, rw)); + TP_PROTO(struct request_queue *q, struct bio *bio, int rw), + TP_ARGS(q, bio, rw)); DECLARE_TRACE(block_plug, - TPPROTO(struct request_queue *q), - TPARGS(q)); + TP_PROTO(struct request_queue *q), + TP_ARGS(q)); DECLARE_TRACE(block_unplug_timer, - TPPROTO(struct request_queue *q), - TPARGS(q)); + TP_PROTO(struct request_queue *q), + TP_ARGS(q)); DECLARE_TRACE(block_unplug_io, - TPPROTO(struct request_queue *q), - TPARGS(q)); + TP_PROTO(struct request_queue *q), + TP_ARGS(q)); DECLARE_TRACE(block_split, - TPPROTO(struct request_queue *q, struct bio *bio, unsigned int pdu), - TPARGS(q, bio, pdu)); + TP_PROTO(struct request_queue *q, struct bio *bio, unsigned int pdu), + TP_ARGS(q, bio, pdu)); DECLARE_TRACE(block_remap, - TPPROTO(struct request_queue *q, struct bio *bio, dev_t dev, - sector_t from, sector_t to), - TPARGS(q, bio, dev, from, to)); + TP_PROTO(struct request_queue *q, struct bio *bio, dev_t dev, + sector_t from, sector_t to), + TP_ARGS(q, bio, dev, from, to)); #endif diff --git 
a/include/trace/irq_event_types.h b/include/trace/irq_event_types.h index 65850bc5ea06..43bcb74dd49f 100644 --- a/include/trace/irq_event_types.h +++ b/include/trace/irq_event_types.h @@ -8,26 +8,36 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM irq -TRACE_EVENT_FORMAT(irq_handler_entry, - TPPROTO(int irq, struct irqaction *action), - TPARGS(irq, action), - TPFMT("irq=%d handler=%s", irq, action->name), - TRACE_STRUCT( - TRACE_FIELD(int, irq, irq) - ), - TPRAWFMT("irq %d") +/* + * Tracepoint for entry of interrupt handler: + */ +TRACE_FORMAT(irq_handler_entry, + TP_PROTO(int irq, struct irqaction *action), + TP_ARGS(irq, action), + TP_FMT("irq=%d handler=%s", irq, action->name) ); -TRACE_EVENT_FORMAT(irq_handler_exit, - TPPROTO(int irq, struct irqaction *action, int ret), - TPARGS(irq, action, ret), - TPFMT("irq=%d handler=%s return=%s", - irq, action->name, ret ? "handled" : "unhandled"), - TRACE_STRUCT( - TRACE_FIELD(int, irq, irq) - TRACE_FIELD(int, ret, ret) +/* + * Tracepoint for return of an interrupt handler: + */ +TRACE_EVENT(irq_handler_exit, + + TP_PROTO(int irq, struct irqaction *action, int ret), + + TP_ARGS(irq, action, ret), + + TP_STRUCT__entry( + __field( int, irq ) + __field( int, ret ) ), - TPRAWFMT("irq %d ret %d") - ); + + TP_printk("irq=%d return=%s", + __entry->irq, __entry->ret ? "handled" : "unhandled"), + + TP_fast_assign( + __entry->irq = irq; + __entry->ret = ret; + ) +); #undef TRACE_SYSTEM diff --git a/include/trace/lockdep_event_types.h b/include/trace/lockdep_event_types.h index f713d74a82b4..adccfcd2ec8f 100644 --- a/include/trace/lockdep_event_types.h +++ b/include/trace/lockdep_event_types.h @@ -1,5 +1,5 @@ -#ifndef TRACE_EVENT_FORMAT +#ifndef TRACE_FORMAT # error Do not include this file directly. # error Unless you know what you are doing. #endif @@ -10,32 +10,32 @@ #ifdef CONFIG_LOCKDEP TRACE_FORMAT(lock_acquire, - TPPROTO(struct lockdep_map *lock, unsigned int subclass, + TP_PROTO(struct lockdep_map *lock, unsigned int subclass, int trylock, int read, int check, struct lockdep_map *next_lock, unsigned long ip), - TPARGS(lock, subclass, trylock, read, check, next_lock, ip), - TPFMT("%s%s%s", trylock ? "try " : "", + TP_ARGS(lock, subclass, trylock, read, check, next_lock, ip), + TP_FMT("%s%s%s", trylock ? "try " : "", read ? 
"read " : "", lock->name) ); TRACE_FORMAT(lock_release, - TPPROTO(struct lockdep_map *lock, int nested, unsigned long ip), - TPARGS(lock, nested, ip), - TPFMT("%s", lock->name) + TP_PROTO(struct lockdep_map *lock, int nested, unsigned long ip), + TP_ARGS(lock, nested, ip), + TP_FMT("%s", lock->name) ); #ifdef CONFIG_LOCK_STAT TRACE_FORMAT(lock_contended, - TPPROTO(struct lockdep_map *lock, unsigned long ip), - TPARGS(lock, ip), - TPFMT("%s", lock->name) + TP_PROTO(struct lockdep_map *lock, unsigned long ip), + TP_ARGS(lock, ip), + TP_FMT("%s", lock->name) ); TRACE_FORMAT(lock_acquired, - TPPROTO(struct lockdep_map *lock, unsigned long ip), - TPARGS(lock, ip), - TPFMT("%s", lock->name) + TP_PROTO(struct lockdep_map *lock, unsigned long ip), + TP_ARGS(lock, ip), + TP_FMT("%s", lock->name) ); #endif diff --git a/include/trace/power.h b/include/trace/power.h index 38aca537e497..ef204666e983 100644 --- a/include/trace/power.h +++ b/include/trace/power.h @@ -18,15 +18,15 @@ struct power_trace { }; DECLARE_TRACE(power_start, - TPPROTO(struct power_trace *it, unsigned int type, unsigned int state), - TPARGS(it, type, state)); + TP_PROTO(struct power_trace *it, unsigned int type, unsigned int state), + TP_ARGS(it, type, state)); DECLARE_TRACE(power_mark, - TPPROTO(struct power_trace *it, unsigned int type, unsigned int state), - TPARGS(it, type, state)); + TP_PROTO(struct power_trace *it, unsigned int type, unsigned int state), + TP_ARGS(it, type, state)); DECLARE_TRACE(power_end, - TPPROTO(struct power_trace *it), - TPARGS(it)); + TP_PROTO(struct power_trace *it), + TP_ARGS(it)); #endif /* _TRACE_POWER_H */ diff --git a/include/trace/sched_event_types.h b/include/trace/sched_event_types.h index a6de5c1601a0..fb37af672c88 100644 --- a/include/trace/sched_event_types.h +++ b/include/trace/sched_event_types.h @@ -1,6 +1,6 @@ /* use <trace/sched.h> instead */ -#ifndef TRACE_EVENT_FORMAT +#ifndef TRACE_EVENT # error Do not include this file directly. # error Unless you know what you are doing. #endif @@ -8,144 +8,330 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM sched -TRACE_EVENT_FORMAT(sched_kthread_stop, - TPPROTO(struct task_struct *t), - TPARGS(t), - TPFMT("task %s:%d", t->comm, t->pid), - TRACE_STRUCT( - TRACE_FIELD(pid_t, pid, t->pid) +/* + * Tracepoint for calling kthread_stop, performed to end a kthread: + */ +TRACE_EVENT(sched_kthread_stop, + + TP_PROTO(struct task_struct *t), + + TP_ARGS(t), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) ), - TPRAWFMT("task %d") - ); - -TRACE_EVENT_FORMAT(sched_kthread_stop_ret, - TPPROTO(int ret), - TPARGS(ret), - TPFMT("ret=%d", ret), - TRACE_STRUCT( - TRACE_FIELD(int, ret, ret) + + TP_printk("task %s:%d", __entry->comm, __entry->pid), + + TP_fast_assign( + memcpy(__entry->comm, t->comm, TASK_COMM_LEN); + __entry->pid = t->pid; + ) +); + +/* + * Tracepoint for the return value of the kthread stopping: + */ +TRACE_EVENT(sched_kthread_stop_ret, + + TP_PROTO(int ret), + + TP_ARGS(ret), + + TP_STRUCT__entry( + __field( int, ret ) ), - TPRAWFMT("ret=%d") - ); - -TRACE_EVENT_FORMAT(sched_wait_task, - TPPROTO(struct rq *rq, struct task_struct *p), - TPARGS(rq, p), - TPFMT("task %s:%d", p->comm, p->pid), - TRACE_STRUCT( - TRACE_FIELD(pid_t, pid, p->pid) + + TP_printk("ret %d", __entry->ret), + + TP_fast_assign( + __entry->ret = ret; + ) +); + +/* + * Tracepoint for waiting on task to unschedule: + * + * (NOTE: the 'rq' argument is not used by generic trace events, + * but used by the latency tracer plugin. 
) + */ +TRACE_EVENT(sched_wait_task, + + TP_PROTO(struct rq *rq, struct task_struct *p), + + TP_ARGS(rq, p), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, prio ) ), - TPRAWFMT("task %d") - ); - -TRACE_EVENT_FORMAT(sched_wakeup, - TPPROTO(struct rq *rq, struct task_struct *p, int success), - TPARGS(rq, p, success), - TPFMT("task %s:%d %s", - p->comm, p->pid, success ? "succeeded" : "failed"), - TRACE_STRUCT( - TRACE_FIELD(pid_t, pid, p->pid) - TRACE_FIELD(int, success, success) + + TP_printk("task %s:%d [%d]", + __entry->comm, __entry->pid, __entry->prio), + + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->prio = p->prio; + ) +); + +/* + * Tracepoint for waking up a task: + * + * (NOTE: the 'rq' argument is not used by generic trace events, + * but used by the latency tracer plugin. ) + */ +TRACE_EVENT(sched_wakeup, + + TP_PROTO(struct rq *rq, struct task_struct *p, int success), + + TP_ARGS(rq, p, success), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, prio ) + __field( int, success ) ), - TPRAWFMT("task %d success=%d") - ); - -TRACE_EVENT_FORMAT(sched_wakeup_new, - TPPROTO(struct rq *rq, struct task_struct *p, int success), - TPARGS(rq, p, success), - TPFMT("task %s:%d", - p->comm, p->pid, success ? "succeeded" : "failed"), - TRACE_STRUCT( - TRACE_FIELD(pid_t, pid, p->pid) - TRACE_FIELD(int, success, success) + + TP_printk("task %s:%d [%d] success=%d", + __entry->comm, __entry->pid, __entry->prio, + __entry->success), + + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->prio = p->prio; + __entry->success = success; + ) +); + +/* + * Tracepoint for waking up a new task: + * + * (NOTE: the 'rq' argument is not used by generic trace events, + * but used by the latency tracer plugin. ) + */ +TRACE_EVENT(sched_wakeup_new, + + TP_PROTO(struct rq *rq, struct task_struct *p, int success), + + TP_ARGS(rq, p, success), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, prio ) + __field( int, success ) ), - TPRAWFMT("task %d success=%d") - ); - -TRACE_EVENT_FORMAT(sched_switch, - TPPROTO(struct rq *rq, struct task_struct *prev, - struct task_struct *next), - TPARGS(rq, prev, next), - TPFMT("task %s:%d ==> %s:%d", - prev->comm, prev->pid, next->comm, next->pid), - TRACE_STRUCT( - TRACE_FIELD(pid_t, prev_pid, prev->pid) - TRACE_FIELD(int, prev_prio, prev->prio) - TRACE_FIELD_SPECIAL(char next_comm[TASK_COMM_LEN], - next_comm, - TPCMD(memcpy(TRACE_ENTRY->next_comm, - next->comm, - TASK_COMM_LEN))) - TRACE_FIELD(pid_t, next_pid, next->pid) - TRACE_FIELD(int, next_prio, next->prio) + + TP_printk("task %s:%d [%d] success=%d", + __entry->comm, __entry->pid, __entry->prio, + __entry->success), + + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->prio = p->prio; + __entry->success = success; + ) +); + +/* + * Tracepoint for task switches, performed by the scheduler: + * + * (NOTE: the 'rq' argument is not used by generic trace events, + * but used by the latency tracer plugin. 
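Converting these events to TRACE_EVENT() does not change the in-kernel consumer side: DECLARE_TRACE() still generates register_trace_<name>()/unregister_trace_<name>(), so a probe can be attached directly. A minimal sketch using sched_kthread_stop (the probe and module names are made up; the probe prototype must match TP_PROTO exactly):

    #include <linux/module.h>
    #include <trace/sched.h>

    /* hypothetical probe; arguments mirror TP_PROTO(struct task_struct *t) */
    static void probe_kthread_stop(struct task_struct *t)
    {
            pr_info("stopping kthread %s:%d\n", t->comm, t->pid);
    }

    static int __init probe_init(void)
    {
            return register_trace_sched_kthread_stop(probe_kthread_stop);
    }

    static void __exit probe_exit(void)
    {
            unregister_trace_sched_kthread_stop(probe_kthread_stop);
            /* wait for in-flight probe calls to finish before unloading */
            tracepoint_synchronize_unregister();
    }

    module_init(probe_init);
    module_exit(probe_exit);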
) + */ +TRACE_EVENT(sched_switch, + + TP_PROTO(struct rq *rq, struct task_struct *prev, + struct task_struct *next), + + TP_ARGS(rq, prev, next), + + TP_STRUCT__entry( + __array( char, prev_comm, TASK_COMM_LEN ) + __field( pid_t, prev_pid ) + __field( int, prev_prio ) + __array( char, next_comm, TASK_COMM_LEN ) + __field( pid_t, next_pid ) + __field( int, next_prio ) ), - TPRAWFMT("prev %d:%d ==> next %s:%d:%d") - ); - -TRACE_EVENT_FORMAT(sched_migrate_task, - TPPROTO(struct task_struct *p, int orig_cpu, int dest_cpu), - TPARGS(p, orig_cpu, dest_cpu), - TPFMT("task %s:%d from: %d to: %d", - p->comm, p->pid, orig_cpu, dest_cpu), - TRACE_STRUCT( - TRACE_FIELD(pid_t, pid, p->pid) - TRACE_FIELD(int, orig_cpu, orig_cpu) - TRACE_FIELD(int, dest_cpu, dest_cpu) + + TP_printk("task %s:%d [%d] ==> %s:%d [%d]", + __entry->prev_comm, __entry->prev_pid, __entry->prev_prio, + __entry->next_comm, __entry->next_pid, __entry->next_prio), + + TP_fast_assign( + memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN); + __entry->prev_pid = prev->pid; + __entry->prev_prio = prev->prio; + memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN); + __entry->next_pid = next->pid; + __entry->next_prio = next->prio; + ) +); + +/* + * Tracepoint for a task being migrated: + */ +TRACE_EVENT(sched_migrate_task, + + TP_PROTO(struct task_struct *p, int orig_cpu, int dest_cpu), + + TP_ARGS(p, orig_cpu, dest_cpu), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, prio ) + __field( int, orig_cpu ) + __field( int, dest_cpu ) ), - TPRAWFMT("task %d from: %d to: %d") - ); - -TRACE_EVENT_FORMAT(sched_process_free, - TPPROTO(struct task_struct *p), - TPARGS(p), - TPFMT("task %s:%d", p->comm, p->pid), - TRACE_STRUCT( - TRACE_FIELD(pid_t, pid, p->pid) + + TP_printk("task %s:%d [%d] from: %d to: %d", + __entry->comm, __entry->pid, __entry->prio, + __entry->orig_cpu, __entry->dest_cpu), + + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->prio = p->prio; + __entry->orig_cpu = orig_cpu; + __entry->dest_cpu = dest_cpu; + ) +); + +/* + * Tracepoint for freeing a task: + */ +TRACE_EVENT(sched_process_free, + + TP_PROTO(struct task_struct *p), + + TP_ARGS(p), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, prio ) ), - TPRAWFMT("task %d") - ); - -TRACE_EVENT_FORMAT(sched_process_exit, - TPPROTO(struct task_struct *p), - TPARGS(p), - TPFMT("task %s:%d", p->comm, p->pid), - TRACE_STRUCT( - TRACE_FIELD(pid_t, pid, p->pid) + + TP_printk("task %s:%d [%d]", + __entry->comm, __entry->pid, __entry->prio), + + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->prio = p->prio; + ) +); + +/* + * Tracepoint for a task exiting: + */ +TRACE_EVENT(sched_process_exit, + + TP_PROTO(struct task_struct *p), + + TP_ARGS(p), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, prio ) ), - TPRAWFMT("task %d") - ); - -TRACE_EVENT_FORMAT(sched_process_wait, - TPPROTO(struct pid *pid), - TPARGS(pid), - TPFMT("pid %d", pid_nr(pid)), - TRACE_STRUCT( - TRACE_FIELD(pid_t, pid, pid_nr(pid)) + + TP_printk("task %s:%d [%d]", + __entry->comm, __entry->pid, __entry->prio), + + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->prio = p->prio; + ) +); + +/* + * Tracepoint for a waiting task: + */ +TRACE_EVENT(sched_process_wait, + + TP_PROTO(struct pid *pid), + + TP_ARGS(pid), + 
+ TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, prio ) ), - TPRAWFMT("task %d") - ); - -TRACE_EVENT_FORMAT(sched_process_fork, - TPPROTO(struct task_struct *parent, struct task_struct *child), - TPARGS(parent, child), - TPFMT("parent %s:%d child %s:%d", - parent->comm, parent->pid, child->comm, child->pid), - TRACE_STRUCT( - TRACE_FIELD(pid_t, parent, parent->pid) - TRACE_FIELD(pid_t, child, child->pid) + + TP_printk("task %s:%d [%d]", + __entry->comm, __entry->pid, __entry->prio), + + TP_fast_assign( + memcpy(__entry->comm, current->comm, TASK_COMM_LEN); + __entry->pid = pid_nr(pid); + __entry->prio = current->prio; + ) +); + +/* + * Tracepoint for do_fork: + */ +TRACE_EVENT(sched_process_fork, + + TP_PROTO(struct task_struct *parent, struct task_struct *child), + + TP_ARGS(parent, child), + + TP_STRUCT__entry( + __array( char, parent_comm, TASK_COMM_LEN ) + __field( pid_t, parent_pid ) + __array( char, child_comm, TASK_COMM_LEN ) + __field( pid_t, child_pid ) ), - TPRAWFMT("parent %d child %d") - ); - -TRACE_EVENT_FORMAT(sched_signal_send, - TPPROTO(int sig, struct task_struct *p), - TPARGS(sig, p), - TPFMT("sig: %d task %s:%d", sig, p->comm, p->pid), - TRACE_STRUCT( - TRACE_FIELD(int, sig, sig) - TRACE_FIELD(pid_t, pid, p->pid) + + TP_printk("parent %s:%d child %s:%d", + __entry->parent_comm, __entry->parent_pid, + __entry->child_comm, __entry->child_pid), + + TP_fast_assign( + memcpy(__entry->parent_comm, parent->comm, TASK_COMM_LEN); + __entry->parent_pid = parent->pid; + memcpy(__entry->child_comm, child->comm, TASK_COMM_LEN); + __entry->child_pid = child->pid; + ) +); + +/* + * Tracepoint for sending a signal: + */ +TRACE_EVENT(sched_signal_send, + + TP_PROTO(int sig, struct task_struct *p), + + TP_ARGS(sig, p), + + TP_STRUCT__entry( + __field( int, sig ) + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) ), - TPRAWFMT("sig: %d task %d") - ); + + TP_printk("sig: %d task %s:%d", + __entry->sig, __entry->comm, __entry->pid), + + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->sig = sig; + ) +); #undef TRACE_SYSTEM diff --git a/include/trace/workqueue.h b/include/trace/workqueue.h index 867829df4571..7626523deeba 100644 --- a/include/trace/workqueue.h +++ b/include/trace/workqueue.h @@ -6,20 +6,20 @@ #include <linux/sched.h> DECLARE_TRACE(workqueue_insertion, - TPPROTO(struct task_struct *wq_thread, struct work_struct *work), - TPARGS(wq_thread, work)); + TP_PROTO(struct task_struct *wq_thread, struct work_struct *work), + TP_ARGS(wq_thread, work)); DECLARE_TRACE(workqueue_execution, - TPPROTO(struct task_struct *wq_thread, struct work_struct *work), - TPARGS(wq_thread, work)); + TP_PROTO(struct task_struct *wq_thread, struct work_struct *work), + TP_ARGS(wq_thread, work)); /* Trace the creation of one workqueue thread on a cpu */ DECLARE_TRACE(workqueue_creation, - TPPROTO(struct task_struct *wq_thread, int cpu), - TPARGS(wq_thread, cpu)); + TP_PROTO(struct task_struct *wq_thread, int cpu), + TP_ARGS(wq_thread, cpu)); DECLARE_TRACE(workqueue_destruction, - TPPROTO(struct task_struct *wq_thread), - TPARGS(wq_thread)); + TP_PROTO(struct task_struct *wq_thread), + TP_ARGS(wq_thread)); #endif /* __TRACE_WORKQUEUE_H */ diff --git a/kernel/fork.c b/kernel/fork.c index 8de303bdd4e5..6715ebc3761d 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1184,10 +1184,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, #endif clear_all_latency_tracing(p); - /* Our 
parent execution domain becomes current domain - These must match for thread signalling to apply */ - p->parent_exec_id = p->self_exec_id; - /* ok, now we should be set up.. */ p->exit_signal = (clone_flags & CLONE_THREAD) ? -1 : (clone_flags & CSIGNAL); p->pdeath_signal = 0; @@ -1225,10 +1221,13 @@ static struct task_struct *copy_process(unsigned long clone_flags, set_task_cpu(p, smp_processor_id()); /* CLONE_PARENT re-uses the old parent */ - if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) + if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) { p->real_parent = current->real_parent; - else + p->parent_exec_id = current->parent_exec_id; + } else { p->real_parent = current; + p->parent_exec_id = current->self_exec_id; + } spin_lock(¤t->sighand->siglock); diff --git a/kernel/module.c b/kernel/module.c index 90a6d63d9211..8b742f2b3845 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -381,7 +381,7 @@ static void *percpu_modalloc(unsigned long size, unsigned long align, align = PAGE_SIZE; } - ptr = __alloc_percpu(size, align); + ptr = __alloc_reserved_percpu(size, align); if (!ptr) printk(KERN_WARNING "Could not allocate %lu bytes percpu data\n", size); diff --git a/kernel/trace/events.c b/kernel/trace/events.c index f2509cbaacea..9fc918da404f 100644 --- a/kernel/trace/events.c +++ b/kernel/trace/events.c @@ -2,9 +2,7 @@ * This is the place to register all trace points as events. */ -/* someday this needs to go in a generic header */ -#define __STR(x) #x -#define STR(x) __STR(x) +#include <linux/stringify.h> #include <trace/trace_events.h> diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 2bfb7d11fc17..c5e1d8865fe4 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -751,12 +751,7 @@ struct ftrace_event_call { int (*regfunc)(void); void (*unregfunc)(void); int id; - struct dentry *raw_dir; - int raw_enabled; - int type; int (*raw_init)(void); - int (*raw_reg)(void); - void (*raw_unreg)(void); int (*show_format)(struct trace_seq *s); }; diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c index 2d4953f93560..05b176abfd30 100644 --- a/kernel/trace/trace_clock.c +++ b/kernel/trace/trace_clock.c @@ -27,12 +27,19 @@ */ u64 notrace trace_clock_local(void) { + unsigned long flags; + u64 clock; + /* * sched_clock() is an architecture implemented, fast, scalable, * lockless clock. It is not guaranteed to be coherent across * CPUs, nor across CPU idle events. 
*/ - return sched_clock(); + raw_local_irq_save(flags); + clock = sched_clock(); + raw_local_irq_restore(flags); + + return clock; } /* diff --git a/kernel/trace/trace_event_types.h b/kernel/trace/trace_event_types.h index fb4eba166433..5cca4c978bde 100644 --- a/kernel/trace/trace_event_types.h +++ b/kernel/trace/trace_event_types.h @@ -10,7 +10,7 @@ TRACE_EVENT_FORMAT(function, TRACE_FN, ftrace_entry, ignore, TRACE_FIELD(unsigned long, ip, ip) TRACE_FIELD(unsigned long, parent_ip, parent_ip) ), - TPRAWFMT(" %lx <-- %lx") + TP_RAW_FMT(" %lx <-- %lx") ); TRACE_EVENT_FORMAT(funcgraph_entry, TRACE_GRAPH_ENT, @@ -19,7 +19,7 @@ TRACE_EVENT_FORMAT(funcgraph_entry, TRACE_GRAPH_ENT, TRACE_FIELD(unsigned long, graph_ent.func, func) TRACE_FIELD(int, graph_ent.depth, depth) ), - TPRAWFMT("--> %lx (%d)") + TP_RAW_FMT("--> %lx (%d)") ); TRACE_EVENT_FORMAT(funcgraph_exit, TRACE_GRAPH_RET, @@ -28,7 +28,7 @@ TRACE_EVENT_FORMAT(funcgraph_exit, TRACE_GRAPH_RET, TRACE_FIELD(unsigned long, ret.func, func) TRACE_FIELD(int, ret.depth, depth) ), - TPRAWFMT("<-- %lx (%d)") + TP_RAW_FMT("<-- %lx (%d)") ); TRACE_EVENT_FORMAT(wakeup, TRACE_WAKE, ctx_switch_entry, ignore, @@ -41,7 +41,7 @@ TRACE_EVENT_FORMAT(wakeup, TRACE_WAKE, ctx_switch_entry, ignore, TRACE_FIELD(unsigned char, next_state, next_state) TRACE_FIELD(unsigned int, next_cpu, next_cpu) ), - TPRAWFMT("%u:%u:%u ==+ %u:%u:%u [%03u]") + TP_RAW_FMT("%u:%u:%u ==+ %u:%u:%u [%03u]") ); TRACE_EVENT_FORMAT(context_switch, TRACE_CTX, ctx_switch_entry, ignore, @@ -54,7 +54,7 @@ TRACE_EVENT_FORMAT(context_switch, TRACE_CTX, ctx_switch_entry, ignore, TRACE_FIELD(unsigned char, next_state, next_state) TRACE_FIELD(unsigned int, next_cpu, next_cpu) ), - TPRAWFMT("%u:%u:%u ==+ %u:%u:%u [%03u]") + TP_RAW_FMT("%u:%u:%u ==+ %u:%u:%u [%03u]") ); TRACE_EVENT_FORMAT(special, TRACE_SPECIAL, special_entry, ignore, @@ -63,7 +63,7 @@ TRACE_EVENT_FORMAT(special, TRACE_SPECIAL, special_entry, ignore, TRACE_FIELD(unsigned long, arg2, arg2) TRACE_FIELD(unsigned long, arg3, arg3) ), - TPRAWFMT("(%08lx) (%08lx) (%08lx)") + TP_RAW_FMT("(%08lx) (%08lx) (%08lx)") ); /* @@ -83,7 +83,7 @@ TRACE_EVENT_FORMAT(kernel_stack, TRACE_STACK, stack_entry, ignore, TRACE_FIELD(unsigned long, caller[6], stack6) TRACE_FIELD(unsigned long, caller[7], stack7) ), - TPRAWFMT("\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n" + TP_RAW_FMT("\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n" "\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n") ); @@ -98,7 +98,7 @@ TRACE_EVENT_FORMAT(user_stack, TRACE_USER_STACK, userstack_entry, ignore, TRACE_FIELD(unsigned long, caller[6], stack6) TRACE_FIELD(unsigned long, caller[7], stack7) ), - TPRAWFMT("\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n" + TP_RAW_FMT("\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n" "\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n") ); @@ -106,9 +106,10 @@ TRACE_EVENT_FORMAT(print, TRACE_PRINT, print_entry, ignore, TRACE_STRUCT( TRACE_FIELD(unsigned long, ip, ip) TRACE_FIELD(unsigned int, depth, depth) + TRACE_FIELD(char *, fmt, fmt) TRACE_FIELD_ZERO_CHAR(buf) ), - TPRAWFMT("%08lx (%d) %s") + TP_RAW_FMT("%08lx (%d) fmt:%p %s") ); TRACE_EVENT_FORMAT(branch, TRACE_BRANCH, trace_branch, ignore, @@ -118,7 +119,7 @@ TRACE_EVENT_FORMAT(branch, TRACE_BRANCH, trace_branch, ignore, TRACE_FIELD_SPECIAL(char file[TRACE_FUNC_SIZE+1], file, file) TRACE_FIELD(char, correct, correct) ), - TPRAWFMT("%u:%s:%s (%u)") + TP_RAW_FMT("%u:%s:%s (%u)") ); TRACE_EVENT_FORMAT(hw_branch, TRACE_HW_BRANCHES, 
hw_branch_entry, ignore, @@ -126,7 +127,7 @@ TRACE_EVENT_FORMAT(hw_branch, TRACE_HW_BRANCHES, hw_branch_entry, ignore, TRACE_FIELD(u64, from, from) TRACE_FIELD(u64, to, to) ), - TPRAWFMT("from: %llx to: %llx") + TP_RAW_FMT("from: %llx to: %llx") ); TRACE_EVENT_FORMAT(power, TRACE_POWER, trace_power, ignore, @@ -136,7 +137,7 @@ TRACE_EVENT_FORMAT(power, TRACE_POWER, trace_power, ignore, TRACE_FIELD(int, state_data.type, type) TRACE_FIELD(int, state_data.state, state) ), - TPRAWFMT("%llx->%llx type:%u state:%u") + TP_RAW_FMT("%llx->%llx type:%u state:%u") ); TRACE_EVENT_FORMAT(kmem_alloc, TRACE_KMEM_ALLOC, kmemtrace_alloc_entry, ignore, @@ -149,7 +150,7 @@ TRACE_EVENT_FORMAT(kmem_alloc, TRACE_KMEM_ALLOC, kmemtrace_alloc_entry, ignore, TRACE_FIELD(gfp_t, gfp_flags, gfp_flags) TRACE_FIELD(int, node, node) ), - TPRAWFMT("type:%u call_site:%lx ptr:%p req:%lu alloc:%lu" + TP_RAW_FMT("type:%u call_site:%lx ptr:%p req:%lu alloc:%lu" " flags:%x node:%d") ); @@ -159,7 +160,7 @@ TRACE_EVENT_FORMAT(kmem_free, TRACE_KMEM_FREE, kmemtrace_free_entry, ignore, TRACE_FIELD(unsigned long, call_site, call_site) TRACE_FIELD(const void *, ptr, ptr) ), - TPRAWFMT("type:%u call_site:%lx ptr:%p") + TP_RAW_FMT("type:%u call_site:%lx ptr:%p") ); #undef TRACE_SYSTEM diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 4488d90e75ef..1880a6438097 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -59,22 +59,12 @@ static void ftrace_event_enable_disable(struct ftrace_event_call *call, call->enabled = 0; call->unregfunc(); } - if (call->raw_enabled) { - call->raw_enabled = 0; - call->raw_unreg(); - } break; case 1: - if (!call->enabled && - (call->type & TRACE_EVENT_TYPE_PRINTF)) { + if (!call->enabled) { call->enabled = 1; call->regfunc(); } - if (!call->raw_enabled && - (call->type & TRACE_EVENT_TYPE_RAW)) { - call->raw_enabled = 1; - call->raw_reg(); - } break; } } @@ -300,7 +290,7 @@ event_enable_read(struct file *filp, char __user *ubuf, size_t cnt, struct ftrace_event_call *call = filp->private_data; char *buf; - if (call->enabled || call->raw_enabled) + if (call->enabled) buf = "1\n"; else buf = "0\n"; @@ -346,110 +336,10 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt, return cnt; } -static ssize_t -event_type_read(struct file *filp, char __user *ubuf, size_t cnt, - loff_t *ppos) -{ - struct ftrace_event_call *call = filp->private_data; - char buf[16]; - int r = 0; - - if (call->type & TRACE_EVENT_TYPE_PRINTF) - r += sprintf(buf, "printf\n"); - - if (call->type & TRACE_EVENT_TYPE_RAW) - r += sprintf(buf+r, "raw\n"); - - return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); -} - -static ssize_t -event_type_write(struct file *filp, const char __user *ubuf, size_t cnt, - loff_t *ppos) -{ - struct ftrace_event_call *call = filp->private_data; - char buf[64]; - - /* - * If there's only one type, we can't change it. - * And currently we always have printf type, and we - * may or may not have raw type. - * - * This is a redundant check, the file should be read - * only if this is the case anyway. - */ - - if (!call->raw_init) - return -EPERM; - - if (cnt >= sizeof(buf)) - return -EINVAL; - - if (copy_from_user(&buf, ubuf, cnt)) - return -EFAULT; - - buf[cnt] = 0; - - if (!strncmp(buf, "printf", 6) && - (!buf[6] || isspace(buf[6]))) { - - call->type = TRACE_EVENT_TYPE_PRINTF; - - /* - * If raw enabled, the disable it and enable - * printf type. 
- */ - if (call->raw_enabled) { - call->raw_enabled = 0; - call->raw_unreg(); - - call->enabled = 1; - call->regfunc(); - } - - } else if (!strncmp(buf, "raw", 3) && - (!buf[3] || isspace(buf[3]))) { - - call->type = TRACE_EVENT_TYPE_RAW; - - /* - * If printf enabled, the disable it and enable - * raw type. - */ - if (call->enabled) { - call->enabled = 0; - call->unregfunc(); - - call->raw_enabled = 1; - call->raw_reg(); - } - } else - return -EINVAL; - - *ppos += cnt; - - return cnt; -} - -static ssize_t -event_available_types_read(struct file *filp, char __user *ubuf, size_t cnt, - loff_t *ppos) -{ - struct ftrace_event_call *call = filp->private_data; - char buf[16]; - int r = 0; - - r += sprintf(buf, "printf\n"); - - if (call->raw_init) - r += sprintf(buf+r, "raw\n"); - - return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); -} - #undef FIELD -#define FIELD(type, name) \ - #type, #name, offsetof(typeof(field), name), sizeof(field.name) +#define FIELD(type, name) \ + #type, #name, (unsigned int)offsetof(typeof(field), name), \ + (unsigned int)sizeof(field.name) static int trace_write_header(struct trace_seq *s) { @@ -457,11 +347,11 @@ static int trace_write_header(struct trace_seq *s) /* struct trace_entry */ return trace_seq_printf(s, - "\tfield:%s %s;\toffset:%lu;\tsize:%lu;\n" - "\tfield:%s %s;\toffset:%lu;\tsize:%lu;\n" - "\tfield:%s %s;\toffset:%lu;\tsize:%lu;\n" - "\tfield:%s %s;\toffset:%lu;\tsize:%lu;\n" - "\tfield:%s %s;\toffset:%lu;\tsize:%lu;\n" + "\tfield:%s %s;\toffset:%u;\tsize:%u;\n" + "\tfield:%s %s;\toffset:%u;\tsize:%u;\n" + "\tfield:%s %s;\toffset:%u;\tsize:%u;\n" + "\tfield:%s %s;\toffset:%u;\tsize:%u;\n" + "\tfield:%s %s;\toffset:%u;\tsize:%u;\n" "\n", FIELD(unsigned char, type), FIELD(unsigned char, flags), @@ -469,6 +359,7 @@ static int trace_write_header(struct trace_seq *s) FIELD(int, pid), FIELD(int, tgid)); } + static ssize_t event_format_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) @@ -526,13 +417,6 @@ static const struct seq_operations show_set_event_seq_ops = { .stop = t_stop, }; -static const struct file_operations ftrace_avail_fops = { - .open = ftrace_event_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - static const struct file_operations ftrace_set_event_fops = { .open = ftrace_event_seq_open, .read = seq_read, @@ -547,17 +431,6 @@ static const struct file_operations ftrace_enable_fops = { .write = event_enable_write, }; -static const struct file_operations ftrace_type_fops = { - .open = tracing_open_generic, - .read = event_type_read, - .write = event_type_write, -}; - -static const struct file_operations ftrace_available_types_fops = { - .open = tracing_open_generic, - .read = event_available_types_read, -}; - static const struct file_operations ftrace_event_format_fops = { .open = tracing_open_generic, .read = event_format_read, @@ -646,9 +519,6 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events) } } - /* default the output to printf */ - call->type = TRACE_EVENT_TYPE_PRINTF; - call->dir = debugfs_create_dir(call->name, d_events); if (!call->dir) { pr_warning("Could not create debugfs " @@ -664,21 +534,6 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events) "'%s/enable' entry\n", call->name); } - /* Only let type be writable, if we can change it */ - entry = debugfs_create_file("type", - call->raw_init ? 
0644 : 0444, - call->dir, call, - &ftrace_type_fops); - if (!entry) - pr_warning("Could not create debugfs " - "'%s/type' entry\n", call->name); - - entry = debugfs_create_file("available_types", 0444, call->dir, call, - &ftrace_available_types_fops); - if (!entry) - pr_warning("Could not create debugfs " - "'%s/available_types' entry\n", call->name); - /* A trace may not want to export its format */ if (!call->show_format) return 0; @@ -703,13 +558,6 @@ static __init int event_trace_init(void) if (!d_tracer) return 0; - entry = debugfs_create_file("available_events", 0444, d_tracer, - (void *)&show_event_seq_ops, - &ftrace_avail_fops); - if (!entry) - pr_warning("Could not create debugfs " - "'available_events' entry\n"); - entry = debugfs_create_file("set_event", 0644, d_tracer, (void *)&show_set_event_seq_ops, &ftrace_set_event_fops); diff --git a/kernel/trace/trace_events_stage_1.h b/kernel/trace/trace_events_stage_1.h index 3830a731424c..15e9bf965a18 100644 --- a/kernel/trace/trace_events_stage_1.h +++ b/kernel/trace/trace_events_stage_1.h @@ -17,20 +17,21 @@ #undef TRACE_FORMAT #define TRACE_FORMAT(call, proto, args, fmt) -#undef TRACE_EVENT_FORMAT -#define TRACE_EVENT_FORMAT(name, proto, args, fmt, tstruct, tpfmt) \ - struct ftrace_raw_##name { \ - struct trace_entry ent; \ - tstruct \ - }; \ - static struct ftrace_event_call event_##name +#undef __array +#define __array(type, item, len) type item[len]; + +#undef __field +#define __field(type, item) type item; -#undef TRACE_STRUCT -#define TRACE_STRUCT(args...) args +#undef TP_STRUCT__entry +#define TP_STRUCT__entry(args...) args -#define TRACE_FIELD(type, item, assign) \ - type item; -#define TRACE_FIELD_SPECIAL(type_item, item, cmd) \ - type_item; +#undef TRACE_EVENT +#define TRACE_EVENT(name, proto, args, tstruct, print, assign) \ + struct ftrace_raw_##name { \ + struct trace_entry ent; \ + tstruct \ + }; \ + static struct ftrace_event_call event_##name #include <trace/trace_event_types.h> diff --git a/kernel/trace/trace_events_stage_2.h b/kernel/trace/trace_events_stage_2.h index d24a97e74aea..d91bf4c56661 100644 --- a/kernel/trace/trace_events_stage_2.h +++ b/kernel/trace/trace_events_stage_2.h @@ -20,7 +20,7 @@ * * field = (typeof(field))entry; * - * ret = trace_seq_printf(s, <TPRAWFMT> "%s", <ARGS> "\n"); + * ret = trace_seq_printf(s, <TP_RAW_FMT> "%s", <ARGS> "\n"); * if (!ret) * return TRACE_TYPE_PARTIAL_LINE; * @@ -32,23 +32,14 @@ * in binary. */ -#undef TRACE_STRUCT -#define TRACE_STRUCT(args...) args +#undef __entry +#define __entry field -#undef TRACE_FIELD -#define TRACE_FIELD(type, item, assign) \ - field->item, +#undef TP_printk +#define TP_printk(fmt, args...) fmt "\n", args -#undef TRACE_FIELD_SPECIAL -#define TRACE_FIELD_SPECIAL(type_item, item, cmd) \ - field->item, - - -#undef TPRAWFMT -#define TPRAWFMT(args...) args - -#undef TRACE_EVENT_FORMAT -#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \ +#undef TRACE_EVENT +#define TRACE_EVENT(call, proto, args, tstruct, print, assign) \ enum print_line_t \ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ { \ @@ -66,14 +57,76 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ \ field = (typeof(field))entry; \ \ - ret = trace_seq_printf(s, tpfmt "%s", tstruct "\n"); \ + ret = trace_seq_printf(s, print); \ if (!ret) \ return TRACE_TYPE_PARTIAL_LINE; \ \ return TRACE_TYPE_HANDLED; \ } - + #include <trace/trace_event_types.h> -#include "trace_format.h" +/* + * Setup the showing format of trace point. 
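To see what these stage 1 definitions produce, take sched_kthread_stop from sched_event_types.h above; its TP_STRUCT__entry() expands to roughly the following (a sketch of the generated code, not literal preprocessor output):

    /* stage 1: TP_STRUCT__entry() becomes the raw ring-buffer record */
    struct ftrace_raw_sched_kthread_stop {
            struct trace_entry      ent;
            char                    comm[TASK_COMM_LEN];    /* __array() */
            pid_t                   pid;                    /* __field() */
    };
    static struct ftrace_event_call event_sched_kthread_stop;

Stage 2 then emits ftrace_raw_output_sched_kthread_stop(), which casts a ring-buffer entry to this struct and prints it with the TP_printk() format, resolving the __entry-> references against the fields above.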
+ * + * int + * ftrace_format_##call(struct trace_seq *s) + * { + * struct ftrace_raw_##call field; + * int ret; + * + * ret = trace_seq_printf(s, #type " " #item ";" + * " size:%d; offset:%d;\n", + * sizeof(field.type), + * offsetof(struct ftrace_raw_##call, + * item)); + * + * } + */ + +#undef TP_STRUCT__entry +#define TP_STRUCT__entry(args...) args + +#undef __field +#define __field(type, item) \ + ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ + "offset:%u;\tsize:%u;\n", \ + (unsigned int)offsetof(typeof(field), item), \ + (unsigned int)sizeof(field.item)); \ + if (!ret) \ + return 0; + +#undef __array +#define __array(type, item, len) \ + ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \ + "offset:%u;\tsize:%u;\n", \ + (unsigned int)offsetof(typeof(field), item), \ + (unsigned int)sizeof(field.item)); \ + if (!ret) \ + return 0; + +#undef __entry +#define __entry "REC" + +#undef TP_printk +#define TP_printk(fmt, args...) "%s, %s\n", #fmt, #args + +#undef TP_fast_assign +#define TP_fast_assign(args...) args + +#undef TRACE_EVENT +#define TRACE_EVENT(call, proto, args, tstruct, print, func) \ +static int \ +ftrace_format_##call(struct trace_seq *s) \ +{ \ + struct ftrace_raw_##call field; \ + int ret; \ + \ + tstruct; \ + \ + trace_seq_printf(s, "\nprint fmt: " print); \ + \ + return ret; \ +} + #include <trace/trace_event_types.h> diff --git a/kernel/trace/trace_events_stage_3.h b/kernel/trace/trace_events_stage_3.h index 2c8d76c7dbed..3ba55d4ab073 100644 --- a/kernel/trace/trace_events_stage_3.h +++ b/kernel/trace/trace_events_stage_3.h @@ -35,7 +35,7 @@ * } * * - * For those macros defined with TRACE_EVENT_FORMAT: + * For those macros defined with TRACE_EVENT: * * static struct ftrace_event_call event_<call>; * @@ -106,8 +106,8 @@ * */ -#undef TPFMT -#define TPFMT(fmt, args...) fmt "\n", ##args +#undef TP_FMT +#define TP_FMT(fmt, args...) fmt "\n", ##args #define _TRACE_FORMAT(call, proto, args, fmt) \ static void ftrace_event_##call(proto) \ @@ -139,32 +139,16 @@ static struct ftrace_event_call __used \ __attribute__((__aligned__(4))) \ __attribute__((section("_ftrace_events"))) event_##call = { \ .name = #call, \ - .system = STR(TRACE_SYSTEM), \ + .system = __stringify(TRACE_SYSTEM), \ .regfunc = ftrace_reg_event_##call, \ .unregfunc = ftrace_unreg_event_##call, \ } -#undef TRACE_FIELD -#define TRACE_FIELD(type, item, assign)\ - entry->item = assign; +#undef __entry +#define __entry entry -#undef TRACE_FIELD -#define TRACE_FIELD(type, item, assign)\ - entry->item = assign; - -#undef TPCMD -#define TPCMD(cmd...) 
cmd - -#undef TRACE_ENTRY -#define TRACE_ENTRY entry - -#undef TRACE_FIELD_SPECIAL -#define TRACE_FIELD_SPECIAL(type_item, item, cmd) \ - cmd; - -#undef TRACE_EVENT_FORMAT -#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \ -_TRACE_FORMAT(call, PARAMS(proto), PARAMS(args), PARAMS(fmt)) \ +#undef TRACE_EVENT +#define TRACE_EVENT(call, proto, args, tstruct, print, assign) \ \ static struct ftrace_event_call event_##call; \ \ @@ -185,7 +169,7 @@ static void ftrace_raw_event_##call(proto) \ return; \ entry = ring_buffer_event_data(event); \ \ - tstruct; \ + assign; \ \ trace_current_buffer_unlock_commit(event, irq_flags, pc); \ } \ @@ -225,11 +209,9 @@ static struct ftrace_event_call __used \ __attribute__((__aligned__(4))) \ __attribute__((section("_ftrace_events"))) event_##call = { \ .name = #call, \ - .system = STR(TRACE_SYSTEM), \ - .regfunc = ftrace_reg_event_##call, \ - .unregfunc = ftrace_unreg_event_##call, \ + .system = __stringify(TRACE_SYSTEM), \ .raw_init = ftrace_raw_init_event_##call, \ - .raw_reg = ftrace_raw_reg_event_##call, \ - .raw_unreg = ftrace_raw_unreg_event_##call, \ + .regfunc = ftrace_raw_reg_event_##call, \ + .unregfunc = ftrace_raw_unreg_event_##call, \ .show_format = ftrace_format_##call, \ } diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index 0fb7be73e31c..23ae78430d58 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -15,19 +15,40 @@ #include "trace_output.h" -#include "trace_format.h" + +#undef TRACE_STRUCT +#define TRACE_STRUCT(args...) args + +#undef TRACE_FIELD +#define TRACE_FIELD(type, item, assign) \ + ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ + "offset:%u;\tsize:%u;\n", \ + (unsigned int)offsetof(typeof(field), item), \ + (unsigned int)sizeof(field.item)); \ + if (!ret) \ + return 0; + + +#undef TRACE_FIELD_SPECIAL +#define TRACE_FIELD_SPECIAL(type_item, item, cmd) \ + ret = trace_seq_printf(s, "\tfield special:" #type_item ";\t" \ + "offset:%u;\tsize:%u;\n", \ + (unsigned int)offsetof(typeof(field), item), \ + (unsigned int)sizeof(field.item)); \ + if (!ret) \ + return 0; #undef TRACE_FIELD_ZERO_CHAR -#define TRACE_FIELD_ZERO_CHAR(item) \ - ret = trace_seq_printf(s, "\tfield: char " #item ";\t" \ - "offset:%lu;\tsize:0;\n", \ - offsetof(typeof(field), item)); \ - if (!ret) \ +#define TRACE_FIELD_ZERO_CHAR(item) \ + ret = trace_seq_printf(s, "\tfield: char " #item ";\t" \ + "offset:%u;\tsize:0;\n", \ + (unsigned int)offsetof(typeof(field), item)); \ + if (!ret) \ return 0; -#undef TPRAWFMT -#define TPRAWFMT(args...) args +#undef TP_RAW_FMT +#define TP_RAW_FMT(args...) args #undef TRACE_EVENT_FORMAT #define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \ @@ -57,8 +78,8 @@ ftrace_format_##call(struct trace_seq *s) \ #define TRACE_FIELD(type, item, assign)\ entry->item = assign; -#undef TPCMD -#define TPCMD(cmd...) cmd +#undef TP_CMD +#define TP_CMD(cmd...) cmd #undef TRACE_ENTRY #define TRACE_ENTRY entry diff --git a/kernel/trace/trace_format.h b/kernel/trace/trace_format.h deleted file mode 100644 index 03f9a4c165ca..000000000000 --- a/kernel/trace/trace_format.h +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Setup the showing format of trace point. 
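Continuing the same example: stage 3 generates the probe that records the event into the ring buffer. Roughly (a sketch with error handling trimmed and the buffer-reservation call elided, since that part is unchanged context not visible in this hunk):

    static void ftrace_raw_event_sched_kthread_stop(struct task_struct *t)
    {
            struct ftrace_raw_sched_kthread_stop *entry;
            struct ring_buffer_event *event;
            unsigned long irq_flags;
            int pc;

            /* [reserve a record on the current trace buffer - elided] */
            if (!event)
                    return;
            entry = ring_buffer_event_data(event);

            /* the TP_fast_assign() body fills in the raw record */
            memcpy(entry->comm, t->comm, TASK_COMM_LEN);
            entry->pid = t->pid;

            trace_current_buffer_unlock_commit(event, irq_flags, pc);
    }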
- * - * int - * ftrace_format_##call(struct trace_seq *s) - * { - * struct ftrace_raw_##call field; - * int ret; - * - * ret = trace_seq_printf(s, #type " " #item ";" - * " size:%d; offset:%d;\n", - * sizeof(field.type), - * offsetof(struct ftrace_raw_##call, - * item)); - * - * } - */ - -#undef TRACE_STRUCT -#define TRACE_STRUCT(args...) args - -#undef TRACE_FIELD -#define TRACE_FIELD(type, item, assign) \ - ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ - "offset:%lu;\tsize:%lu;\n", \ - offsetof(typeof(field), item), \ - sizeof(field.item)); \ - if (!ret) \ - return 0; - - -#undef TRACE_FIELD_SPECIAL -#define TRACE_FIELD_SPECIAL(type_item, item, cmd) \ - ret = trace_seq_printf(s, "\tfield special:" #type_item ";\t" \ - "offset:%lu;\tsize:%lu;\n", \ - offsetof(typeof(field), item), \ - sizeof(field.item)); \ - if (!ret) \ - return 0; - -#undef TRACE_EVENT_FORMAT -#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \ -static int \ -ftrace_format_##call(struct trace_seq *s) \ -{ \ - struct ftrace_raw_##call field; \ - int ret; \ - \ - tstruct; \ - \ - trace_seq_printf(s, "\nprint fmt: \"%s\"\n", tpfmt); \ - \ - return ret; \ -} - diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 453ebd3b636e..35257be6a9d6 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -837,7 +837,7 @@ static void graph_trace_open(struct trace_iterator *iter) static void graph_trace_close(struct trace_iterator *iter) { - percpu_free(iter->private); + free_percpu(iter->private); } static struct tracer graph_trace __read_mostly = { diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 7238646b8723..f907a2b29028 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -1,5 +1,6 @@ /* Include in trace.c */ +#include <linux/stringify.h> #include <linux/kthread.h> #include <linux/delay.h> @@ -100,9 +101,6 @@ static inline void warn_failed_init_tracer(struct tracer *trace, int init_ret) #ifdef CONFIG_DYNAMIC_FTRACE -#define __STR(x) #x -#define STR(x) __STR(x) - /* Test dynamic code modification and ftrace filters */ int trace_selftest_startup_dynamic_tracing(struct tracer *trace, struct trace_array *tr, @@ -130,7 +128,7 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace, * start of the function names. We simply put a '*' to * accommodate them. 
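The open-coded __STR()/STR() helpers are dropped in favour of the generic __stringify() from <linux/stringify.h>, which performs the same two-step expansion: the macro argument is expanded first and only then turned into a string. A small sketch (the macro and symbol names here are made up):

    #include <linux/stringify.h>

    #define TEST_FUNC_NAME my_dynamic_test_func     /* hypothetical symbol */

    /* the macro is expanded before stringification, so:
     * func_name == "*my_dynamic_test_func" */
    static const char *func_name = "*" __stringify(TEST_FUNC_NAME);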
*/ - func_name = "*" STR(DYN_FTRACE_TEST_NAME); + func_name = "*" __stringify(DYN_FTRACE_TEST_NAME); /* filter only on our function */ ftrace_set_filter(func_name, strlen(func_name), 1); diff --git a/kernel/trace/trace_workqueue.c b/kernel/trace/trace_workqueue.c index 46c8dc896bd3..739fdacf873b 100644 --- a/kernel/trace/trace_workqueue.c +++ b/kernel/trace/trace_workqueue.c @@ -91,7 +91,7 @@ static void probe_workqueue_creation(struct task_struct *wq_thread, int cpu) struct cpu_workqueue_stats *cws; unsigned long flags; - WARN_ON(cpu < 0 || cpu >= num_possible_cpus()); + WARN_ON(cpu < 0); /* Workqueues are sometimes created in atomic context */ cws = kzalloc(sizeof(struct cpu_workqueue_stats), GFP_ATOMIC); @@ -175,12 +175,12 @@ static void *workqueue_stat_next(void *prev, int idx) spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags); if (list_is_last(&prev_cws->list, &workqueue_cpu_stat(cpu)->list)) { spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags); - for (++cpu ; cpu < num_possible_cpus(); cpu++) { - ret = workqueue_stat_start_cpu(cpu); - if (ret) - return ret; - } - return NULL; + do { + cpu = cpumask_next(cpu, cpu_possible_mask); + if (cpu >= nr_cpu_ids) + return NULL; + } while (!(ret = workqueue_stat_start_cpu(cpu))); + return ret; } spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags); diff --git a/kernel/tsacct.c b/kernel/tsacct.c index 43f891b05a4b..00d59d048edf 100644 --- a/kernel/tsacct.c +++ b/kernel/tsacct.c @@ -122,8 +122,10 @@ void acct_update_integrals(struct task_struct *tsk) if (likely(tsk->mm)) { cputime_t time, dtime; struct timeval value; + unsigned long flags; u64 delta; + local_irq_save(flags); time = tsk->stime + tsk->utime; dtime = cputime_sub(time, tsk->acct_timexpd); jiffies_to_timeval(cputime_to_jiffies(dtime), &value); @@ -131,10 +133,12 @@ void acct_update_integrals(struct task_struct *tsk) delta = delta * USEC_PER_SEC + value.tv_usec; if (delta == 0) - return; + goto out; tsk->acct_timexpd = time; tsk->acct_rss_mem1 += delta * get_mm_rss(tsk->mm); tsk->acct_vm_mem1 += delta * tsk->mm->total_vm; + out: + local_irq_restore(flags); } } diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 25f01578c856..dc1674377009 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -768,7 +768,6 @@ static char *pointer(const char *fmt, char *buf, char *end, void *ptr, static int format_decode(const char *fmt, struct printf_spec *spec) { const char *start = fmt; - bool sign = false; /* we finished early by reading the field width */ if (spec->type == FORMAT_TYPE_WITDH) { @@ -900,7 +899,7 @@ qualifier: case 'd': case 'i': - sign = true; + spec->flags |= SIGN; case 'u': break; @@ -912,7 +911,7 @@ qualifier: if (spec->qualifier == 'L') spec->type = FORMAT_TYPE_LONG_LONG; else if (spec->qualifier == 'l') { - if (sign) + if (spec->flags & SIGN) spec->type = FORMAT_TYPE_LONG; else spec->type = FORMAT_TYPE_ULONG; @@ -921,12 +920,12 @@ qualifier: } else if (spec->qualifier == 't') { spec->type = FORMAT_TYPE_PTRDIFF; } else if (spec->qualifier == 'h') { - if (sign) + if (spec->flags & SIGN) spec->type = FORMAT_TYPE_SHORT; else spec->type = FORMAT_TYPE_USHORT; } else { - if (sign) + if (spec->flags & SIGN) spec->type = FORMAT_TYPE_INT; else spec->type = FORMAT_TYPE_UINT; @@ -1101,8 +1100,8 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args) case FORMAT_TYPE_SHORT: num = (short) va_arg(args, int); break; - case FORMAT_TYPE_UINT: - num = va_arg(args, unsigned int); + case FORMAT_TYPE_INT: + num = (int) va_arg(args, int); break; default: num = 
va_arg(args, unsigned int); diff --git a/mm/percpu.c b/mm/percpu.c index 3d0f5456827c..bfe6a3afaf45 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -62,7 +62,9 @@ #include <linux/pfn.h> #include <linux/rbtree.h> #include <linux/slab.h> +#include <linux/spinlock.h> #include <linux/vmalloc.h> +#include <linux/workqueue.h> #include <asm/cacheflush.h> #include <asm/tlbflush.h> @@ -80,7 +82,8 @@ struct pcpu_chunk { int map_alloc; /* # of map entries allocated */ int *map; /* allocation map */ bool immutable; /* no [de]population allowed */ - struct page *page[]; /* #cpus * UNIT_PAGES */ + struct page **page; /* points to page array */ + struct page *page_ar[]; /* #cpus * UNIT_PAGES */ }; static int pcpu_unit_pages __read_mostly; @@ -93,28 +96,42 @@ static size_t pcpu_chunk_struct_size __read_mostly; void *pcpu_base_addr __read_mostly; EXPORT_SYMBOL_GPL(pcpu_base_addr); -/* the size of kernel static area */ -static int pcpu_static_size __read_mostly; +/* optional reserved chunk, only accessible for reserved allocations */ +static struct pcpu_chunk *pcpu_reserved_chunk; +/* offset limit of the reserved chunk */ +static int pcpu_reserved_chunk_limit; /* - * One mutex to rule them all. - * - * The following mutex is grabbed in the outermost public alloc/free - * interface functions and released only when the operation is - * complete. As such, every function in this file other than the - * outermost functions are called under pcpu_mutex. - * - * It can easily be switched to use spinlock such that only the area - * allocation and page population commit are protected with it doing - * actual [de]allocation without holding any lock. However, given - * what this allocator does, I think it's better to let them run - * sequentially. + * Synchronization rules. + * + * There are two locks - pcpu_alloc_mutex and pcpu_lock. The former + * protects allocation/reclaim paths, chunks and chunk->page arrays. + * The latter is a spinlock and protects the index data structures - + * chunk slots, rbtree, chunks and area maps in chunks. + * + * During allocation, pcpu_alloc_mutex is kept locked all the time and + * pcpu_lock is grabbed and released as necessary. All actual memory + * allocations are done using GFP_KERNEL with pcpu_lock released. + * + * Free path accesses and alters only the index data structures, so it + * can be safely called from atomic context. When memory needs to be + * returned to the system, free path schedules reclaim_work which + * grabs both pcpu_alloc_mutex and pcpu_lock, unlinks chunks to be + * reclaimed, release both locks and frees the chunks. Note that it's + * necessary to grab both locks to remove a chunk from circulation as + * allocation path might be referencing the chunk with only + * pcpu_alloc_mutex locked. 
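Condensed into a schematic (all names below are stand-ins, not the real allocator entry points), the locking rule above amounts to:

    #include <linux/mutex.h>
    #include <linux/spinlock.h>

    static DEFINE_MUTEX(example_alloc_mutex);       /* plays pcpu_alloc_mutex */
    static DEFINE_SPINLOCK(example_lock);           /* plays pcpu_lock */

    static void *example_alloc(void)
    {
            void *area = NULL;

            mutex_lock(&example_alloc_mutex);       /* may sleep */
            spin_lock_irq(&example_lock);
            /* ... pick a chunk and reserve an area in its map ... */
            spin_unlock_irq(&example_lock);
            /* ... GFP_KERNEL page allocation/mapping happens here ... */
            mutex_unlock(&example_alloc_mutex);
            return area;
    }

    static void example_free(void *area)
    {
            unsigned long flags;

            /* only the spinlock, so this is safe from atomic context */
            spin_lock_irqsave(&example_lock, flags);
            /* ... return the area to the map; if the chunk becomes fully
             *     free, schedule the reclaim work instead of freeing
             *     pages here ... */
            spin_unlock_irqrestore(&example_lock, flags);
    }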
*/ -static DEFINE_MUTEX(pcpu_mutex); +static DEFINE_MUTEX(pcpu_alloc_mutex); /* protects whole alloc and reclaim */ +static DEFINE_SPINLOCK(pcpu_lock); /* protects index data structures */ static struct list_head *pcpu_slot __read_mostly; /* chunk list slots */ static struct rb_root pcpu_addr_root = RB_ROOT; /* chunks by address */ +/* reclaim work to release fully free chunks, scheduled from free path */ +static void pcpu_reclaim(struct work_struct *work); +static DECLARE_WORK(pcpu_reclaim_work, pcpu_reclaim); + static int __pcpu_size_to_slot(int size) { int highbit = fls(size); /* size is in bytes */ @@ -161,39 +178,44 @@ static bool pcpu_chunk_page_occupied(struct pcpu_chunk *chunk, } /** - * pcpu_realloc - versatile realloc - * @p: the current pointer (can be NULL for new allocations) - * @size: the current size in bytes (can be 0 for new allocations) - * @new_size: the wanted new size in bytes (can be 0 for free) + * pcpu_mem_alloc - allocate memory + * @size: bytes to allocate + * + * Allocate @size bytes. If @size is smaller than PAGE_SIZE, + * kzalloc() is used; otherwise, vmalloc() is used. The returned + * memory is always zeroed. * - * More robust realloc which can be used to allocate, resize or free a - * memory area of arbitrary size. If the needed size goes over - * PAGE_SIZE, kernel VM is used. + * CONTEXT: + * Does GFP_KERNEL allocation. * * RETURNS: - * The new pointer on success, NULL on failure. + * Pointer to the allocated area on success, NULL on failure. */ -static void *pcpu_realloc(void *p, size_t size, size_t new_size) +static void *pcpu_mem_alloc(size_t size) { - void *new; - - if (new_size <= PAGE_SIZE) - new = kmalloc(new_size, GFP_KERNEL); - else - new = vmalloc(new_size); - if (new_size && !new) - return NULL; - - memcpy(new, p, min(size, new_size)); - if (new_size > size) - memset(new + size, 0, new_size - size); + if (size <= PAGE_SIZE) + return kzalloc(size, GFP_KERNEL); + else { + void *ptr = vmalloc(size); + if (ptr) + memset(ptr, 0, size); + return ptr; + } +} +/** + * pcpu_mem_free - free memory + * @ptr: memory to free + * @size: size of the area + * + * Free @ptr. @ptr should have been allocated using pcpu_mem_alloc(). + */ +static void pcpu_mem_free(void *ptr, size_t size) +{ if (size <= PAGE_SIZE) - kfree(p); + kfree(ptr); else - vfree(p); - - return new; + vfree(ptr); } /** @@ -203,13 +225,17 @@ static void *pcpu_realloc(void *p, size_t size, size_t new_size) * * This function is called after an allocation or free changed @chunk. * New slot according to the changed state is determined and @chunk is - * moved to the slot. + * moved to the slot. Note that the reserved chunk is never put on + * chunk slots. + * + * CONTEXT: + * pcpu_lock. */ static void pcpu_chunk_relocate(struct pcpu_chunk *chunk, int oslot) { int nslot = pcpu_chunk_slot(chunk); - if (oslot != nslot) { + if (chunk != pcpu_reserved_chunk && oslot != nslot) { if (oslot < nslot) list_move(&chunk->list, &pcpu_slot[nslot]); else @@ -249,6 +275,9 @@ static struct rb_node **pcpu_chunk_rb_search(void *addr, * searchs for the chunk with the highest start address which isn't * beyond @addr. * + * CONTEXT: + * pcpu_lock. + * * RETURNS: * The address of the found chunk. */ @@ -257,6 +286,15 @@ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr) struct rb_node *n, *parent; struct pcpu_chunk *chunk; + /* is it in the reserved chunk? 
*/ + if (pcpu_reserved_chunk) { + void *start = pcpu_reserved_chunk->vm->addr; + + if (addr >= start && addr < start + pcpu_reserved_chunk_limit) + return pcpu_reserved_chunk; + } + + /* nah... search the regular ones */ n = *pcpu_chunk_rb_search(addr, &parent); if (!n) { /* no exactly matching chunk, the parent is the closest */ @@ -280,6 +318,9 @@ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr) * @new: chunk to insert * * Insert @new into address rb tree. + * + * CONTEXT: + * pcpu_lock. */ static void pcpu_chunk_addr_insert(struct pcpu_chunk *new) { @@ -292,6 +333,66 @@ static void pcpu_chunk_addr_insert(struct pcpu_chunk *new) } /** + * pcpu_extend_area_map - extend area map for allocation + * @chunk: target chunk + * + * Extend area map of @chunk so that it can accomodate an allocation. + * A single allocation can split an area into three areas, so this + * function makes sure that @chunk->map has at least two extra slots. + * + * CONTEXT: + * pcpu_alloc_mutex, pcpu_lock. pcpu_lock is released and reacquired + * if area map is extended. + * + * RETURNS: + * 0 if noop, 1 if successfully extended, -errno on failure. + */ +static int pcpu_extend_area_map(struct pcpu_chunk *chunk) +{ + int new_alloc; + int *new; + size_t size; + + /* has enough? */ + if (chunk->map_alloc >= chunk->map_used + 2) + return 0; + + spin_unlock_irq(&pcpu_lock); + + new_alloc = PCPU_DFL_MAP_ALLOC; + while (new_alloc < chunk->map_used + 2) + new_alloc *= 2; + + new = pcpu_mem_alloc(new_alloc * sizeof(new[0])); + if (!new) { + spin_lock_irq(&pcpu_lock); + return -ENOMEM; + } + + /* + * Acquire pcpu_lock and switch to new area map. Only free + * could have happened inbetween, so map_used couldn't have + * grown. + */ + spin_lock_irq(&pcpu_lock); + BUG_ON(new_alloc < chunk->map_used + 2); + + size = chunk->map_alloc * sizeof(chunk->map[0]); + memcpy(new, chunk->map, size); + + /* + * map_alloc < PCPU_DFL_MAP_ALLOC indicates that the chunk is + * one of the first chunks and still using static map. + */ + if (chunk->map_alloc >= PCPU_DFL_MAP_ALLOC) + pcpu_mem_free(chunk->map, size); + + chunk->map_alloc = new_alloc; + chunk->map = new; + return 0; +} + +/** * pcpu_split_block - split a map block * @chunk: chunk of interest * @i: index of map block to split @@ -306,33 +407,19 @@ static void pcpu_chunk_addr_insert(struct pcpu_chunk *new) * depending on @head, is reduced by @tail bytes and @tail byte block * is inserted after the target block. * - * RETURNS: - * 0 on success, -errno on failure. + * @chunk->map must have enough free slots to accomodate the split. + * + * CONTEXT: + * pcpu_lock. */ -static int pcpu_split_block(struct pcpu_chunk *chunk, int i, int head, int tail) +static void pcpu_split_block(struct pcpu_chunk *chunk, int i, + int head, int tail) { int nr_extra = !!head + !!tail; - int target = chunk->map_used + nr_extra; - - /* reallocation required? 
*/ - if (chunk->map_alloc < target) { - int new_alloc = chunk->map_alloc; - int *new; - while (new_alloc < target) - new_alloc *= 2; + BUG_ON(chunk->map_alloc < chunk->map_used + nr_extra); - new = pcpu_realloc(chunk->map, - chunk->map_alloc * sizeof(new[0]), - new_alloc * sizeof(new[0])); - if (!new) - return -ENOMEM; - - chunk->map_alloc = new_alloc; - chunk->map = new; - } - - /* insert a new subblock */ + /* insert new subblocks */ memmove(&chunk->map[i + nr_extra], &chunk->map[i], sizeof(chunk->map[0]) * (chunk->map_used - i)); chunk->map_used += nr_extra; @@ -345,7 +432,6 @@ static int pcpu_split_block(struct pcpu_chunk *chunk, int i, int head, int tail) chunk->map[i++] -= tail; chunk->map[i] = tail; } - return 0; } /** @@ -358,8 +444,14 @@ static int pcpu_split_block(struct pcpu_chunk *chunk, int i, int head, int tail) * Note that this function only allocates the offset. It doesn't * populate or map the area. * + * @chunk->map must have at least two free slots. + * + * CONTEXT: + * pcpu_lock. + * * RETURNS: - * Allocated offset in @chunk on success, -errno on failure. + * Allocated offset in @chunk on success, -1 if no matching area is + * found. */ static int pcpu_alloc_area(struct pcpu_chunk *chunk, int size, int align) { @@ -367,22 +459,6 @@ static int pcpu_alloc_area(struct pcpu_chunk *chunk, int size, int align) int max_contig = 0; int i, off; - /* - * The static chunk initially doesn't have map attached - * because kmalloc wasn't available during init. Give it one. - */ - if (unlikely(!chunk->map)) { - chunk->map = pcpu_realloc(NULL, 0, - PCPU_DFL_MAP_ALLOC * sizeof(chunk->map[0])); - if (!chunk->map) - return -ENOMEM; - - chunk->map_alloc = PCPU_DFL_MAP_ALLOC; - chunk->map[chunk->map_used++] = -pcpu_static_size; - if (chunk->free_size) - chunk->map[chunk->map_used++] = chunk->free_size; - } - for (i = 0, off = 0; i < chunk->map_used; off += abs(chunk->map[i++])) { bool is_last = i + 1 == chunk->map_used; int head, tail; @@ -423,8 +499,7 @@ static int pcpu_alloc_area(struct pcpu_chunk *chunk, int size, int align) /* split if warranted */ if (head || tail) { - if (pcpu_split_block(chunk, i, head, tail)) - return -ENOMEM; + pcpu_split_block(chunk, i, head, tail); if (head) { i++; off += head; @@ -451,14 +526,8 @@ static int pcpu_alloc_area(struct pcpu_chunk *chunk, int size, int align) chunk->contig_hint = max_contig; /* fully scanned */ pcpu_chunk_relocate(chunk, oslot); - /* - * Tell the upper layer that this chunk has no area left. - * Note that this is not an error condition but a notification - * to upper layer that it needs to look at other chunks. - * -ENOSPC is chosen as it isn't used in memory subsystem and - * matches the meaning in a way. - */ - return -ENOSPC; + /* tell the upper layer that this chunk has no matching area */ + return -1; } /** @@ -469,6 +538,9 @@ static int pcpu_alloc_area(struct pcpu_chunk *chunk, int size, int align) * Free area starting from @freeme to @chunk. Note that this function * only modifies the allocation map. It doesn't depopulate or unmap * the area. + * + * CONTEXT: + * pcpu_lock. */ static void pcpu_free_area(struct pcpu_chunk *chunk, int freeme) { @@ -554,6 +626,9 @@ static void pcpu_unmap(struct pcpu_chunk *chunk, int page_start, int page_end, * For each cpu, depopulate and unmap pages [@page_start,@page_end) * from @chunk. If @flush is true, vcache is flushed before unmapping * and tlb after. + * + * CONTEXT: + * pcpu_alloc_mutex. 
*/ static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, int off, int size, bool flush) @@ -632,6 +707,9 @@ static int pcpu_map(struct pcpu_chunk *chunk, int page_start, int page_end) * * For each cpu, populate and map pages [@page_start,@page_end) into * @chunk. The area is cleared on return. + * + * CONTEXT: + * pcpu_alloc_mutex, does GFP_KERNEL allocation. */ static int pcpu_populate_chunk(struct pcpu_chunk *chunk, int off, int size) { @@ -686,7 +764,7 @@ static void free_pcpu_chunk(struct pcpu_chunk *chunk) return; if (chunk->vm) free_vm_area(chunk->vm); - pcpu_realloc(chunk->map, chunk->map_alloc * sizeof(chunk->map[0]), 0); + pcpu_mem_free(chunk->map, chunk->map_alloc * sizeof(chunk->map[0])); kfree(chunk); } @@ -698,10 +776,10 @@ static struct pcpu_chunk *alloc_pcpu_chunk(void) if (!chunk) return NULL; - chunk->map = pcpu_realloc(NULL, 0, - PCPU_DFL_MAP_ALLOC * sizeof(chunk->map[0])); + chunk->map = pcpu_mem_alloc(PCPU_DFL_MAP_ALLOC * sizeof(chunk->map[0])); chunk->map_alloc = PCPU_DFL_MAP_ALLOC; chunk->map[chunk->map_used++] = pcpu_unit_size; + chunk->page = chunk->page_ar; chunk->vm = get_vm_area(pcpu_chunk_size, GFP_KERNEL); if (!chunk->vm) { @@ -717,19 +795,21 @@ static struct pcpu_chunk *alloc_pcpu_chunk(void) } /** - * __alloc_percpu - allocate percpu area + * pcpu_alloc - the percpu allocator * @size: size of area to allocate in bytes * @align: alignment of area (max PAGE_SIZE) + * @reserved: allocate from the reserved chunk if available * - * Allocate percpu area of @size bytes aligned at @align. Might - * sleep. Might trigger writeouts. + * Allocate percpu area of @size bytes aligned at @align. + * + * CONTEXT: + * Does GFP_KERNEL allocation. * * RETURNS: * Percpu pointer to the allocated area on success, NULL on failure. */ -void *__alloc_percpu(size_t size, size_t align) +static void *pcpu_alloc(size_t size, size_t align, bool reserved) { - void *ptr = NULL; struct pcpu_chunk *chunk; int slot, off; @@ -739,90 +819,192 @@ void *__alloc_percpu(size_t size, size_t align) return NULL; } - mutex_lock(&pcpu_mutex); + mutex_lock(&pcpu_alloc_mutex); + spin_lock_irq(&pcpu_lock); + + /* serve reserved allocations from the reserved chunk if available */ + if (reserved && pcpu_reserved_chunk) { + chunk = pcpu_reserved_chunk; + if (size > chunk->contig_hint || + pcpu_extend_area_map(chunk) < 0) + goto fail_unlock; + off = pcpu_alloc_area(chunk, size, align); + if (off >= 0) + goto area_found; + goto fail_unlock; + } - /* allocate area */ +restart: + /* search through normal chunks */ for (slot = pcpu_size_to_slot(size); slot < pcpu_nr_slots; slot++) { list_for_each_entry(chunk, &pcpu_slot[slot], list) { if (size > chunk->contig_hint) continue; + + switch (pcpu_extend_area_map(chunk)) { + case 0: + break; + case 1: + goto restart; /* pcpu_lock dropped, restart */ + default: + goto fail_unlock; + } + off = pcpu_alloc_area(chunk, size, align); if (off >= 0) goto area_found; - if (off != -ENOSPC) - goto out_unlock; } } /* hmmm... 
no space left, create a new chunk */ + spin_unlock_irq(&pcpu_lock); + chunk = alloc_pcpu_chunk(); if (!chunk) - goto out_unlock; + goto fail_unlock_mutex; + + spin_lock_irq(&pcpu_lock); pcpu_chunk_relocate(chunk, -1); pcpu_chunk_addr_insert(chunk); - - off = pcpu_alloc_area(chunk, size, align); - if (off < 0) - goto out_unlock; + goto restart; area_found: + spin_unlock_irq(&pcpu_lock); + /* populate, map and clear the area */ if (pcpu_populate_chunk(chunk, off, size)) { + spin_lock_irq(&pcpu_lock); pcpu_free_area(chunk, off); - goto out_unlock; + goto fail_unlock; } - ptr = __addr_to_pcpu_ptr(chunk->vm->addr + off); -out_unlock: - mutex_unlock(&pcpu_mutex); - return ptr; + mutex_unlock(&pcpu_alloc_mutex); + + return __addr_to_pcpu_ptr(chunk->vm->addr + off); + +fail_unlock: + spin_unlock_irq(&pcpu_lock); +fail_unlock_mutex: + mutex_unlock(&pcpu_alloc_mutex); + return NULL; +} + +/** + * __alloc_percpu - allocate dynamic percpu area + * @size: size of area to allocate in bytes + * @align: alignment of area (max PAGE_SIZE) + * + * Allocate percpu area of @size bytes aligned at @align. Might + * sleep. Might trigger writeouts. + * + * CONTEXT: + * Does GFP_KERNEL allocation. + * + * RETURNS: + * Percpu pointer to the allocated area on success, NULL on failure. + */ +void *__alloc_percpu(size_t size, size_t align) +{ + return pcpu_alloc(size, align, false); } EXPORT_SYMBOL_GPL(__alloc_percpu); -static void pcpu_kill_chunk(struct pcpu_chunk *chunk) +/** + * __alloc_reserved_percpu - allocate reserved percpu area + * @size: size of area to allocate in bytes + * @align: alignment of area (max PAGE_SIZE) + * + * Allocate percpu area of @size bytes aligned at @align from reserved + * percpu area if arch has set it up; otherwise, allocation is served + * from the same dynamic area. Might sleep. Might trigger writeouts. + * + * CONTEXT: + * Does GFP_KERNEL allocation. + * + * RETURNS: + * Percpu pointer to the allocated area on success, NULL on failure. + */ +void *__alloc_reserved_percpu(size_t size, size_t align) +{ + return pcpu_alloc(size, align, true); +} + +/** + * pcpu_reclaim - reclaim fully free chunks, workqueue function + * @work: unused + * + * Reclaim all fully free chunks except for the first one. + * + * CONTEXT: + * workqueue context. + */ +static void pcpu_reclaim(struct work_struct *work) { - WARN_ON(chunk->immutable); - pcpu_depopulate_chunk(chunk, 0, pcpu_unit_size, false); - list_del(&chunk->list); - rb_erase(&chunk->rb_node, &pcpu_addr_root); - free_pcpu_chunk(chunk); + LIST_HEAD(todo); + struct list_head *head = &pcpu_slot[pcpu_nr_slots - 1]; + struct pcpu_chunk *chunk, *next; + + mutex_lock(&pcpu_alloc_mutex); + spin_lock_irq(&pcpu_lock); + + list_for_each_entry_safe(chunk, next, head, list) { + WARN_ON(chunk->immutable); + + /* spare the first one */ + if (chunk == list_first_entry(head, struct pcpu_chunk, list)) + continue; + + rb_erase(&chunk->rb_node, &pcpu_addr_root); + list_move(&chunk->list, &todo); + } + + spin_unlock_irq(&pcpu_lock); + mutex_unlock(&pcpu_alloc_mutex); + + list_for_each_entry_safe(chunk, next, &todo, list) { + pcpu_depopulate_chunk(chunk, 0, pcpu_unit_size, false); + free_pcpu_chunk(chunk); + } } /** * free_percpu - free percpu area * @ptr: pointer to area to free * - * Free percpu area @ptr. Might sleep. + * Free percpu area @ptr. + * + * CONTEXT: + * Can be called from atomic context. 
*/ void free_percpu(void *ptr) { void *addr = __pcpu_ptr_to_addr(ptr); struct pcpu_chunk *chunk; + unsigned long flags; int off; if (!ptr) return; - mutex_lock(&pcpu_mutex); + spin_lock_irqsave(&pcpu_lock, flags); chunk = pcpu_chunk_addr_search(addr); off = addr - chunk->vm->addr; pcpu_free_area(chunk, off); - /* the chunk became fully free, kill one if there are other free ones */ + /* if there are more than one fully free chunks, wake up grim reaper */ if (chunk->free_size == pcpu_unit_size) { struct pcpu_chunk *pos; - list_for_each_entry(pos, - &pcpu_slot[pcpu_chunk_slot(chunk)], list) + list_for_each_entry(pos, &pcpu_slot[pcpu_nr_slots - 1], list) if (pos != chunk) { - pcpu_kill_chunk(pos); + schedule_work(&pcpu_reclaim_work); break; } } - mutex_unlock(&pcpu_mutex); + spin_unlock_irqrestore(&pcpu_lock, flags); } EXPORT_SYMBOL_GPL(free_percpu); @@ -830,8 +1012,9 @@ EXPORT_SYMBOL_GPL(free_percpu); * pcpu_setup_first_chunk - initialize the first percpu chunk * @get_page_fn: callback to fetch page pointer * @static_size: the size of static percpu area in bytes - * @unit_size: unit size in bytes, must be multiple of PAGE_SIZE, 0 for auto - * @free_size: free size in bytes, 0 for auto + * @reserved_size: the size of reserved percpu area in bytes + * @unit_size: unit size in bytes, must be multiple of PAGE_SIZE, -1 for auto + * @dyn_size: free size for dynamic allocation in bytes, -1 for auto * @base_addr: mapped address, NULL for auto * @populate_pte_fn: callback to allocate pagetable, NULL if unnecessary * @@ -848,13 +1031,22 @@ EXPORT_SYMBOL_GPL(free_percpu); * indicates end of pages for the cpu. Note that @get_page_fn() must * return the same number of pages for all cpus. * - * @unit_size, if non-zero, determines unit size and must be aligned - * to PAGE_SIZE and equal to or larger than @static_size + @free_size. + * @reserved_size, if non-zero, specifies the amount of bytes to + * reserve after the static area in the first chunk. This reserves + * the first chunk such that it's available only through reserved + * percpu allocation. This is primarily used to serve module percpu + * static areas on architectures where the addressing model has + * limited offset range for symbol relocations to guarantee module + * percpu symbols fall inside the relocatable range. * - * @free_size determines the number of free bytes after the static - * area in the first chunk. If zero, whatever left is available. - * Specifying non-zero value make percpu leave the area after - * @static_size + @free_size alone. + * @unit_size, if non-negative, specifies unit size and must be + * aligned to PAGE_SIZE and equal to or larger than @static_size + + * @reserved_size + @dyn_size. + * + * @dyn_size, if non-negative, limits the number of bytes available + * for dynamic allocation in the first chunk. Specifying non-negative + * value make percpu leave alone the area beyond @static_size + + * @reserved_size + @dyn_size. * * Non-null @base_addr means that the caller already allocated virtual * region for the first chunk and mapped it. percpu must not mess @@ -864,41 +1056,58 @@ EXPORT_SYMBOL_GPL(free_percpu); * @populate_pte_fn is used to populate the pagetable. NULL means the * caller already populated the pagetable. * + * If the first chunk ends up with both reserved and dynamic areas, it + * is served by two chunks - one to serve the core static and reserved + * areas and the other for the dynamic area. They share the same vm + * and page map but uses different area allocation map to stay away + * from each other. 
The latter chunk is circulated in the chunk slots + * and available for dynamic allocation like any other chunks. + * * RETURNS: * The determined pcpu_unit_size which can be used to initialize * percpu access. */ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, - size_t static_size, size_t unit_size, - size_t free_size, void *base_addr, + size_t static_size, size_t reserved_size, + ssize_t unit_size, ssize_t dyn_size, + void *base_addr, pcpu_populate_pte_fn_t populate_pte_fn) { - static struct vm_struct static_vm; - struct pcpu_chunk *static_chunk; + static struct vm_struct first_vm; + static int smap[2], dmap[2]; + struct pcpu_chunk *schunk, *dchunk = NULL; unsigned int cpu; int nr_pages; int err, i; /* santiy checks */ + BUILD_BUG_ON(ARRAY_SIZE(smap) >= PCPU_DFL_MAP_ALLOC || + ARRAY_SIZE(dmap) >= PCPU_DFL_MAP_ALLOC); BUG_ON(!static_size); - BUG_ON(!unit_size && free_size); - BUG_ON(unit_size && unit_size < static_size + free_size); - BUG_ON(unit_size & ~PAGE_MASK); - BUG_ON(base_addr && !unit_size); + if (unit_size >= 0) { + BUG_ON(unit_size < static_size + reserved_size + + (dyn_size >= 0 ? dyn_size : 0)); + BUG_ON(unit_size & ~PAGE_MASK); + } else { + BUG_ON(dyn_size >= 0); + BUG_ON(base_addr); + } BUG_ON(base_addr && populate_pte_fn); - if (unit_size) + if (unit_size >= 0) pcpu_unit_pages = unit_size >> PAGE_SHIFT; else pcpu_unit_pages = max_t(int, PCPU_MIN_UNIT_SIZE >> PAGE_SHIFT, - PFN_UP(static_size)); + PFN_UP(static_size + reserved_size)); - pcpu_static_size = static_size; pcpu_unit_size = pcpu_unit_pages << PAGE_SHIFT; pcpu_chunk_size = num_possible_cpus() * pcpu_unit_size; pcpu_chunk_struct_size = sizeof(struct pcpu_chunk) + num_possible_cpus() * pcpu_unit_pages * sizeof(struct page *); + if (dyn_size < 0) + dyn_size = pcpu_unit_size - static_size - reserved_size; + /* * Allocate chunk slots. The additional last slot is for * empty chunks. @@ -908,33 +1117,66 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, for (i = 0; i < pcpu_nr_slots; i++) INIT_LIST_HEAD(&pcpu_slot[i]); - /* init static_chunk */ - static_chunk = alloc_bootmem(pcpu_chunk_struct_size); - INIT_LIST_HEAD(&static_chunk->list); - static_chunk->vm = &static_vm; - - if (free_size) - static_chunk->free_size = free_size; - else - static_chunk->free_size = pcpu_unit_size - pcpu_static_size; - - static_chunk->contig_hint = static_chunk->free_size; + /* + * Initialize static chunk. If reserved_size is zero, the + * static chunk covers static area + dynamic allocation area + * in the first chunk. If reserved_size is not zero, it + * covers static area + reserved area (mostly used for module + * static percpu allocation). 
+ */ + schunk = alloc_bootmem(pcpu_chunk_struct_size); + INIT_LIST_HEAD(&schunk->list); + schunk->vm = &first_vm; + schunk->map = smap; + schunk->map_alloc = ARRAY_SIZE(smap); + schunk->page = schunk->page_ar; + + if (reserved_size) { + schunk->free_size = reserved_size; + pcpu_reserved_chunk = schunk; /* not for dynamic alloc */ + } else { + schunk->free_size = dyn_size; + dyn_size = 0; /* dynamic area covered */ + } + schunk->contig_hint = schunk->free_size; + + schunk->map[schunk->map_used++] = -static_size; + if (schunk->free_size) + schunk->map[schunk->map_used++] = schunk->free_size; + + pcpu_reserved_chunk_limit = static_size + schunk->free_size; + + /* init dynamic chunk if necessary */ + if (dyn_size) { + dchunk = alloc_bootmem(sizeof(struct pcpu_chunk)); + INIT_LIST_HEAD(&dchunk->list); + dchunk->vm = &first_vm; + dchunk->map = dmap; + dchunk->map_alloc = ARRAY_SIZE(dmap); + dchunk->page = schunk->page_ar; /* share page map with schunk */ + + dchunk->contig_hint = dchunk->free_size = dyn_size; + dchunk->map[dchunk->map_used++] = -pcpu_reserved_chunk_limit; + dchunk->map[dchunk->map_used++] = dchunk->free_size; + } /* allocate vm address */ - static_vm.flags = VM_ALLOC; - static_vm.size = pcpu_chunk_size; + first_vm.flags = VM_ALLOC; + first_vm.size = pcpu_chunk_size; if (!base_addr) - vm_area_register_early(&static_vm, PAGE_SIZE); + vm_area_register_early(&first_vm, PAGE_SIZE); else { /* * Pages already mapped. No need to remap into - * vmalloc area. In this case the static chunk can't - * be mapped or unmapped by percpu and is marked + * vmalloc area. In this case the first chunks can't + * be mapped or unmapped by percpu and are marked * immutable. */ - static_vm.addr = base_addr; - static_chunk->immutable = true; + first_vm.addr = base_addr; + schunk->immutable = true; + if (dchunk) + dchunk->immutable = true; } /* assign pages */ @@ -945,10 +1187,10 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, if (!page) break; - *pcpu_chunk_pagep(static_chunk, cpu, i) = page; + *pcpu_chunk_pagep(schunk, cpu, i) = page; } - BUG_ON(i < PFN_UP(pcpu_static_size)); + BUG_ON(i < PFN_UP(static_size)); if (nr_pages < 0) nr_pages = i; @@ -960,20 +1202,25 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, if (populate_pte_fn) { for_each_possible_cpu(cpu) for (i = 0; i < nr_pages; i++) - populate_pte_fn(pcpu_chunk_addr(static_chunk, + populate_pte_fn(pcpu_chunk_addr(schunk, cpu, i)); - err = pcpu_map(static_chunk, 0, nr_pages); + err = pcpu_map(schunk, 0, nr_pages); if (err) panic("failed to setup static percpu area, err=%d\n", err); } - /* link static_chunk in */ - pcpu_chunk_relocate(static_chunk, -1); - pcpu_chunk_addr_insert(static_chunk); + /* link the first chunk in */ + if (!dchunk) { + pcpu_chunk_relocate(schunk, -1); + pcpu_chunk_addr_insert(schunk); + } else { + pcpu_chunk_relocate(dchunk, -1); + pcpu_chunk_addr_insert(dchunk); + } /* we're done */ - pcpu_base_addr = (void *)pcpu_chunk_addr(static_chunk, 0, 0); + pcpu_base_addr = (void *)pcpu_chunk_addr(schunk, 0, 0); return pcpu_unit_size; } diff --git a/net/802/tr.c b/net/802/tr.c index 158150fee462..f47ae289d83b 100644 --- a/net/802/tr.c +++ b/net/802/tr.c @@ -668,3 +668,5 @@ module_init(rif_init); EXPORT_SYMBOL(tr_type_trans); EXPORT_SYMBOL(alloc_trdev); + +MODULE_LICENSE("GPL"); diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 4a19acd3a32b..1b34135cf990 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -553,7 +553,7 @@ static int vlan_dev_neigh_setup(struct 
net_device *dev, struct neigh_parms *pa) int err = 0; if (netif_device_present(real_dev) && ops->ndo_neigh_setup) - err = ops->ndo_neigh_setup(dev, pa); + err = ops->ndo_neigh_setup(real_dev, pa); return err; } @@ -639,6 +639,7 @@ static int vlan_dev_init(struct net_device *dev) dev->hard_header_len = real_dev->hard_header_len + VLAN_HLEN; dev->netdev_ops = &vlan_netdev_ops; } + netdev_resync_ops(dev); if (is_vlan_dev(real_dev)) subclass = 1; diff --git a/net/core/dev.c b/net/core/dev.c index 72b0d26fd46d..f1129706ce7b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2267,12 +2267,6 @@ int netif_receive_skb(struct sk_buff *skb) rcu_read_lock(); - /* Don't receive packets in an exiting network namespace */ - if (!net_alive(dev_net(skb->dev))) { - kfree_skb(skb); - goto out; - } - #ifdef CONFIG_NET_CLS_ACT if (skb->tc_verd & TC_NCLS) { skb->tc_verd = CLR_TC_NCLS(skb->tc_verd); @@ -4288,6 +4282,39 @@ unsigned long netdev_fix_features(unsigned long features, const char *name) } EXPORT_SYMBOL(netdev_fix_features); +/* Some devices need to (re-)set their netdev_ops inside + * ->init() or similar. If that happens, we have to setup + * the compat pointers again. + */ +void netdev_resync_ops(struct net_device *dev) +{ +#ifdef CONFIG_COMPAT_NET_DEV_OPS + const struct net_device_ops *ops = dev->netdev_ops; + + dev->init = ops->ndo_init; + dev->uninit = ops->ndo_uninit; + dev->open = ops->ndo_open; + dev->change_rx_flags = ops->ndo_change_rx_flags; + dev->set_rx_mode = ops->ndo_set_rx_mode; + dev->set_multicast_list = ops->ndo_set_multicast_list; + dev->set_mac_address = ops->ndo_set_mac_address; + dev->validate_addr = ops->ndo_validate_addr; + dev->do_ioctl = ops->ndo_do_ioctl; + dev->set_config = ops->ndo_set_config; + dev->change_mtu = ops->ndo_change_mtu; + dev->neigh_setup = ops->ndo_neigh_setup; + dev->tx_timeout = ops->ndo_tx_timeout; + dev->get_stats = ops->ndo_get_stats; + dev->vlan_rx_register = ops->ndo_vlan_rx_register; + dev->vlan_rx_add_vid = ops->ndo_vlan_rx_add_vid; + dev->vlan_rx_kill_vid = ops->ndo_vlan_rx_kill_vid; +#ifdef CONFIG_NET_POLL_CONTROLLER + dev->poll_controller = ops->ndo_poll_controller; +#endif +#endif +} +EXPORT_SYMBOL(netdev_resync_ops); + /** * register_netdevice - register a network device * @dev: device to register @@ -4332,27 +4359,7 @@ int register_netdevice(struct net_device *dev) * This is temporary until all network devices are converted. 
*/ if (dev->netdev_ops) { - const struct net_device_ops *ops = dev->netdev_ops; - - dev->init = ops->ndo_init; - dev->uninit = ops->ndo_uninit; - dev->open = ops->ndo_open; - dev->change_rx_flags = ops->ndo_change_rx_flags; - dev->set_rx_mode = ops->ndo_set_rx_mode; - dev->set_multicast_list = ops->ndo_set_multicast_list; - dev->set_mac_address = ops->ndo_set_mac_address; - dev->validate_addr = ops->ndo_validate_addr; - dev->do_ioctl = ops->ndo_do_ioctl; - dev->set_config = ops->ndo_set_config; - dev->change_mtu = ops->ndo_change_mtu; - dev->tx_timeout = ops->ndo_tx_timeout; - dev->get_stats = ops->ndo_get_stats; - dev->vlan_rx_register = ops->ndo_vlan_rx_register; - dev->vlan_rx_add_vid = ops->ndo_vlan_rx_add_vid; - dev->vlan_rx_kill_vid = ops->ndo_vlan_rx_kill_vid; -#ifdef CONFIG_NET_POLL_CONTROLLER - dev->poll_controller = ops->ndo_poll_controller; -#endif + netdev_resync_ops(dev); } else { char drivername[64]; pr_info("%s (%s): not using net_device_ops yet\n", diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 6ac29a46e23e..484f58750eba 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -77,7 +77,9 @@ static ssize_t netdev_store(struct device *dev, struct device_attribute *attr, if (endp == buf) goto err; - rtnl_lock(); + if (!rtnl_trylock()) + return -ERESTARTSYS; + if (dev_isalive(net)) { if ((ret = (*set)(net, new)) == 0) ret = len; diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 2adb1a7d361f..e3bebd36f053 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -157,9 +157,6 @@ static void cleanup_net(struct work_struct *work) struct pernet_operations *ops; struct net *net; - /* Be very certain incoming network packets will not find us */ - rcu_barrier(); - net = container_of(work, struct net, work); mutex_lock(&net_mutex); diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 705b33b184a3..fc562d29cc46 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -1205,7 +1205,7 @@ static struct pernet_operations __net_initdata icmp_sk_ops = { int __init icmp_init(void) { - return register_pernet_device(&icmp_sk_ops); + return register_pernet_subsys(&icmp_sk_ops); } EXPORT_SYMBOL(icmp_err_convert); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 19d7b429a262..cf74c416831a 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2443,7 +2443,7 @@ static struct pernet_operations __net_initdata tcp_sk_ops = { void __init tcp_v4_init(void) { inet_hashinfo_init(&tcp_hashinfo); - if (register_pernet_device(&tcp_sk_ops)) + if (register_pernet_subsys(&tcp_sk_ops)) panic("Failed to create the TCP control socket.\n"); } diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index f9afb452249c..1220e2c7831e 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -493,15 +493,17 @@ static void addrconf_forward_change(struct net *net, __s32 newf) read_unlock(&dev_base_lock); } -static void addrconf_fixup_forwarding(struct ctl_table *table, int *p, int old) +static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int old) { struct net *net; net = (struct net *)table->extra2; if (p == &net->ipv6.devconf_dflt->forwarding) - return; + return 0; + + if (!rtnl_trylock()) + return -ERESTARTSYS; - rtnl_lock(); if (p == &net->ipv6.devconf_all->forwarding) { __s32 newf = net->ipv6.devconf_all->forwarding; net->ipv6.devconf_dflt->forwarding = newf; @@ -512,6 +514,7 @@ static void addrconf_fixup_forwarding(struct ctl_table *table, int *p, int old) if (*p) rt6_purge_dflt_routers(net); + return 1; } #endif @@ 
-2608,9 +2611,6 @@ static int addrconf_ifdown(struct net_device *dev, int how) ASSERT_RTNL(); - if ((dev->flags & IFF_LOOPBACK) && how == 1) - how = 0; - rt6_ifdown(net, dev); neigh_ifdown(&nd_tbl, dev); @@ -3983,7 +3983,7 @@ int addrconf_sysctl_forward(ctl_table *ctl, int write, struct file * filp, ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); if (write) - addrconf_fixup_forwarding(ctl, valp, val); + ret = addrconf_fixup_forwarding(ctl, valp, val); return ret; } @@ -4019,8 +4019,7 @@ static int addrconf_sysctl_forward_strategy(ctl_table *table, } *valp = new; - addrconf_fixup_forwarding(table, valp, val); - return 1; + return addrconf_fixup_forwarding(table, valp, val); } static struct addrconf_sysctl_table @@ -4446,25 +4445,6 @@ int unregister_inet6addr_notifier(struct notifier_block *nb) EXPORT_SYMBOL(unregister_inet6addr_notifier); -static void addrconf_net_exit(struct net *net) -{ - struct net_device *dev; - - rtnl_lock(); - /* clean dev list */ - for_each_netdev(net, dev) { - if (__in6_dev_get(dev) == NULL) - continue; - addrconf_ifdown(dev, 1); - } - addrconf_ifdown(net->loopback_dev, 2); - rtnl_unlock(); -} - -static struct pernet_operations addrconf_net_ops = { - .exit = addrconf_net_exit, -}; - /* * Init / cleanup code */ @@ -4506,10 +4486,6 @@ int __init addrconf_init(void) if (err) goto errlo; - err = register_pernet_device(&addrconf_net_ops); - if (err) - return err; - register_netdevice_notifier(&ipv6_dev_notf); addrconf_verify(0); @@ -4539,15 +4515,22 @@ errlo: void addrconf_cleanup(void) { struct inet6_ifaddr *ifa; + struct net_device *dev; int i; unregister_netdevice_notifier(&ipv6_dev_notf); - unregister_pernet_device(&addrconf_net_ops); - unregister_pernet_subsys(&addrconf_ops); rtnl_lock(); + /* clean dev list */ + for_each_netdev(&init_net, dev) { + if (__in6_dev_get(dev) == NULL) + continue; + addrconf_ifdown(dev, 1); + } + addrconf_ifdown(init_net.loopback_dev, 2); + /* * Check hash table. */ @@ -4568,6 +4551,4 @@ void addrconf_cleanup(void) del_timer(&addr_chk_timer); rtnl_unlock(); - - unregister_pernet_subsys(&addrconf_net_ops); } diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index c802bc1658a8..da944eca2ca6 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -72,6 +72,10 @@ MODULE_LICENSE("GPL"); static struct list_head inetsw6[SOCK_MAX]; static DEFINE_SPINLOCK(inetsw6_lock); +static int disable_ipv6 = 0; +module_param_named(disable, disable_ipv6, int, 0); +MODULE_PARM_DESC(disable, "Disable IPv6 such that it is non-functional"); + static __inline__ struct ipv6_pinfo *inet6_sk_generic(struct sock *sk) { const int offset = sk->sk_prot->obj_size - sizeof(struct ipv6_pinfo); @@ -991,10 +995,21 @@ static int __init inet6_init(void) { struct sk_buff *dummy_skb; struct list_head *r; - int err; + int err = 0; BUILD_BUG_ON(sizeof(struct inet6_skb_parm) > sizeof(dummy_skb->cb)); + /* Register the socket-side information for inet6_create. */ + for(r = &inetsw6[0]; r < &inetsw6[SOCK_MAX]; ++r) + INIT_LIST_HEAD(r); + + if (disable_ipv6) { + printk(KERN_INFO + "IPv6: Loaded, but administratively disabled, " + "reboot required to enable\n"); + goto out; + } + err = proto_register(&tcpv6_prot, 1); if (err) goto out; @@ -1012,10 +1027,6 @@ static int __init inet6_init(void) goto out_unregister_udplite_proto; - /* Register the socket-side information for inet6_create. */ - for(r = &inetsw6[0]; r < &inetsw6[SOCK_MAX]; ++r) - INIT_LIST_HEAD(r); - /* We MUST register RAW sockets before we create the ICMP6, * IGMP6, or NDISC control sockets. 
*/ diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 9eb895c7a2a9..3ae3cb816563 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1084,6 +1084,13 @@ out: return 0; } +/** + * netlink_set_err - report error to broadcast listeners + * @ssk: the kernel netlink socket, as returned by netlink_kernel_create() + * @pid: the PID of a process that we want to skip (if any) + * @groups: the broadcast group that will notice the error + * @code: error code, must be negative (as usual in kernelspace) + */ void netlink_set_err(struct sock *ssk, u32 pid, u32 group, int code) { struct netlink_set_err_data info; @@ -1093,7 +1100,8 @@ void netlink_set_err(struct sock *ssk, u32 pid, u32 group, int code) info.exclude_sk = ssk; info.pid = pid; info.group = group; - info.code = code; + /* sk->sk_err wants a positive error value */ + info.code = -code; read_lock(&nl_table_lock); diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 5c72a116b1a4..f8f047b61245 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -183,13 +183,6 @@ override: if (R_tab == NULL) goto failure; - if (!est && (ret == ACT_P_CREATED || - !gen_estimator_active(&police->tcf_bstats, - &police->tcf_rate_est))) { - err = -EINVAL; - goto failure; - } - if (parm->peakrate.rate) { P_tab = qdisc_get_rtab(&parm->peakrate, tb[TCA_POLICE_PEAKRATE]); @@ -205,6 +198,12 @@ override: &police->tcf_lock, est); if (err) goto failure_unlock; + } else if (tb[TCA_POLICE_AVRATE] && + (ret == ACT_P_CREATED || + !gen_estimator_active(&police->tcf_bstats, + &police->tcf_rate_est))) { + err = -EINVAL; + goto failure_unlock; } /* No failure allowed after this point */ diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index b78e3be69013..c4986d0f7419 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -717,15 +717,20 @@ static int sctp_inetaddr_event(struct notifier_block *this, unsigned long ev, static int sctp_ctl_sock_init(void) { int err; - sa_family_t family; + sa_family_t family = PF_INET; if (sctp_get_pf_specific(PF_INET6)) family = PF_INET6; - else - family = PF_INET; err = inet_ctl_sock_create(&sctp_ctl_sock, family, SOCK_SEQPACKET, IPPROTO_SCTP, &init_net); + + /* If IPv6 socket could not be created, try the IPv4 socket */ + if (err < 0 && family == PF_INET6) + err = inet_ctl_sock_create(&sctp_ctl_sock, AF_INET, + SOCK_SEQPACKET, IPPROTO_SCTP, + &init_net); + if (err < 0) { printk(KERN_ERR "SCTP: Failed to create the SCTP control socket.\n"); @@ -1322,9 +1327,8 @@ SCTP_STATIC __init int sctp_init(void) out: return status; err_v6_add_protocol: - sctp_v6_del_protocol(); -err_add_protocol: sctp_v4_del_protocol(); +err_add_protocol: inet_ctl_sock_destroy(sctp_ctl_sock); err_ctl_sock_init: sctp_v6_protosw_exit(); @@ -1335,7 +1339,6 @@ err_protosw_init: sctp_v4_pf_exit(); sctp_v6_pf_exit(); sctp_sysctl_unregister(); - list_del(&sctp_af_inet.list); free_pages((unsigned long)sctp_port_hashtable, get_order(sctp_port_hashsize * sizeof(struct sctp_bind_hashbucket))); @@ -1383,7 +1386,6 @@ SCTP_STATIC __exit void sctp_exit(void) sctp_v4_pf_exit(); sctp_sysctl_unregister(); - list_del(&sctp_af_inet.list); free_pages((unsigned long)sctp_assoc_hashtable, get_order(sctp_assoc_hashsize * diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index e1d6076b4f59..b5495aecab60 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -787,36 +787,48 @@ static void sctp_cmd_process_operr(sctp_cmd_seq_t *cmds, struct sctp_association *asoc, struct sctp_chunk *chunk) { - 
struct sctp_operr_chunk *operr_chunk; struct sctp_errhdr *err_hdr; + struct sctp_ulpevent *ev; - operr_chunk = (struct sctp_operr_chunk *)chunk->chunk_hdr; - err_hdr = &operr_chunk->err_hdr; + while (chunk->chunk_end > chunk->skb->data) { + err_hdr = (struct sctp_errhdr *)(chunk->skb->data); - switch (err_hdr->cause) { - case SCTP_ERROR_UNKNOWN_CHUNK: - { - struct sctp_chunkhdr *unk_chunk_hdr; + ev = sctp_ulpevent_make_remote_error(asoc, chunk, 0, + GFP_ATOMIC); + if (!ev) + return; - unk_chunk_hdr = (struct sctp_chunkhdr *)err_hdr->variable; - switch (unk_chunk_hdr->type) { - /* ADDIP 4.1 A9) If the peer responds to an ASCONF with an - * ERROR chunk reporting that it did not recognized the ASCONF - * chunk type, the sender of the ASCONF MUST NOT send any - * further ASCONF chunks and MUST stop its T-4 timer. - */ - case SCTP_CID_ASCONF: - asoc->peer.asconf_capable = 0; - sctp_add_cmd_sf(cmds, SCTP_CMD_TIMER_STOP, + sctp_ulpq_tail_event(&asoc->ulpq, ev); + + switch (err_hdr->cause) { + case SCTP_ERROR_UNKNOWN_CHUNK: + { + sctp_chunkhdr_t *unk_chunk_hdr; + + unk_chunk_hdr = (sctp_chunkhdr_t *)err_hdr->variable; + switch (unk_chunk_hdr->type) { + /* ADDIP 4.1 A9) If the peer responds to an ASCONF with + * an ERROR chunk reporting that it did not recognized + * the ASCONF chunk type, the sender of the ASCONF MUST + * NOT send any further ASCONF chunks and MUST stop its + * T-4 timer. + */ + case SCTP_CID_ASCONF: + if (asoc->peer.asconf_capable == 0) + break; + + asoc->peer.asconf_capable = 0; + sctp_add_cmd_sf(cmds, SCTP_CMD_TIMER_STOP, SCTP_TO(SCTP_EVENT_TIMEOUT_T4_RTO)); + break; + default: + break; + } break; + } default: break; } - break; - } - default: - break; } } diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 3a0cd075914f..f88dfded0e3a 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -3163,7 +3163,6 @@ sctp_disposition_t sctp_sf_operr_notify(const struct sctp_endpoint *ep, sctp_cmd_seq_t *commands) { struct sctp_chunk *chunk = arg; - struct sctp_ulpevent *ev; if (!sctp_vtag_verify(chunk, asoc)) return sctp_sf_pdiscard(ep, asoc, type, arg, commands); @@ -3173,21 +3172,10 @@ sctp_disposition_t sctp_sf_operr_notify(const struct sctp_endpoint *ep, return sctp_sf_violation_chunklen(ep, asoc, type, arg, commands); - while (chunk->chunk_end > chunk->skb->data) { - ev = sctp_ulpevent_make_remote_error(asoc, chunk, 0, - GFP_ATOMIC); - if (!ev) - goto nomem; + sctp_add_cmd_sf(commands, SCTP_CMD_PROCESS_OPERR, + SCTP_CHUNK(chunk)); - sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, - SCTP_ULPEVENT(ev)); - sctp_add_cmd_sf(commands, SCTP_CMD_PROCESS_OPERR, - SCTP_CHUNK(chunk)); - } return SCTP_DISPOSITION_CONSUME; - -nomem: - return SCTP_DISPOSITION_NOMEM; } /* diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 85c9034c59b2..bd0a16c3de5e 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -380,7 +380,8 @@ static bool is_valid_reg_rule(const struct ieee80211_reg_rule *rule) freq_diff = freq_range->end_freq_khz - freq_range->start_freq_khz; - if (freq_diff <= 0 || freq_range->max_bandwidth_khz > freq_diff) + if (freq_range->end_freq_khz <= freq_range->start_freq_khz || + freq_range->max_bandwidth_khz > freq_diff) return false; return true; diff --git a/samples/tracepoints/tp-samples-trace.h b/samples/tracepoints/tp-samples-trace.h index 01724e04c556..dffdc49878af 100644 --- a/samples/tracepoints/tp-samples-trace.h +++ b/samples/tracepoints/tp-samples-trace.h @@ -5,9 +5,9 @@ #include <linux/tracepoint.h> DECLARE_TRACE(subsys_event, - 
TPPROTO(struct inode *inode, struct file *file), - TPARGS(inode, file)); + TP_PROTO(struct inode *inode, struct file *file), + TP_ARGS(inode, file)); DECLARE_TRACE(subsys_eventb, - TPPROTO(void), - TPARGS()); + TP_PROTO(void), + TP_ARGS()); #endif
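
The bulk of the diff above reworks mm/percpu.c: the single pcpu_mutex becomes pcpu_alloc_mutex plus pcpu_lock, fully free chunks are reclaimed from a work item instead of directly on the free path, and a reserved first chunk is introduced behind __alloc_reserved_percpu(). As a reading aid only, here is a minimal caller sketch of the dynamic interface; struct example_stats and the example_* function names are hypothetical, and only __alloc_percpu(), free_percpu() and per_cpu_ptr() are taken from the interfaces shown in the diff (the reserved variant is intended for the module loader's static percpu areas, not ordinary callers).

#include <linux/percpu.h>
#include <linux/smp.h>
#include <linux/errno.h>

/* Hypothetical per-cpu payload, not part of the patch. */
struct example_stats {
        unsigned long events;
};

static struct example_stats *stats;     /* percpu pointer */

static int example_init(void)
{
        /* May sleep; pcpu_alloc() does GFP_KERNEL allocations. */
        stats = __alloc_percpu(sizeof(*stats), __alignof__(*stats));
        if (!stats)
                return -ENOMEM;
        return 0;
}

static void example_count_event(void)
{
        /* per_cpu_ptr() works on pointers returned by __alloc_percpu(). */
        int cpu = get_cpu();

        per_cpu_ptr(stats, cpu)->events++;
        put_cpu();
}

static void example_exit(void)
{
        /*
         * After this patch free_percpu() only takes pcpu_lock with
         * interrupts disabled, so it is callable from atomic context;
         * page reclaim is deferred to the pcpu_reclaim work item.
         */
        free_percpu(stats);
}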
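
The netlink hunk adds kernel-doc for netlink_set_err() and changes it to expect a negative error code from the caller, negating it internally because sk->sk_err wants a positive value. A hypothetical caller, with the broadcast group number chosen arbitrarily for illustration, might now look like this:

#include <net/sock.h>
#include <linux/netlink.h>
#include <linux/errno.h>

/*
 * Hypothetical failure path: report a lost notification to the
 * listeners of broadcast group 1 (placeholder).  The error code is
 * passed negative, as usual in kernel space; netlink_set_err()
 * flips the sign before storing it in sk->sk_err.
 */
static void example_report_overrun(struct sock *nlsk)
{
        netlink_set_err(nlsk, 0, 1, -ENOBUFS);
}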