diff options
author | Steve French <sfrench@us.ibm.com> | 2005-11-15 16:49:04 -0800 |
---|---|---|
committer | Steve French <sfrench@us.ibm.com> | 2005-11-15 16:49:04 -0800 |
commit | c12489ad40a9db6b17225e0f38151d41c6175fe8 (patch) | |
tree | cf7ea08c0b3bf106975047c66b8bb0b560cc4e3e | |
parent | ff7feac9638e162263463edaeb342b4f3b1ce90e (diff) | |
parent | 1e185b97b4364063f1135604b87f8d8469944233 (diff) | |
download | talos-obmc-linux-c12489ad40a9db6b17225e0f38151d41c6175fe8.tar.gz talos-obmc-linux-c12489ad40a9db6b17225e0f38151d41c6175fe8.zip |
Merge with /pub/scm/linux/kernel/git/torvalds/linux-2.6.git
184 files changed, 2723 insertions, 13071 deletions
diff --git a/Documentation/DocBook/kernel-api.tmpl b/Documentation/DocBook/kernel-api.tmpl index 0519c9dc0065..096aed62c326 100644 --- a/Documentation/DocBook/kernel-api.tmpl +++ b/Documentation/DocBook/kernel-api.tmpl @@ -386,7 +386,7 @@ X!Edrivers/pnp/system.c <chapter id="blkdev"> <title>Block Devices</title> -!Edrivers/block/ll_rw_blk.c +!Eblock/ll_rw_blk.c </chapter> <chapter id="miscdev"> diff --git a/Documentation/block/biodoc.txt b/Documentation/block/biodoc.txt index 2d65c2182161..0fe01c805480 100644 --- a/Documentation/block/biodoc.txt +++ b/Documentation/block/biodoc.txt @@ -1063,8 +1063,8 @@ Aside: 4.4 I/O contexts I/O contexts provide a dynamically allocated per process data area. They may be used in I/O schedulers, and in the block layer (could be used for IO statis, -priorities for example). See *io_context in drivers/block/ll_rw_blk.c, and -as-iosched.c for an example of usage in an i/o scheduler. +priorities for example). See *io_context in block/ll_rw_blk.c, and as-iosched.c +for an example of usage in an i/o scheduler. 5. Scalability related changes diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 429db4bf98ec..24fe8edad304 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -140,3 +140,12 @@ What: EXPORT_SYMBOL(lookup_hash) When: January 2006 Why: Too low-level interface. Use lookup_one_len or lookup_create instead. Who: Christoph Hellwig <hch@lst.de> + +--------------------------- + +What: START_ARRAY ioctl for md +When: July 2006 +Files: drivers/md/md.c +Why: Not reliable by design - can fail when most needed. + Alternatives exist +Who: NeilBrown <neilb@suse.de> diff --git a/Documentation/oops-tracing.txt b/Documentation/oops-tracing.txt index 9f30ac6ca47b..05960f8a748e 100644 --- a/Documentation/oops-tracing.txt +++ b/Documentation/oops-tracing.txt @@ -32,7 +32,10 @@ the disk is not available then you have three options :- has restarted. Messy but it is the only option if you have not planned for a crash. Alternatively, you can take a picture of the screen with a digital camera - not nice, but better than - nothing. + nothing. If the messages scroll off the top of the console, you + may find that booting with a higher resolution (eg, vga=791) + will allow you to read more of the text. (Caveat: This needs vesafb, + so won't help for 'early' oopses) (2) Boot with a serial console (see Documentation/serial-console.txt), run a null modem to a second machine and capture the output there diff --git a/Documentation/x86_64/boot-options.txt b/Documentation/x86_64/boot-options.txt index ffe1c062088b..e566affeed7f 100644 --- a/Documentation/x86_64/boot-options.txt +++ b/Documentation/x86_64/boot-options.txt @@ -7,10 +7,12 @@ Machine check mce=off disable machine check mce=bootlog Enable logging of machine checks left over from booting. - Disabled by default because some BIOS leave bogus ones. + Disabled by default on AMD because some BIOS leave bogus ones. If your BIOS doesn't do that it's a good idea to enable though to make sure you log even machine check events that result - in a reboot. + in a reboot. On Intel systems it is enabled by default. + mce=nobootlog + Disable boot machine check logging. mce=tolerancelevel (number) 0: always panic, 1: panic if deadlock possible, 2: try to avoid panic, 3: never panic or exit (for testing) @@ -122,6 +124,9 @@ SMP cpumask=MASK only use cpus with bits set in mask + additional_cpus=NUM Allow NUM more CPUs for hotplug + (defaults are specified by the BIOS or half the available CPUs) + NUMA numa=off Only set up a single NUMA node spanning all memory. @@ -188,6 +193,9 @@ Debugging kstack=N Print that many words from the kernel stack in oops dumps. + pagefaulttrace Dump all page faults. Only useful for extreme debugging + and will create a lot of output. + Misc noreplacement Don't replace instructions with more appropiate ones diff --git a/Documentation/x86_64/mm.txt b/Documentation/x86_64/mm.txt index 662b73971a67..133561b9cb0c 100644 --- a/Documentation/x86_64/mm.txt +++ b/Documentation/x86_64/mm.txt @@ -6,7 +6,7 @@ Virtual memory map with 4 level page tables: 0000000000000000 - 00007fffffffffff (=47bits) user space, different per mm hole caused by [48:63] sign extension ffff800000000000 - ffff80ffffffffff (=40bits) guard hole -ffff810000000000 - ffffc0ffffffffff (=46bits) direct mapping of phys. memory +ffff810000000000 - ffffc0ffffffffff (=46bits) direct mapping of all phys. memory ffffc10000000000 - ffffc1ffffffffff (=40bits) hole ffffc20000000000 - ffffe1ffffffffff (=45bits) vmalloc/ioremap space ... unused hole ... @@ -14,6 +14,10 @@ ffffffff80000000 - ffffffff82800000 (=40MB) kernel text mapping, from phys 0 ... unused hole ... ffffffff88000000 - fffffffffff00000 (=1919MB) module mapping space +The direct mapping covers all memory in the system upto the highest +memory address (this means in some cases it can also include PCI memory +holes) + vmalloc space is lazily synchronized into the different PML4 pages of the processes using the page fault handler, with init_level4_pgt as reference. diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 70b007e66926..4b15f5f1e254 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -652,25 +652,11 @@ endmenu menu "Power management options" -config PM - bool "Power Management support" - ---help--- - "Power Management" means that parts of your computer are shut - off or put into a power conserving "sleep" mode if they are not - being used. There are two competing standards for doing this: APM - and ACPI. If you want to use either one, say Y here and then also - to the requisite support below. - - Power Management is most important for battery powered laptop - computers; if you have a laptop, check out the Linux Laptop home - page on the WWW at <http://www.linux-on-laptops.com/> or - Tuxmobil - Linux on Mobile Computers at <http://www.tuxmobil.org/> - and the Battery Powered Linux mini-HOWTO, available from - <http://www.tldp.org/docs.html#howto>. +source "kernel/power/Kconfig" config APM tristate "Advanced Power Management Emulation" - depends on PM + depends on PM_LEGACY ---help--- APM is a BIOS specification for saving power using several different techniques. This is mostly useful for battery powered laptops with diff --git a/arch/arm/common/sa1111.c b/arch/arm/common/sa1111.c index 7b07acb03f3b..39a6eea300a2 100644 --- a/arch/arm/common/sa1111.c +++ b/arch/arm/common/sa1111.c @@ -1266,7 +1266,7 @@ static void __exit sa1111_exit(void) bus_unregister(&sa1111_bus_type); } -module_init(sa1111_init); +subsys_initcall(sa1111_init); module_exit(sa1111_exit); MODULE_DESCRIPTION("Intel Corporation SA1111 core driver"); diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c index b66c13c0cc0f..f36677241ecd 100644 --- a/arch/i386/kernel/acpi/boot.c +++ b/arch/i386/kernel/acpi/boot.c @@ -39,17 +39,14 @@ #ifdef CONFIG_X86_64 -static inline void acpi_madt_oem_check(char *oem_id, char *oem_table_id) -{ -} extern void __init clustered_apic_check(void); -static inline int ioapic_setup_disabled(void) -{ - return 0; -} +extern int gsi_irq_sharing(int gsi); #include <asm/proto.h> +static inline int acpi_madt_oem_check(char *oem_id, char *oem_table_id) { return 0; } + + #else /* X86 */ #ifdef CONFIG_X86_LOCAL_APIC @@ -57,6 +54,8 @@ static inline int ioapic_setup_disabled(void) #include <mach_mpparse.h> #endif /* CONFIG_X86_LOCAL_APIC */ +static inline int gsi_irq_sharing(int gsi) { return gsi; } + #endif /* X86 */ #define BAD_MADT_ENTRY(entry, end) ( \ @@ -459,7 +458,7 @@ int acpi_gsi_to_irq(u32 gsi, unsigned int *irq) *irq = IO_APIC_VECTOR(gsi); else #endif - *irq = gsi; + *irq = gsi_irq_sharing(gsi); return 0; } @@ -543,7 +542,7 @@ acpi_scan_rsdp(unsigned long start, unsigned long length) * RSDP signature. */ for (offset = 0; offset < length; offset += 16) { - if (strncmp((char *)(start + offset), "RSD PTR ", sig_len)) + if (strncmp((char *)(phys_to_virt(start) + offset), "RSD PTR ", sig_len)) continue; return (start + offset); } diff --git a/arch/i386/kernel/cpu/amd.c b/arch/i386/kernel/cpu/amd.c index 53a1681cd964..e344ef88cfcd 100644 --- a/arch/i386/kernel/cpu/amd.c +++ b/arch/i386/kernel/cpu/amd.c @@ -206,9 +206,9 @@ static void __init init_amd(struct cpuinfo_x86 *c) display_cacheinfo(c); if (cpuid_eax(0x80000000) >= 0x80000008) { - c->x86_num_cores = (cpuid_ecx(0x80000008) & 0xff) + 1; - if (c->x86_num_cores & (c->x86_num_cores - 1)) - c->x86_num_cores = 1; + c->x86_max_cores = (cpuid_ecx(0x80000008) & 0xff) + 1; + if (c->x86_max_cores & (c->x86_max_cores - 1)) + c->x86_max_cores = 1; } #ifdef CONFIG_X86_HT @@ -217,15 +217,15 @@ static void __init init_amd(struct cpuinfo_x86 *c) * distingush the cores. Assumes number of cores is a power * of two. */ - if (c->x86_num_cores > 1) { + if (c->x86_max_cores > 1) { int cpu = smp_processor_id(); unsigned bits = 0; - while ((1 << bits) < c->x86_num_cores) + while ((1 << bits) < c->x86_max_cores) bits++; cpu_core_id[cpu] = phys_proc_id[cpu] & ((1<<bits)-1); phys_proc_id[cpu] >>= bits; printk(KERN_INFO "CPU %d(%d) -> Core %d\n", - cpu, c->x86_num_cores, cpu_core_id[cpu]); + cpu, c->x86_max_cores, cpu_core_id[cpu]); } #endif } diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c index c145fb30002e..31e344b26bae 100644 --- a/arch/i386/kernel/cpu/common.c +++ b/arch/i386/kernel/cpu/common.c @@ -231,10 +231,10 @@ static void __init early_cpu_detect(void) cpuid(0x00000001, &tfms, &misc, &junk, &cap0); c->x86 = (tfms >> 8) & 15; c->x86_model = (tfms >> 4) & 15; - if (c->x86 == 0xf) { + if (c->x86 == 0xf) c->x86 += (tfms >> 20) & 0xff; + if (c->x86 >= 0x6) c->x86_model += ((tfms >> 16) & 0xF) << 4; - } c->x86_mask = tfms & 15; if (cap0 & (1<<19)) c->x86_cache_alignment = ((misc >> 8) & 0xff) * 8; @@ -333,7 +333,7 @@ void __devinit identify_cpu(struct cpuinfo_x86 *c) c->x86_model = c->x86_mask = 0; /* So far unknown... */ c->x86_vendor_id[0] = '\0'; /* Unset */ c->x86_model_id[0] = '\0'; /* Unset */ - c->x86_num_cores = 1; + c->x86_max_cores = 1; memset(&c->x86_capability, 0, sizeof c->x86_capability); if (!have_cpuid_p()) { @@ -443,52 +443,44 @@ void __devinit identify_cpu(struct cpuinfo_x86 *c) void __devinit detect_ht(struct cpuinfo_x86 *c) { u32 eax, ebx, ecx, edx; - int index_msb, tmp; + int index_msb, core_bits; int cpu = smp_processor_id(); + cpuid(1, &eax, &ebx, &ecx, &edx); + + c->apicid = phys_pkg_id((ebx >> 24) & 0xFF, 0); + if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY)) return; - cpuid(1, &eax, &ebx, &ecx, &edx); smp_num_siblings = (ebx & 0xff0000) >> 16; if (smp_num_siblings == 1) { printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); } else if (smp_num_siblings > 1 ) { - index_msb = 31; if (smp_num_siblings > NR_CPUS) { printk(KERN_WARNING "CPU: Unsupported number of the siblings %d", smp_num_siblings); smp_num_siblings = 1; return; } - tmp = smp_num_siblings; - while ((tmp & 0x80000000 ) == 0) { - tmp <<=1 ; - index_msb--; - } - if (smp_num_siblings & (smp_num_siblings - 1)) - index_msb++; + + index_msb = get_count_order(smp_num_siblings); phys_proc_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb); printk(KERN_INFO "CPU: Physical Processor ID: %d\n", phys_proc_id[cpu]); - smp_num_siblings = smp_num_siblings / c->x86_num_cores; + smp_num_siblings = smp_num_siblings / c->x86_max_cores; - tmp = smp_num_siblings; - index_msb = 31; - while ((tmp & 0x80000000) == 0) { - tmp <<=1 ; - index_msb--; - } + index_msb = get_count_order(smp_num_siblings) ; - if (smp_num_siblings & (smp_num_siblings - 1)) - index_msb++; + core_bits = get_count_order(c->x86_max_cores); - cpu_core_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb); + cpu_core_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb) & + ((1 << core_bits) - 1); - if (c->x86_num_cores > 1) + if (c->x86_max_cores > 1) printk(KERN_INFO "CPU: Processor Core ID: %d\n", cpu_core_id[cpu]); } diff --git a/arch/i386/kernel/cpu/intel.c b/arch/i386/kernel/cpu/intel.c index c28d26fb5f24..5e2da704f0fa 100644 --- a/arch/i386/kernel/cpu/intel.c +++ b/arch/i386/kernel/cpu/intel.c @@ -158,7 +158,7 @@ static void __devinit init_intel(struct cpuinfo_x86 *c) if ( p ) strcpy(c->x86_model_id, p); - c->x86_num_cores = num_cpu_cores(c); + c->x86_max_cores = num_cpu_cores(c); detect_ht(c); diff --git a/arch/i386/kernel/cpu/intel_cacheinfo.c b/arch/i386/kernel/cpu/intel_cacheinfo.c index 4dc42a189ae5..fbfd374aa336 100644 --- a/arch/i386/kernel/cpu/intel_cacheinfo.c +++ b/arch/i386/kernel/cpu/intel_cacheinfo.c @@ -293,29 +293,45 @@ static struct _cpuid4_info *cpuid4_info[NR_CPUS]; #ifdef CONFIG_SMP static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) { - struct _cpuid4_info *this_leaf; + struct _cpuid4_info *this_leaf, *sibling_leaf; unsigned long num_threads_sharing; -#ifdef CONFIG_X86_HT - struct cpuinfo_x86 *c = cpu_data + cpu; -#endif + int index_msb, i; + struct cpuinfo_x86 *c = cpu_data; this_leaf = CPUID4_INFO_IDX(cpu, index); num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing; if (num_threads_sharing == 1) cpu_set(cpu, this_leaf->shared_cpu_map); -#ifdef CONFIG_X86_HT - else if (num_threads_sharing == smp_num_siblings) - this_leaf->shared_cpu_map = cpu_sibling_map[cpu]; - else if (num_threads_sharing == (c->x86_num_cores * smp_num_siblings)) - this_leaf->shared_cpu_map = cpu_core_map[cpu]; - else - printk(KERN_DEBUG "Number of CPUs sharing cache didn't match " - "any known set of CPUs\n"); -#endif + else { + index_msb = get_count_order(num_threads_sharing); + + for_each_online_cpu(i) { + if (c[i].apicid >> index_msb == + c[cpu].apicid >> index_msb) { + cpu_set(i, this_leaf->shared_cpu_map); + if (i != cpu && cpuid4_info[i]) { + sibling_leaf = CPUID4_INFO_IDX(i, index); + cpu_set(cpu, sibling_leaf->shared_cpu_map); + } + } + } + } +} +static void __devinit cache_remove_shared_cpu_map(unsigned int cpu, int index) +{ + struct _cpuid4_info *this_leaf, *sibling_leaf; + int sibling; + + this_leaf = CPUID4_INFO_IDX(cpu, index); + for_each_cpu_mask(sibling, this_leaf->shared_cpu_map) { + sibling_leaf = CPUID4_INFO_IDX(sibling, index); + cpu_clear(cpu, sibling_leaf->shared_cpu_map); + } } #else static void __init cache_shared_cpu_map_setup(unsigned int cpu, int index) {} +static void __init cache_remove_shared_cpu_map(unsigned int cpu, int index) {} #endif static void free_cache_attributes(unsigned int cpu) @@ -574,8 +590,10 @@ static void __cpuexit cache_remove_dev(struct sys_device * sys_dev) unsigned int cpu = sys_dev->id; unsigned long i; - for (i = 0; i < num_cache_leaves; i++) + for (i = 0; i < num_cache_leaves; i++) { + cache_remove_shared_cpu_map(cpu, i); kobject_unregister(&(INDEX_KOBJECT_PTR(cpu,i)->kobj)); + } kobject_unregister(cache_kobject[cpu]); cpuid4_cache_sysfs_exit(cpu); return; diff --git a/arch/i386/kernel/cpu/mtrr/main.c b/arch/i386/kernel/cpu/mtrr/main.c index dd4ebd6af7e4..1e9db198c440 100644 --- a/arch/i386/kernel/cpu/mtrr/main.c +++ b/arch/i386/kernel/cpu/mtrr/main.c @@ -626,6 +626,14 @@ void __init mtrr_bp_init(void) if (cpuid_eax(0x80000000) >= 0x80000008) { u32 phys_addr; phys_addr = cpuid_eax(0x80000008) & 0xff; + /* CPUID workaround for Intel 0F33/0F34 CPU */ + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && + boot_cpu_data.x86 == 0xF && + boot_cpu_data.x86_model == 0x3 && + (boot_cpu_data.x86_mask == 0x3 || + boot_cpu_data.x86_mask == 0x4)) + phys_addr = 36; + size_or_mask = ~((1 << (phys_addr - PAGE_SHIFT)) - 1); size_and_mask = ~size_or_mask & 0xfff00000; } else if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR && diff --git a/arch/i386/kernel/cpu/proc.c b/arch/i386/kernel/cpu/proc.c index 41b871ecf4b3..e7921315ae9d 100644 --- a/arch/i386/kernel/cpu/proc.c +++ b/arch/i386/kernel/cpu/proc.c @@ -94,12 +94,11 @@ static int show_cpuinfo(struct seq_file *m, void *v) if (c->x86_cache_size >= 0) seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size); #ifdef CONFIG_X86_HT - if (c->x86_num_cores * smp_num_siblings > 1) { + if (c->x86_max_cores * smp_num_siblings > 1) { seq_printf(m, "physical id\t: %d\n", phys_proc_id[n]); - seq_printf(m, "siblings\t: %d\n", - c->x86_num_cores * smp_num_siblings); + seq_printf(m, "siblings\t: %d\n", cpus_weight(cpu_core_map[n])); seq_printf(m, "core id\t\t: %d\n", cpu_core_id[n]); - seq_printf(m, "cpu cores\t: %d\n", c->x86_num_cores); + seq_printf(m, "cpu cores\t: %d\n", c->booted_cores); } #endif diff --git a/arch/i386/kernel/crash.c b/arch/i386/kernel/crash.c index af809ccf5fbe..0248e084017c 100644 --- a/arch/i386/kernel/crash.c +++ b/arch/i386/kernel/crash.c @@ -21,6 +21,7 @@ #include <asm/hardirq.h> #include <asm/nmi.h> #include <asm/hw_irq.h> +#include <asm/apic.h> #include <mach_ipi.h> @@ -147,6 +148,7 @@ static int crash_nmi_callback(struct pt_regs *regs, int cpu) regs = &fixed_regs; } crash_save_this_cpu(regs, cpu); + disable_local_APIC(); atomic_dec(&waiting_for_crash_ipi); /* Assume hlt works */ halt(); @@ -186,6 +188,7 @@ static void nmi_shootdown_cpus(void) } /* Leave the nmi callback set */ + disable_local_APIC(); } #else static void nmi_shootdown_cpus(void) @@ -210,5 +213,9 @@ void machine_crash_shutdown(struct pt_regs *regs) /* Make a note of crashing cpu. Will be used in NMI callback.*/ crashing_cpu = smp_processor_id(); nmi_shootdown_cpus(); + lapic_shutdown(); +#if defined(CONFIG_X86_IO_APIC) + disable_IO_APIC(); +#endif crash_save_self(regs); } diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index bc5a9d97466b..d16520da4550 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -72,9 +72,11 @@ int phys_proc_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID}; /* Core ID of each logical CPU */ int cpu_core_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID}; +/* representing HT siblings of each logical CPU */ cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly; EXPORT_SYMBOL(cpu_sibling_map); +/* representing HT and core siblings of each logical CPU */ cpumask_t cpu_core_map[NR_CPUS] __read_mostly; EXPORT_SYMBOL(cpu_core_map); @@ -442,35 +444,60 @@ static void __devinit smp_callin(void) static int cpucount; +/* representing cpus for which sibling maps can be computed */ +static cpumask_t cpu_sibling_setup_map; + static inline void set_cpu_sibling_map(int cpu) { int i; + struct cpuinfo_x86 *c = cpu_data; + + cpu_set(cpu, cpu_sibling_setup_map); if (smp_num_siblings > 1) { - for (i = 0; i < NR_CPUS; i++) { - if (!cpu_isset(i, cpu_callout_map)) - continue; - if (cpu_core_id[cpu] == cpu_core_id[i]) { + for_each_cpu_mask(i, cpu_sibling_setup_map) { + if (phys_proc_id[cpu] == phys_proc_id[i] && + cpu_core_id[cpu] == cpu_core_id[i]) { cpu_set(i, cpu_sibling_map[cpu]); cpu_set(cpu, cpu_sibling_map[i]); + cpu_set(i, cpu_core_map[cpu]); + cpu_set(cpu, cpu_core_map[i]); } } } else { cpu_set(cpu, cpu_sibling_map[cpu]); } - if (current_cpu_data.x86_num_cores > 1) { - for (i = 0; i < NR_CPUS; i++) { - if (!cpu_isset(i, cpu_callout_map)) - continue; - if (phys_proc_id[cpu] == phys_proc_id[i]) { - cpu_set(i, cpu_core_map[cpu]); - cpu_set(cpu, cpu_core_map[i]); - } - } - } else { + if (current_cpu_data.x86_max_cores == 1) { cpu_core_map[cpu] = cpu_sibling_map[cpu]; + c[cpu].booted_cores = 1; + return; + } + + for_each_cpu_mask(i, cpu_sibling_setup_map) { + if (phys_proc_id[cpu] == phys_proc_id[i]) { + cpu_set(i, cpu_core_map[cpu]); + cpu_set(cpu, cpu_core_map[i]); + /* + * Does this new cpu bringup a new core? + */ + if (cpus_weight(cpu_sibling_map[cpu]) == 1) { + /* + * for each core in package, increment + * the booted_cores for this new cpu + */ + if (first_cpu(cpu_sibling_map[i]) == i) + c[cpu].booted_cores++; + /* + * increment the core count for all + * the other cpus in this package + */ + if (i != cpu) + c[i].booted_cores++; + } else if (i != cpu && !c[cpu].booted_cores) + c[cpu].booted_cores = c[i].booted_cores; + } } } @@ -1095,11 +1122,8 @@ static void __init smp_boot_cpus(unsigned int max_cpus) current_thread_info()->cpu = 0; smp_tune_scheduling(); - cpus_clear(cpu_sibling_map[0]); - cpu_set(0, cpu_sibling_map[0]); - cpus_clear(cpu_core_map[0]); - cpu_set(0, cpu_core_map[0]); + set_cpu_sibling_map(0); /* * If we couldn't find an SMP configuration at boot time, @@ -1278,15 +1302,24 @@ static void remove_siblinginfo(int cpu) { int sibling; + struct cpuinfo_x86 *c = cpu_data; + for_each_cpu_mask(sibling, cpu_core_map[cpu]) { + cpu_clear(cpu, cpu_core_map[sibling]); + /* + * last thread sibling in this cpu core going down + */ + if (cpus_weight(cpu_sibling_map[cpu]) == 1) + c[sibling].booted_cores--; + } + for_each_cpu_mask(sibling, cpu_sibling_map[cpu]) cpu_clear(cpu, cpu_sibling_map[sibling]); - for_each_cpu_mask(sibling, cpu_core_map[cpu]) - cpu_clear(cpu, cpu_core_map[sibling]); cpus_clear(cpu_sibling_map[cpu]); cpus_clear(cpu_core_map[cpu]); phys_proc_id[cpu] = BAD_APICID; cpu_core_id[cpu] = BAD_APICID; + cpu_clear(cpu, cpu_sibling_setup_map); } int __cpu_disable(void) diff --git a/arch/i386/kernel/srat.c b/arch/i386/kernel/srat.c index 8de658db8146..52b3ed5d2cb5 100644 --- a/arch/i386/kernel/srat.c +++ b/arch/i386/kernel/srat.c @@ -137,8 +137,8 @@ static void __init parse_memory_affinity_structure (char *sratp) "enabled and removable" : "enabled" ) ); } -#if MAX_NR_ZONES != 3 -#error "MAX_NR_ZONES != 3, chunk_to_zone requires review" +#if MAX_NR_ZONES != 4 +#error "MAX_NR_ZONES != 4, chunk_to_zone requires review" #endif /* Take a chunk of pages from page frame cstart to cend and count the number * of pages in each zone, returned via zones[]. diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 8796e12c56f3..b76ce1fe2e7f 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -58,6 +58,10 @@ config IA64_UNCACHED_ALLOCATOR bool select GENERIC_ALLOCATOR +config ZONE_DMA_IS_DMA32 + bool + default y + choice prompt "System type" default IA64_GENERIC diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index e92ea64d8040..4305d2ba76f6 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -202,12 +202,9 @@ default_idle (void) { local_irq_enable(); while (!need_resched()) { - if (can_do_pal_halt) { - local_irq_disable(); - if (!need_resched()) - safe_halt(); - local_irq_enable(); - } else + if (can_do_pal_halt) + safe_halt(); + else cpu_relax(); } } @@ -272,10 +269,14 @@ cpu_idle (void) { void (*mark_idle)(int) = ia64_mark_idle; int cpu = smp_processor_id(); - set_thread_flag(TIF_POLLING_NRFLAG); /* endless idle loop with no priority at all */ while (1) { + if (can_do_pal_halt) + clear_thread_flag(TIF_POLLING_NRFLAG); + else + set_thread_flag(TIF_POLLING_NRFLAG); + if (!need_resched()) { void (*idle)(void); #ifdef CONFIG_SMP diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index c523029674e6..94df74bcc0ee 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -261,7 +261,7 @@ config PPC_ISERIES config EMBEDDED6xx bool "Embedded 6xx/7xx/7xxx-based board" - depends on PPC32 + depends on PPC32 && BROKEN config APUS bool "Amiga-APUS" @@ -305,7 +305,7 @@ config PPC_PMAC64 config PPC_PREP bool " PowerPC Reference Platform (PReP) based machines" - depends on PPC_MULTIPLATFORM && PPC32 + depends on PPC_MULTIPLATFORM && PPC32 && BROKEN select PPC_I8259 select PPC_INDIRECT_PCI default y @@ -932,6 +932,7 @@ source "arch/powerpc/oprofile/Kconfig" config KPROBES bool "Kprobes (EXPERIMENTAL)" + depends on PPC64 help Kprobes allows you to trap at almost any kernel address and execute a callback function. register_kprobe() establishes diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 5bc11bd36c1f..d41ad2e675db 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -187,7 +187,7 @@ archprepare: checkbin # Temporary hack until we have migrated to asm-powerpc include/asm: arch/$(ARCH)/include/asm -arch/$(ARCH)/include/asm: +arch/$(ARCH)/include/asm: FORCE $(Q)if [ ! -d arch/$(ARCH)/include ]; then mkdir -p arch/$(ARCH)/include; fi $(Q)ln -fsn $(srctree)/include/asm-$(OLDARCH) arch/$(ARCH)/include/asm diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig index 9f09dff9e11a..913962c1dae0 100644 --- a/arch/powerpc/configs/pseries_defconfig +++ b/arch/powerpc/configs/pseries_defconfig @@ -1,18 +1,33 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.14-rc4 -# Thu Oct 20 08:32:17 2005 +# Linux kernel version: 2.6.15-rc1 +# Mon Nov 14 15:27:00 2005 # +CONFIG_PPC64=y CONFIG_64BIT=y +CONFIG_PPC_MERGE=y CONFIG_MMU=y +CONFIG_GENERIC_HARDIRQS=y CONFIG_RWSEM_XCHGADD_ALGORITHM=y CONFIG_GENERIC_CALIBRATE_DELAY=y -CONFIG_GENERIC_ISA_DMA=y +CONFIG_PPC=y CONFIG_EARLY_PRINTK=y CONFIG_COMPAT=y +CONFIG_SYSVIPC_COMPAT=y CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y CONFIG_ARCH_MAY_HAVE_PC_FDC=y -CONFIG_FORCE_MAX_ZONEORDER=13 + +# +# Processor support +# +# CONFIG_POWER4_ONLY is not set +CONFIG_POWER3=y +CONFIG_POWER4=y +CONFIG_PPC_FPU=y +CONFIG_ALTIVEC=y +CONFIG_PPC_STD_MMU=y +CONFIG_SMP=y +CONFIG_NR_CPUS=128 # # Code maturity level options @@ -68,75 +83,103 @@ CONFIG_MODVERSIONS=y CONFIG_MODULE_SRCVERSION_ALL=y CONFIG_KMOD=y CONFIG_STOP_MACHINE=y -CONFIG_SYSVIPC_COMPAT=y + +# +# Block layer +# + +# +# IO Schedulers +# +CONFIG_IOSCHED_NOOP=y +CONFIG_IOSCHED_AS=y +CONFIG_IOSCHED_DEADLINE=y +CONFIG_IOSCHED_CFQ=y +CONFIG_DEFAULT_AS=y +# CONFIG_DEFAULT_DEADLINE is not set +# CONFIG_DEFAULT_CFQ is not set +# CONFIG_DEFAULT_NOOP is not set +CONFIG_DEFAULT_IOSCHED="anticipatory" # # Platform support # -# CONFIG_PPC_ISERIES is not set CONFIG_PPC_MULTIPLATFORM=y +# CONFIG_PPC_ISERIES is not set +# CONFIG_EMBEDDED6xx is not set +# CONFIG_APUS is not set CONFIG_PPC_PSERIES=y -# CONFIG_PPC_BPA is not set # CONFIG_PPC_PMAC is not set # CONFIG_PPC_MAPLE is not set -CONFIG_PPC=y -CONFIG_PPC64=y +# CONFIG_PPC_CELL is not set CONFIG_PPC_OF=y CONFIG_XICS=y +# CONFIG_U3_DART is not set CONFIG_MPIC=y -CONFIG_ALTIVEC=y -CONFIG_PPC_SPLPAR=y -CONFIG_KEXEC=y +CONFIG_PPC_RTAS=y +CONFIG_RTAS_ERROR_LOGGING=y +CONFIG_RTAS_PROC=y +CONFIG_RTAS_FLASH=m +# CONFIG_MMIO_NVRAM is not set CONFIG_IBMVIO=y -# CONFIG_U3_DART is not set -# CONFIG_BOOTX_TEXT is not set -# CONFIG_POWER4_ONLY is not set +# CONFIG_PPC_MPC106 is not set +# CONFIG_GENERIC_TBSYNC is not set +# CONFIG_CPU_FREQ is not set +# CONFIG_WANT_EARLY_SERIAL is not set + +# +# Kernel options +# +# CONFIG_HZ_100 is not set +CONFIG_HZ_250=y +# CONFIG_HZ_1000 is not set +CONFIG_HZ=250 +CONFIG_PREEMPT_NONE=y +# CONFIG_PREEMPT_VOLUNTARY is not set +# CONFIG_PREEMPT is not set +# CONFIG_PREEMPT_BKL is not set +CONFIG_BINFMT_ELF=y +# CONFIG_BINFMT_MISC is not set +CONFIG_FORCE_MAX_ZONEORDER=13 CONFIG_IOMMU_VMERGE=y -CONFIG_SMP=y -CONFIG_NR_CPUS=128 +CONFIG_HOTPLUG_CPU=y +CONFIG_KEXEC=y +# CONFIG_IRQ_ALL_CPUS is not set +CONFIG_PPC_SPLPAR=y +CONFIG_EEH=y +CONFIG_SCANLOG=m +CONFIG_LPARCFG=y +CONFIG_NUMA=y CONFIG_ARCH_SELECT_MEMORY_MODEL=y -CONFIG_ARCH_FLATMEM_ENABLE=y -CONFIG_ARCH_DISCONTIGMEM_ENABLE=y -CONFIG_ARCH_DISCONTIGMEM_DEFAULT=y CONFIG_ARCH_SPARSEMEM_ENABLE=y +CONFIG_ARCH_SPARSEMEM_DEFAULT=y CONFIG_SELECT_MEMORY_MODEL=y # CONFIG_FLATMEM_MANUAL is not set -CONFIG_DISCONTIGMEM_MANUAL=y -# CONFIG_SPARSEMEM_MANUAL is not set -CONFIG_DISCONTIGMEM=y -CONFIG_FLAT_NODE_MEM_MAP=y +# CONFIG_DISCONTIGMEM_MANUAL is not set +CONFIG_SPARSEMEM_MANUAL=y +CONFIG_SPARSEMEM=y CONFIG_NEED_MULTIPLE_NODES=y +CONFIG_HAVE_MEMORY_PRESENT=y # CONFIG_SPARSEMEM_STATIC is not set +CONFIG_SPARSEMEM_EXTREME=y +# CONFIG_MEMORY_HOTPLUG is not set +CONFIG_SPLIT_PTLOCK_CPUS=4096 CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID=y CONFIG_NODES_SPAN_OTHER_NODES=y -CONFIG_NUMA=y +# CONFIG_PPC_64K_PAGES is not set CONFIG_SCHED_SMT=y -CONFIG_PREEMPT_NONE=y -# CONFIG_PREEMPT_VOLUNTARY is not set -# CONFIG_PREEMPT is not set -# CONFIG_PREEMPT_BKL is not set -# CONFIG_HZ_100 is not set -CONFIG_HZ_250=y -# CONFIG_HZ_1000 is not set -CONFIG_HZ=250 -CONFIG_EEH=y -CONFIG_GENERIC_HARDIRQS=y -CONFIG_PPC_RTAS=y -CONFIG_RTAS_PROC=y -CONFIG_RTAS_FLASH=m -CONFIG_SCANLOG=m -CONFIG_LPARCFG=y -CONFIG_SECCOMP=y -CONFIG_BINFMT_ELF=y -# CONFIG_BINFMT_MISC is not set -CONFIG_HOTPLUG_CPU=y CONFIG_PROC_DEVICETREE=y # CONFIG_CMDLINE_BOOL is not set +# CONFIG_PM is not set +CONFIG_SECCOMP=y CONFIG_ISA_DMA_API=y # -# Bus Options +# Bus options # +CONFIG_GENERIC_ISA_DMA=y +CONFIG_PPC_I8259=y +# CONFIG_PPC_INDIRECT_PCI is not set CONFIG_PCI=y CONFIG_PCI_DOMAINS=y CONFIG_PCI_LEGACY_PROC=y @@ -156,6 +199,7 @@ CONFIG_HOTPLUG_PCI=m # CONFIG_HOTPLUG_PCI_SHPC is not set CONFIG_HOTPLUG_PCI_RPA=m CONFIG_HOTPLUG_PCI_RPA_DLPAR=m +CONFIG_KERNEL_START=0xc000000000000000 # # Networking @@ -197,6 +241,10 @@ CONFIG_TCP_CONG_BIC=y # CONFIG_IPV6 is not set CONFIG_NETFILTER=y # CONFIG_NETFILTER_DEBUG is not set + +# +# Core Netfilter Configuration +# CONFIG_NETFILTER_NETLINK=y CONFIG_NETFILTER_NETLINK_QUEUE=m CONFIG_NETFILTER_NETLINK_LOG=m @@ -299,6 +347,10 @@ CONFIG_LLC=y # CONFIG_NET_DIVERT is not set # CONFIG_ECONET is not set # CONFIG_WAN_ROUTER is not set + +# +# QoS and/or fair queueing +# # CONFIG_NET_SCHED is not set CONFIG_NET_CLS_ROUTE=y @@ -368,14 +420,6 @@ CONFIG_BLK_DEV_RAM_COUNT=16 CONFIG_BLK_DEV_RAM_SIZE=65536 CONFIG_BLK_DEV_INITRD=y # CONFIG_CDROM_PKTCDVD is not set - -# -# IO Schedulers -# -CONFIG_IOSCHED_NOOP=y -CONFIG_IOSCHED_AS=y -CONFIG_IOSCHED_DEADLINE=y -CONFIG_IOSCHED_CFQ=y # CONFIG_ATA_OVER_ETH is not set # @@ -473,6 +517,7 @@ CONFIG_SCSI_ISCSI_ATTRS=m # # SCSI low-level drivers # +# CONFIG_ISCSI_TCP is not set # CONFIG_BLK_DEV_3W_XXXX_RAID is not set # CONFIG_SCSI_3W_9XXX is not set # CONFIG_SCSI_ACARD is not set @@ -559,6 +604,7 @@ CONFIG_DM_MULTIPATH_EMC=m # # Macintosh device drivers # +# CONFIG_WINDFARM is not set # # Network device support @@ -645,7 +691,6 @@ CONFIG_IXGB=m # CONFIG_IXGB_NAPI is not set CONFIG_S2IO=m # CONFIG_S2IO_NAPI is not set -# CONFIG_2BUFF_MODE is not set # # Token Ring devices @@ -674,6 +719,7 @@ CONFIG_PPP_ASYNC=m CONFIG_PPP_SYNC_TTY=m CONFIG_PPP_DEFLATE=m CONFIG_PPP_BSDCOMP=m +# CONFIG_PPP_MPPE is not set CONFIG_PPPOE=m # CONFIG_SLIP is not set # CONFIG_NET_FC is not set @@ -784,6 +830,8 @@ CONFIG_HVCS=m # # CONFIG_WATCHDOG is not set # CONFIG_RTC is not set +CONFIG_GEN_RTC=y +# CONFIG_GEN_RTC_X is not set # CONFIG_DTLK is not set # CONFIG_R3964 is not set # CONFIG_APPLICOM is not set @@ -801,6 +849,7 @@ CONFIG_MAX_RAW_DEVS=1024 # TPM devices # # CONFIG_TCG_TPM is not set +# CONFIG_TELCLOCK is not set # # I2C support @@ -852,6 +901,7 @@ CONFIG_I2C_ALGOBIT=y # CONFIG_SENSORS_PCF8591 is not set # CONFIG_SENSORS_RTC8564 is not set # CONFIG_SENSORS_MAX6875 is not set +# CONFIG_RTC_X1205_I2C is not set # CONFIG_I2C_DEBUG_CORE is not set # CONFIG_I2C_DEBUG_ALGO is not set # CONFIG_I2C_DEBUG_BUS is not set @@ -893,7 +943,6 @@ CONFIG_FB=y CONFIG_FB_CFB_FILLRECT=y CONFIG_FB_CFB_COPYAREA=y CONFIG_FB_CFB_IMAGEBLIT=y -CONFIG_FB_SOFT_CURSOR=y CONFIG_FB_MACMODES=y CONFIG_FB_MODE_HELPERS=y CONFIG_FB_TILEBLITTING=y @@ -905,6 +954,7 @@ CONFIG_FB_OF=y # CONFIG_FB_ASILIANT is not set # CONFIG_FB_IMSTT is not set # CONFIG_FB_VGA16 is not set +# CONFIG_FB_S1D13XXX is not set # CONFIG_FB_NVIDIA is not set # CONFIG_FB_RIVA is not set CONFIG_FB_MATROX=y @@ -927,7 +977,6 @@ CONFIG_FB_RADEON_I2C=y # CONFIG_FB_VOODOO1 is not set # CONFIG_FB_CYBLA is not set # CONFIG_FB_TRIDENT is not set -# CONFIG_FB_S1D13XXX is not set # CONFIG_FB_VIRTUAL is not set # @@ -936,6 +985,7 @@ CONFIG_FB_RADEON_I2C=y # CONFIG_VGA_CONSOLE is not set CONFIG_DUMMY_CONSOLE=y CONFIG_FRAMEBUFFER_CONSOLE=y +# CONFIG_FRAMEBUFFER_CONSOLE_ROTATION is not set # CONFIG_FONTS is not set CONFIG_FONT_8x8=y CONFIG_FONT_8x16=y @@ -990,12 +1040,15 @@ CONFIG_USB_OHCI_LITTLE_ENDIAN=y # # USB Device Class drivers # -# CONFIG_USB_BLUETOOTH_TTY is not set # CONFIG_USB_ACM is not set # CONFIG_USB_PRINTER is not set # -# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' may also be needed; see USB_STORAGE Help for more information +# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' +# + +# +# may also be needed; see USB_STORAGE Help for more information # CONFIG_USB_STORAGE=y # CONFIG_USB_STORAGE_DEBUG is not set @@ -1106,6 +1159,7 @@ CONFIG_INFINIBAND_MTHCA=m # CONFIG_INFINIBAND_MTHCA_DEBUG is not set CONFIG_INFINIBAND_IPOIB=m # CONFIG_INFINIBAND_IPOIB_DEBUG is not set +# CONFIG_INFINIBAND_SRP is not set # # SN Devices @@ -1288,10 +1342,25 @@ CONFIG_NLS_ISO8859_1=y # CONFIG_NLS_UTF8 is not set # -# Profiling support +# Library routines +# +CONFIG_CRC_CCITT=m +# CONFIG_CRC16 is not set +CONFIG_CRC32=y +CONFIG_LIBCRC32C=m +CONFIG_ZLIB_INFLATE=y +CONFIG_ZLIB_DEFLATE=m +CONFIG_TEXTSEARCH=y +CONFIG_TEXTSEARCH_KMP=m +CONFIG_TEXTSEARCH_BM=m +CONFIG_TEXTSEARCH_FSM=m + +# +# Instrumentation Support # CONFIG_PROFILING=y CONFIG_OPROFILE=y +# CONFIG_KPROBES is not set # # Kernel hacking @@ -1308,14 +1377,15 @@ CONFIG_DETECT_SOFTLOCKUP=y # CONFIG_DEBUG_KOBJECT is not set # CONFIG_DEBUG_INFO is not set CONFIG_DEBUG_FS=y +# CONFIG_DEBUG_VM is not set +# CONFIG_RCU_TORTURE_TEST is not set CONFIG_DEBUG_STACKOVERFLOW=y -# CONFIG_KPROBES is not set CONFIG_DEBUG_STACK_USAGE=y CONFIG_DEBUGGER=y CONFIG_XMON=y CONFIG_XMON_DEFAULT=y -# CONFIG_PPCDBG is not set CONFIG_IRQSTACKS=y +# CONFIG_BOOTX_TEXT is not set # # Security options @@ -1355,17 +1425,3 @@ CONFIG_CRYPTO_TEST=m # # Hardware crypto devices # - -# -# Library routines -# -CONFIG_CRC_CCITT=m -# CONFIG_CRC16 is not set -CONFIG_CRC32=y -CONFIG_LIBCRC32C=m -CONFIG_ZLIB_INFLATE=y -CONFIG_ZLIB_DEFLATE=m -CONFIG_TEXTSEARCH=y -CONFIG_TEXTSEARCH_KMP=m -CONFIG_TEXTSEARCH_BM=m -CONFIG_TEXTSEARCH_FSM=m diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 9a74b7ab03a4..4970e3721a84 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -25,7 +25,7 @@ obj-$(CONFIG_PPC_OF) += of_device.o procfs-$(CONFIG_PPC64) := proc_ppc64.o obj-$(CONFIG_PROC_FS) += $(procfs-y) rtaspci-$(CONFIG_PPC64) := rtas_pci.o -obj-$(CONFIG_PPC_RTAS) += rtas.o $(rtaspci-y) +obj-$(CONFIG_PPC_RTAS) += rtas.o rtas-rtc.o $(rtaspci-y) obj-$(CONFIG_RTAS_FLASH) += rtas_flash.o obj-$(CONFIG_RTAS_PROC) += rtas-proc.o obj-$(CONFIG_LPARCFG) += lparcfg.o @@ -49,12 +49,23 @@ extra-y += vmlinux.lds obj-y += process.o init_task.o time.o \ prom.o traps.o setup-common.o obj-$(CONFIG_PPC32) += entry_32.o setup_32.o misc_32.o systbl.o -obj-$(CONFIG_PPC64) += misc_64.o +obj-$(CONFIG_PPC64) += misc_64.o dma_64.o iommu.o obj-$(CONFIG_PPC_OF) += prom_init.o obj-$(CONFIG_MODULES) += ppc_ksyms.o obj-$(CONFIG_BOOTX_TEXT) += btext.o obj-$(CONFIG_6xx) += idle_6xx.o obj-$(CONFIG_SMP) += smp.o +obj-$(CONFIG_KPROBES) += kprobes.o + +module-$(CONFIG_PPC64) += module_64.o +obj-$(CONFIG_MODULES) += $(module-y) + +pci64-$(CONFIG_PPC64) += pci_64.o pci_dn.o pci_iommu.o \ + pci_direct_iommu.o iomap.o +obj-$(CONFIG_PCI) += $(pci64-y) + +kexec64-$(CONFIG_PPC64) += machine_kexec_64.o +obj-$(CONFIG_KEXEC) += $(kexec64-y) ifeq ($(CONFIG_PPC_ISERIES),y) $(obj)/head_64.o: $(obj)/lparmap.s @@ -62,11 +73,8 @@ AFLAGS_head_64.o += -I$(obj) endif else -# stuff used from here for ARCH=ppc or ARCH=ppc64 +# stuff used from here for ARCH=ppc smpobj-$(CONFIG_SMP) += smp.o -obj-$(CONFIG_PPC64) += traps.o process.o init_task.o time.o \ - setup-common.o $(smpobj-y) - endif diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 4550eb4f4fbd..91538d2445bf 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -270,13 +270,15 @@ int main(void) DEFINE(TVAL64_TV_USEC, offsetof(struct timeval, tv_usec)); DEFINE(TVAL32_TV_SEC, offsetof(struct compat_timeval, tv_sec)); DEFINE(TVAL32_TV_USEC, offsetof(struct compat_timeval, tv_usec)); + DEFINE(TSPC64_TV_SEC, offsetof(struct timespec, tv_sec)); + DEFINE(TSPC64_TV_NSEC, offsetof(struct timespec, tv_nsec)); DEFINE(TSPC32_TV_SEC, offsetof(struct compat_timespec, tv_sec)); DEFINE(TSPC32_TV_NSEC, offsetof(struct compat_timespec, tv_nsec)); #else DEFINE(TVAL32_TV_SEC, offsetof(struct timeval, tv_sec)); DEFINE(TVAL32_TV_USEC, offsetof(struct timeval, tv_usec)); - DEFINE(TSPEC32_TV_SEC, offsetof(struct timespec, tv_sec)); - DEFINE(TSPEC32_TV_NSEC, offsetof(struct timespec, tv_nsec)); + DEFINE(TSPC32_TV_SEC, offsetof(struct timespec, tv_sec)); + DEFINE(TSPC32_TV_NSEC, offsetof(struct timespec, tv_nsec)); #endif /* timeval/timezone offsets for use by vdso */ DEFINE(TZONE_TZ_MINWEST, offsetof(struct timezone, tz_minuteswest)); diff --git a/arch/ppc64/kernel/dma.c b/arch/powerpc/kernel/dma_64.c index 7c3419656ccc..7c3419656ccc 100644 --- a/arch/ppc64/kernel/dma.c +++ b/arch/powerpc/kernel/dma_64.c diff --git a/arch/ppc64/kernel/iomap.c b/arch/powerpc/kernel/iomap.c index 6160c8dbb7c5..6160c8dbb7c5 100644 --- a/arch/ppc64/kernel/iomap.c +++ b/arch/powerpc/kernel/iomap.c diff --git a/arch/ppc64/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 4d9b4388918b..4d9b4388918b 100644 --- a/arch/ppc64/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 4b7940693f3d..5a71ed9612fe 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -71,6 +71,11 @@ #include <asm/paca.h> #endif +int __irq_offset_value; +#ifdef CONFIG_PPC32 +EXPORT_SYMBOL(__irq_offset_value); +#endif + static int ppc_spurious_interrupts; #if defined(CONFIG_PPC_ISERIES) && defined(CONFIG_SMP) @@ -98,7 +103,6 @@ extern atomic_t ipi_sent; EXPORT_SYMBOL(irq_desc); int distribute_irqs = 1; -int __irq_offset_value; u64 ppc64_interrupt_controller; #endif /* CONFIG_PPC64 */ @@ -311,7 +315,6 @@ void __init init_IRQ(void) } #ifdef CONFIG_PPC64 -#ifndef CONFIG_PPC_ISERIES /* * Virtual IRQ mapping code, used on systems with XICS interrupt controllers. */ @@ -420,8 +423,6 @@ unsigned int real_irq_to_virt_slowpath(unsigned int real_irq) } -#endif /* CONFIG_PPC_ISERIES */ - #ifdef CONFIG_IRQSTACKS struct thread_info *softirq_ctx[NR_CPUS]; struct thread_info *hardirq_ctx[NR_CPUS]; diff --git a/arch/ppc64/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 511af54e6230..511af54e6230 100644 --- a/arch/ppc64/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c diff --git a/arch/powerpc/kernel/lparcfg.c b/arch/powerpc/kernel/lparcfg.c index 1b3ba8a440a6..9dda16ccde78 100644 --- a/arch/powerpc/kernel/lparcfg.c +++ b/arch/powerpc/kernel/lparcfg.c @@ -42,32 +42,6 @@ /* #define LPARCFG_DEBUG */ -/* find a better place for this function... */ -static void log_plpar_hcall_return(unsigned long rc, char *tag) -{ - if (rc == 0) /* success, return */ - return; -/* check for null tag ? */ - if (rc == H_Hardware) - printk(KERN_INFO - "plpar-hcall (%s) failed with hardware fault\n", tag); - else if (rc == H_Function) - printk(KERN_INFO - "plpar-hcall (%s) failed; function not allowed\n", tag); - else if (rc == H_Authority) - printk(KERN_INFO - "plpar-hcall (%s) failed; not authorized to this function\n", - tag); - else if (rc == H_Parameter) - printk(KERN_INFO "plpar-hcall (%s) failed; Bad parameter(s)\n", - tag); - else - printk(KERN_INFO - "plpar-hcall (%s) failed with unexpected rc(0x%lx)\n", - tag, rc); - -} - static struct proc_dir_entry *proc_ppc64_lparcfg; #define LPARCFG_BUFF_SIZE 4096 @@ -172,6 +146,31 @@ static int lparcfg_data(struct seq_file *m, void *v) /* * Methods used to fetch LPAR data when running on a pSeries platform. */ +/* find a better place for this function... */ +static void log_plpar_hcall_return(unsigned long rc, char *tag) +{ + if (rc == 0) /* success, return */ + return; +/* check for null tag ? */ + if (rc == H_Hardware) + printk(KERN_INFO + "plpar-hcall (%s) failed with hardware fault\n", tag); + else if (rc == H_Function) + printk(KERN_INFO + "plpar-hcall (%s) failed; function not allowed\n", tag); + else if (rc == H_Authority) + printk(KERN_INFO + "plpar-hcall (%s) failed; not authorized to this function\n", + tag); + else if (rc == H_Parameter) + printk(KERN_INFO "plpar-hcall (%s) failed; Bad parameter(s)\n", + tag); + else + printk(KERN_INFO + "plpar-hcall (%s) failed with unexpected rc(0x%lx)\n", + tag, rc); + +} /* * H_GET_PPP hcall returns info in 4 parms. diff --git a/arch/ppc64/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec_64.c index 07ea03598c00..97c51e452be7 100644 --- a/arch/ppc64/kernel/machine_kexec.c +++ b/arch/powerpc/kernel/machine_kexec_64.c @@ -185,8 +185,8 @@ void kexec_copy_flush(struct kimage *image) */ void kexec_smp_down(void *arg) { - if (ppc_md.cpu_irq_down) - ppc_md.cpu_irq_down(1); + if (ppc_md.kexec_cpu_down) + ppc_md.kexec_cpu_down(0, 1); local_irq_disable(); kexec_smp_wait(); @@ -233,8 +233,8 @@ static void kexec_prepare_cpus(void) } /* after we tell the others to go down */ - if (ppc_md.cpu_irq_down) - ppc_md.cpu_irq_down(0); + if (ppc_md.kexec_cpu_down) + ppc_md.kexec_cpu_down(0, 0); put_cpu(); @@ -255,8 +255,8 @@ static void kexec_prepare_cpus(void) * UP to an SMP kernel. */ smp_release_cpus(); - if (ppc_md.cpu_irq_down) - ppc_md.cpu_irq_down(0); + if (ppc_md.kexec_cpu_down) + ppc_md.kexec_cpu_down(0, 0); local_irq_disable(); } @@ -305,3 +305,54 @@ void machine_kexec(struct kimage *image) ppc_md.hpte_clear_all); /* NOTREACHED */ } + +/* Values we need to export to the second kernel via the device tree. */ +static unsigned long htab_base, htab_size, kernel_end; + +static struct property htab_base_prop = { + .name = "linux,htab-base", + .length = sizeof(unsigned long), + .value = (unsigned char *)&htab_base, +}; + +static struct property htab_size_prop = { + .name = "linux,htab-size", + .length = sizeof(unsigned long), + .value = (unsigned char *)&htab_size, +}; + +static struct property kernel_end_prop = { + .name = "linux,kernel-end", + .length = sizeof(unsigned long), + .value = (unsigned char *)&kernel_end, +}; + +static void __init export_htab_values(void) +{ + struct device_node *node; + + node = of_find_node_by_path("/chosen"); + if (!node) + return; + + kernel_end = __pa(_end); + prom_add_property(node, &kernel_end_prop); + + /* On machines with no htab htab_address is NULL */ + if (NULL == htab_address) + goto out; + + htab_base = __pa(htab_address); + prom_add_property(node, &htab_base_prop); + + htab_size = 1UL << ppc64_pft_size; + prom_add_property(node, &htab_size_prop); + + out: + of_node_put(node); +} + +void __init kexec_setup(void) +{ + export_htab_values(); +} diff --git a/arch/ppc64/kernel/module.c b/arch/powerpc/kernel/module_64.c index 928b8581fcb0..928b8581fcb0 100644 --- a/arch/ppc64/kernel/module.c +++ b/arch/powerpc/kernel/module_64.c diff --git a/arch/ppc64/kernel/pci.c b/arch/powerpc/kernel/pci_64.c index 3cef1b8f57f0..3cef1b8f57f0 100644 --- a/arch/ppc64/kernel/pci.c +++ b/arch/powerpc/kernel/pci_64.c diff --git a/arch/ppc64/kernel/pci_direct_iommu.c b/arch/powerpc/kernel/pci_direct_iommu.c index e1a32f802c0b..e1a32f802c0b 100644 --- a/arch/ppc64/kernel/pci_direct_iommu.c +++ b/arch/powerpc/kernel/pci_direct_iommu.c diff --git a/arch/ppc64/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c index 12c4c9e9bbc7..12c4c9e9bbc7 100644 --- a/arch/ppc64/kernel/pci_dn.c +++ b/arch/powerpc/kernel/pci_dn.c diff --git a/arch/ppc64/kernel/pci_iommu.c b/arch/powerpc/kernel/pci_iommu.c index bdf15dbbf4f0..bdf15dbbf4f0 100644 --- a/arch/ppc64/kernel/pci_iommu.c +++ b/arch/powerpc/kernel/pci_iommu.c diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c index 5dcf4ba05ee8..59846b40d521 100644 --- a/arch/powerpc/kernel/ppc_ksyms.c +++ b/arch/powerpc/kernel/ppc_ksyms.c @@ -105,6 +105,13 @@ EXPORT_SYMBOL(__clear_user); EXPORT_SYMBOL(__strncpy_from_user); EXPORT_SYMBOL(__strnlen_user); +#ifndef __powerpc64__ +EXPORT_SYMBOL(__ide_mm_insl); +EXPORT_SYMBOL(__ide_mm_outsw); +EXPORT_SYMBOL(__ide_mm_insw); +EXPORT_SYMBOL(__ide_mm_outsl); +#endif + EXPORT_SYMBOL(_insb); EXPORT_SYMBOL(_outsb); EXPORT_SYMBOL(_insw); diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 6a5b468edb4d..3bf968e74095 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -1368,6 +1368,7 @@ prom_n_addr_cells(struct device_node* np) /* No #address-cells property for the root node, default to 1 */ return 1; } +EXPORT_SYMBOL(prom_n_addr_cells); int prom_n_size_cells(struct device_node* np) @@ -1383,6 +1384,7 @@ prom_n_size_cells(struct device_node* np) /* No #size-cells property for the root node, default to 1 */ return 1; } +EXPORT_SYMBOL(prom_n_size_cells); /** * Work out the sense (active-low level / active-high edge) diff --git a/arch/powerpc/kernel/rtas-rtc.c b/arch/powerpc/kernel/rtas-rtc.c new file mode 100644 index 000000000000..7b948662704c --- /dev/null +++ b/arch/powerpc/kernel/rtas-rtc.c @@ -0,0 +1,105 @@ +#include <linux/kernel.h> +#include <linux/time.h> +#include <linux/timer.h> +#include <linux/init.h> +#include <linux/rtc.h> +#include <linux/delay.h> +#include <asm/prom.h> +#include <asm/rtas.h> +#include <asm/time.h> + + +#define MAX_RTC_WAIT 5000 /* 5 sec */ +#define RTAS_CLOCK_BUSY (-2) +unsigned long __init rtas_get_boot_time(void) +{ + int ret[8]; + int error, wait_time; + unsigned long max_wait_tb; + + max_wait_tb = get_tb() + tb_ticks_per_usec * 1000 * MAX_RTC_WAIT; + do { + error = rtas_call(rtas_token("get-time-of-day"), 0, 8, ret); + if (error == RTAS_CLOCK_BUSY || rtas_is_extended_busy(error)) { + wait_time = rtas_extended_busy_delay_time(error); + /* This is boot time so we spin. */ + udelay(wait_time*1000); + error = RTAS_CLOCK_BUSY; + } + } while (error == RTAS_CLOCK_BUSY && (get_tb() < max_wait_tb)); + + if (error != 0 && printk_ratelimit()) { + printk(KERN_WARNING "error: reading the clock failed (%d)\n", + error); + return 0; + } + + return mktime(ret[0], ret[1], ret[2], ret[3], ret[4], ret[5]); +} + +/* NOTE: get_rtc_time will get an error if executed in interrupt context + * and if a delay is needed to read the clock. In this case we just + * silently return without updating rtc_tm. + */ +void rtas_get_rtc_time(struct rtc_time *rtc_tm) +{ + int ret[8]; + int error, wait_time; + unsigned long max_wait_tb; + + max_wait_tb = get_tb() + tb_ticks_per_usec * 1000 * MAX_RTC_WAIT; + do { + error = rtas_call(rtas_token("get-time-of-day"), 0, 8, ret); + if (error == RTAS_CLOCK_BUSY || rtas_is_extended_busy(error)) { + if (in_interrupt() && printk_ratelimit()) { + memset(&rtc_tm, 0, sizeof(struct rtc_time)); + printk(KERN_WARNING "error: reading clock" + " would delay interrupt\n"); + return; /* delay not allowed */ + } + wait_time = rtas_extended_busy_delay_time(error); + msleep(wait_time); + error = RTAS_CLOCK_BUSY; + } + } while (error == RTAS_CLOCK_BUSY && (get_tb() < max_wait_tb)); + + if (error != 0 && printk_ratelimit()) { + printk(KERN_WARNING "error: reading the clock failed (%d)\n", + error); + return; + } + + rtc_tm->tm_sec = ret[5]; + rtc_tm->tm_min = ret[4]; + rtc_tm->tm_hour = ret[3]; + rtc_tm->tm_mday = ret[2]; + rtc_tm->tm_mon = ret[1] - 1; + rtc_tm->tm_year = ret[0] - 1900; +} + +int rtas_set_rtc_time(struct rtc_time *tm) +{ + int error, wait_time; + unsigned long max_wait_tb; + + max_wait_tb = get_tb() + tb_ticks_per_usec * 1000 * MAX_RTC_WAIT; + do { + error = rtas_call(rtas_token("set-time-of-day"), 7, 1, NULL, + tm->tm_year + 1900, tm->tm_mon + 1, + tm->tm_mday, tm->tm_hour, tm->tm_min, + tm->tm_sec, 0); + if (error == RTAS_CLOCK_BUSY || rtas_is_extended_busy(error)) { + if (in_interrupt()) + return 1; /* probably decrementer */ + wait_time = rtas_extended_busy_delay_time(error); + msleep(wait_time); + error = RTAS_CLOCK_BUSY; + } + } while (error == RTAS_CLOCK_BUSY && (get_tb() < max_wait_tb)); + + if (error != 0 && printk_ratelimit()) + printk(KERN_WARNING "error: setting the clock failed (%d)\n", + error); + + return 0; +} diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index c98cfcc9cd9a..e5694335bf10 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -57,10 +57,6 @@ extern void power4_idle(void); boot_infos_t *boot_infos; struct ide_machdep_calls ppc_ide_md; -/* XXX should go elsewhere */ -int __irq_offset_value; -EXPORT_SYMBOL(__irq_offset_value); - int boot_cpuid; EXPORT_SYMBOL_GPL(boot_cpuid); int boot_cpuid_phys; diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index fdbd9f9122f2..608fee7c7e20 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -59,6 +59,7 @@ #include <asm/firmware.h> #include <asm/xmon.h> #include <asm/udbg.h> +#include <asm/kexec.h> #include "setup.h" @@ -415,6 +416,10 @@ void __init setup_system(void) */ unflatten_device_tree(); +#ifdef CONFIG_KEXEC + kexec_setup(); /* requires unflattened device tree. */ +#endif + /* * Fill the ppc64_caches & systemcfg structures with informations * retreived from the device-tree. Need to be called before diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 8bdf95b7e420..5a2eba60dd39 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -403,8 +403,6 @@ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame, ELF_NFPREG * sizeof(double))) return 1; - current->thread.fpscr.val = 0; /* turn off all fp exceptions */ - #ifdef CONFIG_ALTIVEC /* save altivec registers */ if (current->thread.used_vr) { @@ -818,6 +816,9 @@ static int handle_rt_signal(unsigned long sig, struct k_sigaction *ka, goto badframe; regs->link = (unsigned long) frame->tramp; } + + current->thread.fpscr.val = 0; /* turn off all fp exceptions */ + if (put_user(regs->gpr[1], (u32 __user *)newsp)) goto badframe; regs->gpr[1] = newsp; @@ -1097,6 +1098,8 @@ static int handle_signal(unsigned long sig, struct k_sigaction *ka, regs->link = (unsigned long) frame->mctx.tramp; } + current->thread.fpscr.val = 0; /* turn off all fp exceptions */ + if (put_user(regs->gpr[1], (u32 __user *)newsp)) goto badframe; regs->gpr[1] = newsp; diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index 58194e150711..1decf2785530 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -131,9 +131,6 @@ static long setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, flush_fp_to_thread(current); - /* Make sure signal doesn't get spurrious FP exceptions */ - current->thread.fpscr.val = 0; - #ifdef CONFIG_ALTIVEC err |= __put_user(v_regs, &sc->v_regs); @@ -423,6 +420,9 @@ static int setup_rt_frame(int signr, struct k_sigaction *ka, siginfo_t *info, if (err) goto badframe; + /* Make sure signal handler doesn't get spurious FP exceptions */ + current->thread.fpscr.val = 0; + /* Set up to return from userspace. */ if (vdso64_rt_sigtramp && current->thread.vdso_base) { regs->link = current->thread.vdso_base + vdso64_rt_sigtramp; diff --git a/arch/powerpc/kernel/vdso32/datapage.S b/arch/powerpc/kernel/vdso32/datapage.S index a08c26e87835..f6b38472318d 100644 --- a/arch/powerpc/kernel/vdso32/datapage.S +++ b/arch/powerpc/kernel/vdso32/datapage.S @@ -77,8 +77,9 @@ V_FUNCTION_BEGIN(__kernel_get_tbfreq) mflr r12 .cfi_register lr,r12 bl __get_datapage@local - lwz r3,CFG_TB_TICKS_PER_SEC(r3) lwz r4,(CFG_TB_TICKS_PER_SEC + 4)(r3) + lwz r3,CFG_TB_TICKS_PER_SEC(r3) mtlr r12 + blr .cfi_endproc V_FUNCTION_END(__kernel_get_tbfreq) diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S index aeb5fc9b87b3..0a32a41d50b0 100644 --- a/arch/powerpc/kernel/vdso32/gettimeofday.S +++ b/arch/powerpc/kernel/vdso32/gettimeofday.S @@ -83,7 +83,7 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime) /* Check for supported clock IDs */ cmpli cr0,r3,CLOCK_REALTIME cmpli cr1,r3,CLOCK_MONOTONIC - cror cr0,cr0,cr1 + cror cr0*4+eq,cr0*4+eq,cr1*4+eq bne cr0,99f mflr r12 /* r12 saves lr */ @@ -91,7 +91,7 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime) mr r10,r3 /* r10 saves id */ mr r11,r4 /* r11 saves tp */ bl __get_datapage@local /* get data page */ - mr r9, r3 /* datapage ptr in r9 */ + mr r9,r3 /* datapage ptr in r9 */ beq cr1,50f /* if monotonic -> jump there */ /* @@ -173,10 +173,14 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime) add r4,r4,r7 lis r5,NSEC_PER_SEC@h ori r5,r5,NSEC_PER_SEC@l - cmpli cr0,r4,r5 + cmpl cr0,r4,r5 + cmpli cr1,r4,0 blt 1f subf r4,r5,r4 addi r3,r3,1 +1: bge cr1,1f + addi r3,r3,-1 + add r4,r4,r5 1: stw r3,TSPC32_TV_SEC(r11) stw r4,TSPC32_TV_NSEC(r11) @@ -210,7 +214,7 @@ V_FUNCTION_BEGIN(__kernel_clock_getres) /* Check for supported clock IDs */ cmpwi cr0,r3,CLOCK_REALTIME cmpwi cr1,r3,CLOCK_MONOTONIC - cror cr0,cr0,cr1 + cror cr0*4+eq,cr0*4+eq,cr1*4+eq bne cr0,99f li r3,0 diff --git a/arch/powerpc/kernel/vdso64/datapage.S b/arch/powerpc/kernel/vdso64/datapage.S index e67eda0f8cda..6393e4137bc7 100644 --- a/arch/powerpc/kernel/vdso64/datapage.S +++ b/arch/powerpc/kernel/vdso64/datapage.S @@ -80,5 +80,6 @@ V_FUNCTION_BEGIN(__kernel_get_tbfreq) bl V_LOCAL_FUNC(__get_datapage) ld r3,CFG_TB_TICKS_PER_SEC(r3) mtlr r12 + blr .cfi_endproc V_FUNCTION_END(__kernel_get_tbfreq) diff --git a/arch/powerpc/kernel/vdso64/gettimeofday.S b/arch/powerpc/kernel/vdso64/gettimeofday.S index d371c02a8c0e..1a89094715cc 100644 --- a/arch/powerpc/kernel/vdso64/gettimeofday.S +++ b/arch/powerpc/kernel/vdso64/gettimeofday.S @@ -1,4 +1,5 @@ -/* + + /* * Userland implementation of gettimeofday() for 64 bits processes in a * ppc64 kernel for use in the vDSO * @@ -68,7 +69,7 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime) /* Check for supported clock IDs */ cmpwi cr0,r3,CLOCK_REALTIME cmpwi cr1,r3,CLOCK_MONOTONIC - cror cr0,cr0,cr1 + cror cr0*4+eq,cr0*4+eq,cr1*4+eq bne cr0,99f mflr r12 /* r12 saves lr */ @@ -84,16 +85,17 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime) bl V_LOCAL_FUNC(__do_get_xsec) /* get xsec from tb & kernel */ - lis r7,0x3b9a /* r7 = 1000000000 = NSEC_PER_SEC */ - ori r7,r7,0xca00 + lis r7,15 /* r7 = 1000000 = USEC_PER_SEC */ + ori r7,r7,16960 rldicl r5,r4,44,20 /* r5 = sec = xsec / XSEC_PER_SEC */ rldicr r6,r5,20,43 /* r6 = sec * XSEC_PER_SEC */ std r5,TSPC64_TV_SEC(r11) /* store sec in tv */ subf r0,r6,r4 /* r0 = xsec = (xsec - r6) */ - mulld r0,r0,r7 /* nsec = (xsec * NSEC_PER_SEC) / + mulld r0,r0,r7 /* usec = (xsec * USEC_PER_SEC) / * XSEC_PER_SEC */ rldicl r0,r0,44,20 + mulli r0,r0,1000 /* nsec = usec * 1000 */ std r0,TSPC64_TV_NSEC(r11) /* store nsec in tp */ mtlr r12 @@ -106,15 +108,16 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime) 50: bl V_LOCAL_FUNC(__do_get_xsec) /* get xsec from tb & kernel */ - lis r7,0x3b9a /* r7 = 1000000000 = NSEC_PER_SEC */ - ori r7,r7,0xca00 + lis r7,15 /* r7 = 1000000 = USEC_PER_SEC */ + ori r7,r7,16960 rldicl r5,r4,44,20 /* r5 = sec = xsec / XSEC_PER_SEC */ rldicr r6,r5,20,43 /* r6 = sec * XSEC_PER_SEC */ subf r0,r6,r4 /* r0 = xsec = (xsec - r6) */ - mulld r0,r0,r7 /* nsec = (xsec * NSEC_PER_SEC) / + mulld r0,r0,r7 /* usec = (xsec * USEC_PER_SEC) / * XSEC_PER_SEC */ rldicl r6,r0,44,20 + mulli r6,r6,1000 /* nsec = usec * 1000 */ /* now we must fixup using wall to monotonic. We need to snapshot * that value and do the counter trick again. Fortunately, we still @@ -123,8 +126,8 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime) * can be used */ - lwz r4,WTOM_CLOCK_SEC(r9) - lwz r7,WTOM_CLOCK_NSEC(r9) + lwa r4,WTOM_CLOCK_SEC(r3) + lwa r7,WTOM_CLOCK_NSEC(r3) /* We now have our result in r4,r7. We create a fake dependency * on that result and re-check the counter @@ -144,10 +147,14 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime) add r7,r7,r6 lis r9,NSEC_PER_SEC@h ori r9,r9,NSEC_PER_SEC@l - cmpli cr0,r7,r9 + cmpl cr0,r7,r9 + cmpli cr1,r7,0 blt 1f subf r7,r9,r7 addi r4,r4,1 +1: bge cr1,1f + addi r4,r4,-1 + add r7,r7,r9 1: std r4,TSPC64_TV_SEC(r11) std r7,TSPC64_TV_NSEC(r11) @@ -181,7 +188,7 @@ V_FUNCTION_BEGIN(__kernel_clock_getres) /* Check for supported clock IDs */ cmpwi cr0,r3,CLOCK_REALTIME cmpwi cr1,r3,CLOCK_MONOTONIC - cror cr0,cr0,cr1 + cror cr0*4+eq,cr0*4+eq,cr1*4+eq bne cr0,99f li r3,0 diff --git a/arch/powerpc/platforms/iseries/irq.c b/arch/powerpc/platforms/iseries/irq.c index 01090e9ce0cf..a58daa153686 100644 --- a/arch/powerpc/platforms/iseries/irq.c +++ b/arch/powerpc/platforms/iseries/irq.c @@ -42,13 +42,6 @@ #include "irq.h" #include "call_pci.h" -/* This maps virtual irq numbers to real irqs */ -unsigned int virt_irq_to_real_map[NR_IRQS]; - -/* The next available virtual irq number */ -/* Note: the pcnet32 driver assumes irq numbers < 2 aren't valid. :( */ -static int next_virtual_irq = 2; - static long Pci_Interrupt_Count; static long Pci_Event_Count; @@ -350,26 +343,14 @@ static hw_irq_controller iSeries_IRQ_handler = { int __init iSeries_allocate_IRQ(HvBusNumber busNumber, HvSubBusNumber subBusNumber, HvAgentId deviceId) { - unsigned int realirq, virtirq; + int virtirq; + unsigned int realirq; u8 idsel = (deviceId >> 4); u8 function = deviceId & 7; - virtirq = next_virtual_irq++; realirq = ((busNumber - 1) << 6) + ((idsel - 1) << 3) + function; - virt_irq_to_real_map[virtirq] = realirq; + virtirq = virt_irq_create_mapping(realirq); irq_desc[virtirq].handler = &iSeries_IRQ_handler; return virtirq; } - -int virt_irq_create_mapping(unsigned int real_irq) -{ - BUG(); /* Don't call this on iSeries, yet */ - - return 0; -} - -void virt_irq_init(void) -{ - return; -} diff --git a/arch/powerpc/platforms/iseries/setup.c b/arch/powerpc/platforms/iseries/setup.c index 6a29f301436b..da26639190db 100644 --- a/arch/powerpc/platforms/iseries/setup.c +++ b/arch/powerpc/platforms/iseries/setup.c @@ -39,7 +39,6 @@ #include <asm/sections.h> #include <asm/iommu.h> #include <asm/firmware.h> -#include <asm/systemcfg.h> #include <asm/system.h> #include <asm/time.h> #include <asm/paca.h> @@ -548,8 +547,6 @@ static unsigned long __init build_iSeries_Memory_Map(void) */ static void __init iSeries_setup_arch(void) { - unsigned procIx = get_paca()->lppaca.dyn_hv_phys_proc_index; - if (get_paca()->lppaca.shared_proc) { ppc_md.idle_loop = iseries_shared_idle; printk(KERN_INFO "Using shared processor idle loop\n"); @@ -565,9 +562,6 @@ static void __init iSeries_setup_arch(void) itVpdAreas.xSlicMaxLogicalProcs); printk("Max physical processors = %d\n", itVpdAreas.xSlicMaxPhysicalProcs); - - _systemcfg->processor = xIoHriProcessorVpd[procIx].xPVR; - printk("Processor version = %x\n", _systemcfg->processor); } static void iSeries_show_cpuinfo(struct seq_file *m) diff --git a/arch/powerpc/platforms/powermac/time.c b/arch/powerpc/platforms/powermac/time.c index 5947b21a8588..feb0a94e7819 100644 --- a/arch/powerpc/platforms/powermac/time.c +++ b/arch/powerpc/platforms/powermac/time.c @@ -102,7 +102,7 @@ static unsigned long from_rtc_time(struct rtc_time *tm) static unsigned long cuda_get_time(void) { struct adb_request req; - unsigned long now; + unsigned int now; if (cuda_request(&req, NULL, 2, CUDA_PACKET, CUDA_GET_TIME) < 0) return 0; @@ -113,7 +113,7 @@ static unsigned long cuda_get_time(void) req.reply_len); now = (req.reply[3] << 24) + (req.reply[4] << 16) + (req.reply[5] << 8) + req.reply[6]; - return now - RTC_OFFSET; + return ((unsigned long)now) - RTC_OFFSET; } #define cuda_get_rtc_time(tm) to_rtc_time(cuda_get_time(), (tm)) @@ -146,7 +146,7 @@ static int cuda_set_rtc_time(struct rtc_time *tm) static unsigned long pmu_get_time(void) { struct adb_request req; - unsigned long now; + unsigned int now; if (pmu_request(&req, NULL, 1, PMU_READ_RTC) < 0) return 0; @@ -156,7 +156,7 @@ static unsigned long pmu_get_time(void) req.reply_len); now = (req.reply[0] << 24) + (req.reply[1] << 16) + (req.reply[2] << 8) + req.reply[3]; - return now - RTC_OFFSET; + return ((unsigned long)now) - RTC_OFFSET; } #define pmu_get_rtc_time(tm) to_rtc_time(pmu_get_time(), (tm)) @@ -199,6 +199,7 @@ static unsigned long smu_get_time(void) #define smu_set_rtc_time(tm, spin) 0 #endif +/* Can't be __init, it's called when suspending and resuming */ unsigned long pmac_get_boot_time(void) { /* Get the time from the RTC, used only at boot time */ diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile index e7ca5b1f591e..06d5ef501218 100644 --- a/arch/powerpc/platforms/pseries/Makefile +++ b/arch/powerpc/platforms/pseries/Makefile @@ -4,4 +4,7 @@ obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_IBMVIO) += vio.o obj-$(CONFIG_XICS) += xics.o obj-$(CONFIG_SCANLOG) += scanlog.o -obj-$(CONFIG_EEH) += eeh.o eeh_event.o +obj-$(CONFIG_EEH) += eeh.o eeh_event.o + +obj-$(CONFIG_HVC_CONSOLE) += hvconsole.o +obj-$(CONFIG_HVCS) += hvcserver.o diff --git a/arch/ppc64/kernel/hvconsole.c b/arch/powerpc/platforms/pseries/hvconsole.c index 138e128a3886..138e128a3886 100644 --- a/arch/ppc64/kernel/hvconsole.c +++ b/arch/powerpc/platforms/pseries/hvconsole.c diff --git a/arch/ppc64/kernel/hvcserver.c b/arch/powerpc/platforms/pseries/hvcserver.c index 4d584172055a..4d584172055a 100644 --- a/arch/ppc64/kernel/hvcserver.c +++ b/arch/powerpc/platforms/pseries/hvcserver.c diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 31990829310c..b9d9732b2e06 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -200,14 +200,12 @@ static void __init pSeries_setup_arch(void) if (ppc64_interrupt_controller == IC_OPEN_PIC) { ppc_md.init_IRQ = pSeries_init_mpic; ppc_md.get_irq = mpic_get_irq; - ppc_md.cpu_irq_down = mpic_teardown_this_cpu; /* Allocate the mpic now, so that find_and_init_phbs() can * fill the ISUs */ pSeries_setup_mpic(); } else { ppc_md.init_IRQ = xics_init_IRQ; ppc_md.get_irq = xics_get_irq; - ppc_md.cpu_irq_down = xics_teardown_cpu; } #ifdef CONFIG_SMP @@ -595,6 +593,27 @@ static int pSeries_pci_probe_mode(struct pci_bus *bus) return PCI_PROBE_NORMAL; } +#ifdef CONFIG_KEXEC +static void pseries_kexec_cpu_down(int crash_shutdown, int secondary) +{ + /* Don't risk a hypervisor call if we're crashing */ + if (!crash_shutdown) { + unsigned long vpa = __pa(&get_paca()->lppaca); + + if (unregister_vpa(hard_smp_processor_id(), vpa)) { + printk("VPA deregistration of cpu %u (hw_cpu_id %d) " + "failed\n", smp_processor_id(), + hard_smp_processor_id()); + } + } + + if (ppc64_interrupt_controller == IC_OPEN_PIC) + mpic_teardown_this_cpu(secondary); + else + xics_teardown_cpu(secondary); +} +#endif + struct machdep_calls __initdata pSeries_md = { .probe = pSeries_probe, .setup_arch = pSeries_setup_arch, @@ -617,4 +636,7 @@ struct machdep_calls __initdata pSeries_md = { .check_legacy_ioport = pSeries_check_legacy_ioport, .system_reset_exception = pSeries_system_reset_exception, .machine_check_exception = pSeries_machine_check_exception, +#ifdef CONFIG_KEXEC + .kexec_cpu_down = pseries_kexec_cpu_down, +#endif }; diff --git a/arch/ppc64/Kconfig b/arch/ppc64/Kconfig deleted file mode 100644 index 9d10c12e87fe..000000000000 --- a/arch/ppc64/Kconfig +++ /dev/null @@ -1,520 +0,0 @@ -# -# For a description of the syntax of this configuration file, -# see Documentation/kbuild/kconfig-language.txt. -# - -config 64BIT - def_bool y - -config MMU - bool - default y - -config PPC_STD_MMU - def_bool y - -config UID16 - bool - -config RWSEM_GENERIC_SPINLOCK - bool - -config RWSEM_XCHGADD_ALGORITHM - bool - default y - -config GENERIC_CALIBRATE_DELAY - bool - default y - -config GENERIC_ISA_DMA - bool - default y - -config EARLY_PRINTK - bool - default y - -config COMPAT - bool - default y - -config SCHED_NO_NO_OMIT_FRAME_POINTER - bool - default y - -config ARCH_MAY_HAVE_PC_FDC - bool - default y - -config PPC_STD_MMU - bool - default y - -# We optimistically allocate largepages from the VM, so make the limit -# large enough (16MB). This badly named config option is actually -# max order + 1 -config FORCE_MAX_ZONEORDER - int - default "9" if PPC_64K_PAGES - default "13" - -source "init/Kconfig" - -config SYSVIPC_COMPAT - bool - depends on COMPAT && SYSVIPC - default y - -menu "Platform support" - -choice - prompt "Platform Type" - default PPC_MULTIPLATFORM - -config PPC_ISERIES - bool "IBM Legacy iSeries" - -config PPC_MULTIPLATFORM - bool "Generic" - -endchoice - -config PPC_PSERIES - depends on PPC_MULTIPLATFORM - bool " IBM pSeries & new iSeries" - default y - -config PPC_BPA - bool " Broadband Processor Architecture" - depends on PPC_MULTIPLATFORM - -config PPC_PMAC - depends on PPC_MULTIPLATFORM - bool " Apple G5 based machines" - default y - select U3_DART - select GENERIC_TBSYNC - -config PPC_MAPLE - depends on PPC_MULTIPLATFORM - bool " Maple 970FX Evaluation Board" - select U3_DART - select MPIC_BROKEN_U3 - select GENERIC_TBSYNC - default n - help - This option enables support for the Maple 970FX Evaluation Board. - For more informations, refer to <http://www.970eval.com> - -config PPC - bool - default y - -config PPC64 - bool - default y - -config PPC_OF - depends on PPC_MULTIPLATFORM - bool - default y - -config XICS - depends on PPC_PSERIES - bool - default y - -config MPIC - depends on PPC_PSERIES || PPC_PMAC || PPC_MAPLE - bool - default y - -config PPC_I8259 - depends on PPC_PSERIES - bool - default y - -config BPA_IIC - depends on PPC_BPA - bool - default y - -# VMX is pSeries only for now until somebody writes the iSeries -# exception vectors for it -config ALTIVEC - bool "Support for VMX (Altivec) vector unit" - depends on PPC_MULTIPLATFORM - default y - -config PPC_SPLPAR - depends on PPC_PSERIES - bool "Support for shared-processor logical partitions" - default n - help - Enabling this option will make the kernel run more efficiently - on logically-partitioned pSeries systems which use shared - processors, that is, which share physical processors between - two or more partitions. - -config KEXEC - bool "kexec system call (EXPERIMENTAL)" - depends on PPC_MULTIPLATFORM && EXPERIMENTAL - help - kexec is a system call that implements the ability to shutdown your - current kernel, and to start another kernel. It is like a reboot - but it is indepedent of the system firmware. And like a reboot - you can start any kernel with it, not just Linux. - - The name comes from the similiarity to the exec system call. - - It is an ongoing process to be certain the hardware in a machine - is properly shutdown, so do not be surprised if this code does not - initially work for you. It may help to enable device hotplugging - support. As of this writing the exact hardware interface is - strongly in flux, so no good recommendation can be made. - -source "drivers/cpufreq/Kconfig" - -config CPU_FREQ_PMAC64 - bool "Support for some Apple G5s" - depends on CPU_FREQ && PMAC_SMU && PPC64 - select CPU_FREQ_TABLE - help - This adds support for frequency switching on Apple iMac G5, - and some of the more recent desktop G5 machines as well. - -config IBMVIO - depends on PPC_PSERIES || PPC_ISERIES - bool - default y - -config U3_DART - bool - depends on PPC_MULTIPLATFORM - default n - -config MPIC_BROKEN_U3 - bool - depends on PPC_MAPLE - default y - -config GENERIC_TBSYNC - def_bool n - -config PPC_PMAC64 - bool - depends on PPC_PMAC - default y - -config BOOTX_TEXT - bool "Support for early boot text console" - depends PPC_OF - help - Say Y here to see progress messages from the boot firmware in text - mode. Requires an Open Firmware compatible video card. - -config POWER4 - def_bool y - -config PPC_FPU - def_bool y - -config POWER4_ONLY - bool "Optimize for POWER4" - default n - ---help--- - Cause the compiler to optimize for POWER4 processors. The resulting - binary will not work on POWER3 or RS64 processors when compiled with - binutils 2.15 or later. - -config IOMMU_VMERGE - bool "Enable IOMMU virtual merging (EXPERIMENTAL)" - depends on EXPERIMENTAL - default n - help - Cause IO segments sent to a device for DMA to be merged virtually - by the IOMMU when they happen to have been allocated contiguously. - This doesn't add pressure to the IOMMU allocator. However, some - drivers don't support getting large merged segments coming back - from *_map_sg(). Say Y if you know the drivers you are using are - properly handling this case. - -config SMP - bool "Symmetric multi-processing support" - ---help--- - This enables support for systems with more than one CPU. If you have - a system with only one CPU, say N. If you have a system with more - than one CPU, say Y. - - If you say N here, the kernel will run on single and multiprocessor - machines, but will use only one CPU of a multiprocessor machine. If - you say Y here, the kernel will run on single-processor machines. - On a single-processor machine, the kernel will run faster if you say - N here. - - If you don't know what to do here, say Y. - -config NR_CPUS - int "Maximum number of CPUs (2-128)" - range 2 128 - depends on SMP - default "32" - -config HMT - bool "Hardware multithreading" - depends on SMP && PPC_PSERIES && BROKEN - help - This option enables hardware multithreading on RS64 cpus. - pSeries systems p620 and p660 have such a cpu type. - -config NUMA - bool "NUMA support" - default y if SMP && PPC_PSERIES - -config ARCH_SELECT_MEMORY_MODEL - def_bool y - -config ARCH_FLATMEM_ENABLE - def_bool y - depends on !NUMA - -config ARCH_SPARSEMEM_ENABLE - def_bool y - -config ARCH_SPARSEMEM_DEFAULT - def_bool y - depends on NUMA - -source "mm/Kconfig" - -config HAVE_ARCH_EARLY_PFN_TO_NID - def_bool y - depends on NEED_MULTIPLE_NODES - -config ARCH_MEMORY_PROBE - def_bool y - depends on MEMORY_HOTPLUG - -# Some NUMA nodes have memory ranges that span -# other nodes. Even though a pfn is valid and -# between a node's start and end pfns, it may not -# reside on that node. -# -# This is a relatively temporary hack that should -# be able to go away when sparsemem is fully in -# place -config NODES_SPAN_OTHER_NODES - def_bool y - depends on NEED_MULTIPLE_NODES - -config PPC_64K_PAGES - bool "64k page size" - help - This option changes the kernel logical page size to 64k. On machines - without processor support for 64k pages, the kernel will simulate - them by loading each individual 4k page on demand transparently, - while on hardware with such support, it will be used to map - normal application pages. - -config SCHED_SMT - bool "SMT (Hyperthreading) scheduler support" - depends on SMP - default off - help - SMT scheduler support improves the CPU scheduler's decision making - when dealing with POWER5 cpus at a cost of slightly increased - overhead in some places. If unsure say N here. - -source "kernel/Kconfig.preempt" -source kernel/Kconfig.hz - -config EEH - bool "PCI Extended Error Handling (EEH)" if EMBEDDED - depends on PPC_PSERIES - default y if !EMBEDDED - -# -# Use the generic interrupt handling code in kernel/irq/: -# -config GENERIC_HARDIRQS - bool - default y - -config PPC_RTAS - bool - depends on PPC_PSERIES || PPC_BPA - default y - -config RTAS_ERROR_LOGGING - bool - depends on PPC_RTAS - default y - -config RTAS_PROC - bool "Proc interface to RTAS" - depends on PPC_RTAS - default y - -config RTAS_FLASH - tristate "Firmware flash interface" - depends on RTAS_PROC - -config SCANLOG - tristate "Scanlog dump interface" - depends on RTAS_PROC && PPC_PSERIES - -config LPARCFG - tristate "LPAR Configuration Data" - depends on PPC_PSERIES || PPC_ISERIES - help - Provide system capacity information via human readable - <key word>=<value> pairs through a /proc/ppc64/lparcfg interface. - -config SECCOMP - bool "Enable seccomp to safely compute untrusted bytecode" - depends on PROC_FS - default y - help - This kernel feature is useful for number crunching applications - that may need to compute untrusted bytecode during their - execution. By using pipes or other transports made available to - the process as file descriptors supporting the read/write - syscalls, it's possible to isolate those applications in - their own address space using seccomp. Once seccomp is - enabled via /proc/<pid>/seccomp, it cannot be disabled - and the task is only allowed to execute a few safe syscalls - defined by each seccomp mode. - - If unsure, say Y. Only embedded should say N here. - -source "fs/Kconfig.binfmt" - -config HOTPLUG_CPU - bool "Support for hot-pluggable CPUs" - depends on SMP && EXPERIMENTAL && (PPC_PSERIES || PPC_PMAC) - select HOTPLUG - ---help--- - Say Y here to be able to turn CPUs off and on. - - Say N if you are unsure. - -config PROC_DEVICETREE - bool "Support for Open Firmware device tree in /proc" - help - This option adds a device-tree directory under /proc which contains - an image of the device tree that the kernel copies from Open - Firmware. If unsure, say Y here. - -config CMDLINE_BOOL - bool "Default bootloader kernel arguments" - depends on !PPC_ISERIES - -config CMDLINE - string "Initial kernel command string" - depends on CMDLINE_BOOL - default "console=ttyS0,9600 console=tty0 root=/dev/sda2" - help - On some platforms, there is currently no way for the boot loader to - pass arguments to the kernel. For these platforms, you can supply - some command-line options at build time by entering them here. In - most cases you will need to specify the root device here. - -endmenu - -config ISA_DMA_API - bool - default y - -menu "Bus Options" - -config ISA - bool - help - Find out whether you have ISA slots on your motherboard. ISA is the - name of a bus system, i.e. the way the CPU talks to the other stuff - inside your box. If you have an Apple machine, say N here; if you - have an IBM RS/6000 or pSeries machine or a PReP machine, say Y. If - you have an embedded board, consult your board documentation. - -config SBUS - bool - -config MCA - bool - -config EISA - bool - -config PCI - bool "support for PCI devices" if (EMBEDDED && PPC_ISERIES) - default y - help - Find out whether your system includes a PCI bus. PCI is the name of - a bus system, i.e. the way the CPU talks to the other stuff inside - your box. If you say Y here, the kernel will include drivers and - infrastructure code to support PCI bus devices. - -config PCI_DOMAINS - bool - default PCI - -source "drivers/pci/Kconfig" - -source "drivers/pcmcia/Kconfig" - -source "drivers/pci/hotplug/Kconfig" - -endmenu - -source "net/Kconfig" - -source "drivers/Kconfig" - -source "fs/Kconfig" - -menu "iSeries device drivers" - depends on PPC_ISERIES - -config VIOCONS - tristate "iSeries Virtual Console Support" - -config VIODASD - tristate "iSeries Virtual I/O disk support" - help - If you are running on an iSeries system and you want to use - virtual disks created and managed by OS/400, say Y. - -config VIOCD - tristate "iSeries Virtual I/O CD support" - help - If you are running Linux on an IBM iSeries system and you want to - read a CD drive owned by OS/400, say Y here. - -config VIOTAPE - tristate "iSeries Virtual Tape Support" - help - If you are running Linux on an iSeries system and you want Linux - to read and/or write a tape drive owned by OS/400, say Y here. - -endmenu - -config VIOPATH - bool - depends on VIOCONS || VIODASD || VIOCD || VIOTAPE || VETH - default y - -source "arch/powerpc/oprofile/Kconfig" - -source "arch/ppc64/Kconfig.debug" - -source "security/Kconfig" - -config KEYS_COMPAT - bool - depends on COMPAT && KEYS - default y - -source "crypto/Kconfig" - -source "lib/Kconfig" diff --git a/arch/ppc64/kernel/Makefile b/arch/ppc64/kernel/Makefile index dac4cc20fa93..e876c213f5ce 100644 --- a/arch/ppc64/kernel/Makefile +++ b/arch/ppc64/kernel/Makefile @@ -2,45 +2,6 @@ # Makefile for the linux ppc64 kernel. # -ifneq ($(CONFIG_PPC_MERGE),y) - -EXTRA_CFLAGS += -mno-minimal-toc -extra-y := head.o vmlinux.lds - -obj-y := misc.o prom.o - -endif - -obj-y += idle.o dma.o \ - align.o \ - rtc.o \ - iommu.o - -pci-obj-$(CONFIG_PPC_MULTIPLATFORM) += pci_dn.o pci_direct_iommu.o - -obj-$(CONFIG_PCI) += pci.o pci_iommu.o iomap.o $(pci-obj-y) +obj-y += idle.o align.o obj-$(CONFIG_PPC_MULTIPLATFORM) += nvram.o -ifneq ($(CONFIG_PPC_MERGE),y) -obj-$(CONFIG_PPC_MULTIPLATFORM) += prom_init.o -endif - -obj-$(CONFIG_KEXEC) += machine_kexec.o -obj-$(CONFIG_MODULES) += module.o -ifneq ($(CONFIG_PPC_MERGE),y) -obj-$(CONFIG_MODULES) += ppc_ksyms.o -endif -obj-$(CONFIG_HVC_CONSOLE) += hvconsole.o -ifneq ($(CONFIG_PPC_MERGE),y) -obj-$(CONFIG_BOOTX_TEXT) += btext.o -endif -obj-$(CONFIG_HVCS) += hvcserver.o - -obj-$(CONFIG_KPROBES) += kprobes.o - -ifneq ($(CONFIG_PPC_MERGE),y) -ifeq ($(CONFIG_PPC_ISERIES),y) -arch/ppc64/kernel/head.o: arch/powerpc/kernel/lparmap.s -AFLAGS_head.o += -Iarch/powerpc/kernel -endif -endif diff --git a/arch/ppc64/kernel/asm-offsets.c b/arch/ppc64/kernel/asm-offsets.c deleted file mode 100644 index 84ab5c18ef52..000000000000 --- a/arch/ppc64/kernel/asm-offsets.c +++ /dev/null @@ -1,195 +0,0 @@ -/* - * This program is used to generate definitions needed by - * assembly language modules. - * - * We use the technique used in the OSF Mach kernel code: - * generate asm statements containing #defines, - * compile this file to assembler, and then extract the - * #defines from the assembly-language output. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <linux/config.h> -#include <linux/signal.h> -#include <linux/sched.h> -#include <linux/kernel.h> -#include <linux/errno.h> -#include <linux/string.h> -#include <linux/types.h> -#include <linux/mman.h> -#include <linux/mm.h> -#include <linux/time.h> -#include <linux/hardirq.h> -#include <asm/io.h> -#include <asm/page.h> -#include <asm/pgtable.h> -#include <asm/processor.h> - -#include <asm/paca.h> -#include <asm/lppaca.h> -#include <asm/iseries/hv_lp_event.h> -#include <asm/rtas.h> -#include <asm/cputable.h> -#include <asm/cache.h> -#include <asm/systemcfg.h> -#include <asm/compat.h> - -#define DEFINE(sym, val) \ - asm volatile("\n->" #sym " %0 " #val : : "i" (val)) - -#define BLANK() asm volatile("\n->" : : ) - -int main(void) -{ - /* thread struct on stack */ - DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); - DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count)); - DEFINE(TI_SC_NOERR, offsetof(struct thread_info, syscall_noerror)); - - /* task_struct->thread */ - DEFINE(THREAD, offsetof(struct task_struct, thread)); - DEFINE(PT_REGS, offsetof(struct thread_struct, regs)); - DEFINE(THREAD_FPEXC_MODE, offsetof(struct thread_struct, fpexc_mode)); - DEFINE(THREAD_FPR0, offsetof(struct thread_struct, fpr[0])); - DEFINE(THREAD_FPSCR, offsetof(struct thread_struct, fpscr)); - DEFINE(KSP, offsetof(struct thread_struct, ksp)); - DEFINE(KSP_VSID, offsetof(struct thread_struct, ksp_vsid)); - -#ifdef CONFIG_ALTIVEC - DEFINE(THREAD_VR0, offsetof(struct thread_struct, vr[0])); - DEFINE(THREAD_VRSAVE, offsetof(struct thread_struct, vrsave)); - DEFINE(THREAD_VSCR, offsetof(struct thread_struct, vscr)); - DEFINE(THREAD_USED_VR, offsetof(struct thread_struct, used_vr)); -#endif /* CONFIG_ALTIVEC */ - DEFINE(MM, offsetof(struct task_struct, mm)); - DEFINE(AUDITCONTEXT, offsetof(struct task_struct, audit_context)); - - DEFINE(DCACHEL1LINESIZE, offsetof(struct ppc64_caches, dline_size)); - DEFINE(DCACHEL1LOGLINESIZE, offsetof(struct ppc64_caches, log_dline_size)); - DEFINE(DCACHEL1LINESPERPAGE, offsetof(struct ppc64_caches, dlines_per_page)); - DEFINE(ICACHEL1LINESIZE, offsetof(struct ppc64_caches, iline_size)); - DEFINE(ICACHEL1LOGLINESIZE, offsetof(struct ppc64_caches, log_iline_size)); - DEFINE(ICACHEL1LINESPERPAGE, offsetof(struct ppc64_caches, ilines_per_page)); - DEFINE(PLATFORM_LPAR, PLATFORM_LPAR); - - /* paca */ - DEFINE(PACA_SIZE, sizeof(struct paca_struct)); - DEFINE(PACAPACAINDEX, offsetof(struct paca_struct, paca_index)); - DEFINE(PACAPROCSTART, offsetof(struct paca_struct, cpu_start)); - DEFINE(PACAKSAVE, offsetof(struct paca_struct, kstack)); - DEFINE(PACACURRENT, offsetof(struct paca_struct, __current)); - DEFINE(PACASAVEDMSR, offsetof(struct paca_struct, saved_msr)); - DEFINE(PACASTABREAL, offsetof(struct paca_struct, stab_real)); - DEFINE(PACASTABVIRT, offsetof(struct paca_struct, stab_addr)); - DEFINE(PACASTABRR, offsetof(struct paca_struct, stab_rr)); - DEFINE(PACAR1, offsetof(struct paca_struct, saved_r1)); - DEFINE(PACATOC, offsetof(struct paca_struct, kernel_toc)); - DEFINE(PACAPROCENABLED, offsetof(struct paca_struct, proc_enabled)); - DEFINE(PACASLBCACHE, offsetof(struct paca_struct, slb_cache)); - DEFINE(PACASLBCACHEPTR, offsetof(struct paca_struct, slb_cache_ptr)); - DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id)); -#ifdef CONFIG_PPC_64K_PAGES - DEFINE(PACAPGDIR, offsetof(struct paca_struct, pgdir)); -#endif -#ifdef CONFIG_HUGETLB_PAGE - DEFINE(PACALOWHTLBAREAS, offsetof(struct paca_struct, context.low_htlb_areas)); - DEFINE(PACAHIGHHTLBAREAS, offsetof(struct paca_struct, context.high_htlb_areas)); -#endif /* CONFIG_HUGETLB_PAGE */ - DEFINE(PACADEFAULTDECR, offsetof(struct paca_struct, default_decr)); - DEFINE(PACA_EXGEN, offsetof(struct paca_struct, exgen)); - DEFINE(PACA_EXMC, offsetof(struct paca_struct, exmc)); - DEFINE(PACA_EXSLB, offsetof(struct paca_struct, exslb)); - DEFINE(PACA_EXDSI, offsetof(struct paca_struct, exdsi)); - DEFINE(PACAEMERGSP, offsetof(struct paca_struct, emergency_sp)); - DEFINE(PACALPPACA, offsetof(struct paca_struct, lppaca)); - DEFINE(PACAHWCPUID, offsetof(struct paca_struct, hw_cpu_id)); - DEFINE(LPPACASRR0, offsetof(struct lppaca, saved_srr0)); - DEFINE(LPPACASRR1, offsetof(struct lppaca, saved_srr1)); - DEFINE(LPPACAANYINT, offsetof(struct lppaca, int_dword.any_int)); - DEFINE(LPPACADECRINT, offsetof(struct lppaca, int_dword.fields.decr_int)); - - /* RTAS */ - DEFINE(RTASBASE, offsetof(struct rtas_t, base)); - DEFINE(RTASENTRY, offsetof(struct rtas_t, entry)); - - /* Interrupt register frame */ - DEFINE(STACK_FRAME_OVERHEAD, STACK_FRAME_OVERHEAD); - - DEFINE(SWITCH_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs)); - - /* 288 = # of volatile regs, int & fp, for leaf routines */ - /* which do not stack a frame. See the PPC64 ABI. */ - DEFINE(INT_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 288); - /* Create extra stack space for SRR0 and SRR1 when calling prom/rtas. */ - DEFINE(PROM_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16); - DEFINE(RTAS_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16); - DEFINE(GPR0, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[0])); - DEFINE(GPR1, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[1])); - DEFINE(GPR2, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[2])); - DEFINE(GPR3, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[3])); - DEFINE(GPR4, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[4])); - DEFINE(GPR5, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[5])); - DEFINE(GPR6, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[6])); - DEFINE(GPR7, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[7])); - DEFINE(GPR8, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[8])); - DEFINE(GPR9, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[9])); - DEFINE(GPR10, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[10])); - DEFINE(GPR11, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[11])); - DEFINE(GPR12, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[12])); - DEFINE(GPR13, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[13])); - /* - * Note: these symbols include _ because they overlap with special - * register names - */ - DEFINE(_NIP, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, nip)); - DEFINE(_MSR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, msr)); - DEFINE(_CTR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, ctr)); - DEFINE(_LINK, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, link)); - DEFINE(_CCR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, ccr)); - DEFINE(_XER, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, xer)); - DEFINE(_DAR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, dar)); - DEFINE(_DSISR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, dsisr)); - DEFINE(ORIG_GPR3, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, orig_gpr3)); - DEFINE(RESULT, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, result)); - DEFINE(_TRAP, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, trap)); - DEFINE(SOFTE, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, softe)); - - /* These _only_ to be used with {PROM,RTAS}_FRAME_SIZE!!! */ - DEFINE(_SRR0, STACK_FRAME_OVERHEAD+sizeof(struct pt_regs)); - DEFINE(_SRR1, STACK_FRAME_OVERHEAD+sizeof(struct pt_regs)+8); - - DEFINE(CLONE_VM, CLONE_VM); - DEFINE(CLONE_UNTRACED, CLONE_UNTRACED); - - /* About the CPU features table */ - DEFINE(CPU_SPEC_ENTRY_SIZE, sizeof(struct cpu_spec)); - DEFINE(CPU_SPEC_PVR_MASK, offsetof(struct cpu_spec, pvr_mask)); - DEFINE(CPU_SPEC_PVR_VALUE, offsetof(struct cpu_spec, pvr_value)); - DEFINE(CPU_SPEC_FEATURES, offsetof(struct cpu_spec, cpu_features)); - DEFINE(CPU_SPEC_SETUP, offsetof(struct cpu_spec, cpu_setup)); - - /* systemcfg offsets for use by vdso */ - DEFINE(CFG_TB_ORIG_STAMP, offsetof(struct systemcfg, tb_orig_stamp)); - DEFINE(CFG_TB_TICKS_PER_SEC, offsetof(struct systemcfg, tb_ticks_per_sec)); - DEFINE(CFG_TB_TO_XS, offsetof(struct systemcfg, tb_to_xs)); - DEFINE(CFG_STAMP_XSEC, offsetof(struct systemcfg, stamp_xsec)); - DEFINE(CFG_TB_UPDATE_COUNT, offsetof(struct systemcfg, tb_update_count)); - DEFINE(CFG_TZ_MINUTEWEST, offsetof(struct systemcfg, tz_minuteswest)); - DEFINE(CFG_TZ_DSTTIME, offsetof(struct systemcfg, tz_dsttime)); - DEFINE(CFG_SYSCALL_MAP32, offsetof(struct systemcfg, syscall_map_32)); - DEFINE(CFG_SYSCALL_MAP64, offsetof(struct systemcfg, syscall_map_64)); - - /* timeval/timezone offsets for use by vdso */ - DEFINE(TVAL64_TV_SEC, offsetof(struct timeval, tv_sec)); - DEFINE(TVAL64_TV_USEC, offsetof(struct timeval, tv_usec)); - DEFINE(TVAL32_TV_SEC, offsetof(struct compat_timeval, tv_sec)); - DEFINE(TVAL32_TV_USEC, offsetof(struct compat_timeval, tv_usec)); - DEFINE(TZONE_TZ_MINWEST, offsetof(struct timezone, tz_minuteswest)); - DEFINE(TZONE_TZ_DSTTIME, offsetof(struct timezone, tz_dsttime)); - - return 0; -} diff --git a/arch/ppc64/kernel/btext.c b/arch/ppc64/kernel/btext.c deleted file mode 100644 index 506a37885c5c..000000000000 --- a/arch/ppc64/kernel/btext.c +++ /dev/null @@ -1,792 +0,0 @@ -/* - * Procedures for drawing on the screen early on in the boot process. - * - * Benjamin Herrenschmidt <benh@kernel.crashing.org> - */ -#include <linux/config.h> -#include <linux/kernel.h> -#include <linux/string.h> -#include <linux/init.h> - -#include <asm/sections.h> -#include <asm/prom.h> -#include <asm/btext.h> -#include <asm/prom.h> -#include <asm/page.h> -#include <asm/mmu.h> -#include <asm/pgtable.h> -#include <asm/io.h> -#include <asm/lmb.h> -#include <asm/processor.h> -#include <asm/udbg.h> - -#undef NO_SCROLL - -#ifndef NO_SCROLL -static void scrollscreen(void); -#endif - -static void draw_byte(unsigned char c, long locX, long locY); -static void draw_byte_32(unsigned char *bits, unsigned int *base, int rb); -static void draw_byte_16(unsigned char *bits, unsigned int *base, int rb); -static void draw_byte_8(unsigned char *bits, unsigned int *base, int rb); - -static int g_loc_X; -static int g_loc_Y; -static int g_max_loc_X; -static int g_max_loc_Y; - -static int dispDeviceRowBytes; -static int dispDeviceDepth; -static int dispDeviceRect[4]; -static unsigned char *dispDeviceBase, *logicalDisplayBase; - -unsigned long disp_BAT[2] __initdata = {0, 0}; - -#define cmapsz (16*256) - -static unsigned char vga_font[cmapsz]; - -int boot_text_mapped; -int force_printk_to_btext = 0; - - -/* Here's a small text engine to use during early boot - * or for debugging purposes - * - * todo: - * - * - build some kind of vgacon with it to enable early printk - * - move to a separate file - * - add a few video driver hooks to keep in sync with display - * changes. - */ - -void map_boot_text(void) -{ - unsigned long base, offset, size; - unsigned char *vbase; - - /* By default, we are no longer mapped */ - boot_text_mapped = 0; - if (dispDeviceBase == 0) - return; - base = ((unsigned long) dispDeviceBase) & 0xFFFFF000UL; - offset = ((unsigned long) dispDeviceBase) - base; - size = dispDeviceRowBytes * dispDeviceRect[3] + offset - + dispDeviceRect[0]; - vbase = __ioremap(base, size, _PAGE_NO_CACHE); - if (vbase == 0) - return; - logicalDisplayBase = vbase + offset; - boot_text_mapped = 1; -} - -int btext_initialize(struct device_node *np) -{ - unsigned int width, height, depth, pitch; - unsigned long address = 0; - u32 *prop; - - prop = (u32 *)get_property(np, "width", NULL); - if (prop == NULL) - return -EINVAL; - width = *prop; - prop = (u32 *)get_property(np, "height", NULL); - if (prop == NULL) - return -EINVAL; - height = *prop; - prop = (u32 *)get_property(np, "depth", NULL); - if (prop == NULL) - return -EINVAL; - depth = *prop; - pitch = width * ((depth + 7) / 8); - prop = (u32 *)get_property(np, "linebytes", NULL); - if (prop) - pitch = *prop; - if (pitch == 1) - pitch = 0x1000; - prop = (u32 *)get_property(np, "address", NULL); - if (prop) - address = *prop; - - /* FIXME: Add support for PCI reg properties */ - - if (address == 0) - return -EINVAL; - - g_loc_X = 0; - g_loc_Y = 0; - g_max_loc_X = width / 8; - g_max_loc_Y = height / 16; - logicalDisplayBase = (unsigned char *)address; - dispDeviceBase = (unsigned char *)address; - dispDeviceRowBytes = pitch; - dispDeviceDepth = depth; - dispDeviceRect[0] = dispDeviceRect[1] = 0; - dispDeviceRect[2] = width; - dispDeviceRect[3] = height; - - map_boot_text(); - - return 0; -} - -static void btext_putc(unsigned char c) -{ - btext_drawchar(c); -} - -void __init init_boot_display(void) -{ - char *name; - struct device_node *np = NULL; - int rc = -ENODEV; - - printk("trying to initialize btext ...\n"); - - name = (char *)get_property(of_chosen, "linux,stdout-path", NULL); - if (name != NULL) { - np = of_find_node_by_path(name); - if (np != NULL) { - if (strcmp(np->type, "display") != 0) { - printk("boot stdout isn't a display !\n"); - of_node_put(np); - np = NULL; - } - } - } - if (np) - rc = btext_initialize(np); - if (rc) { - for (np = NULL; (np = of_find_node_by_type(np, "display"));) { - if (get_property(np, "linux,opened", NULL)) { - printk("trying %s ...\n", np->full_name); - rc = btext_initialize(np); - printk("result: %d\n", rc); - } - if (rc == 0) - break; - } - } - if (rc == 0 && udbg_putc == NULL) - udbg_putc = btext_putc; -} - - -/* Calc the base address of a given point (x,y) */ -static unsigned char * calc_base(int x, int y) -{ - unsigned char *base; - - base = logicalDisplayBase; - if (base == 0) - base = dispDeviceBase; - base += (x + dispDeviceRect[0]) * (dispDeviceDepth >> 3); - base += (y + dispDeviceRect[1]) * dispDeviceRowBytes; - return base; -} - -/* Adjust the display to a new resolution */ -void btext_update_display(unsigned long phys, int width, int height, - int depth, int pitch) -{ - if (dispDeviceBase == 0) - return; - - /* check it's the same frame buffer (within 256MB) */ - if ((phys ^ (unsigned long)dispDeviceBase) & 0xf0000000) - return; - - dispDeviceBase = (__u8 *) phys; - dispDeviceRect[0] = 0; - dispDeviceRect[1] = 0; - dispDeviceRect[2] = width; - dispDeviceRect[3] = height; - dispDeviceDepth = depth; - dispDeviceRowBytes = pitch; - if (boot_text_mapped) { - iounmap(logicalDisplayBase); - boot_text_mapped = 0; - } - map_boot_text(); - g_loc_X = 0; - g_loc_Y = 0; - g_max_loc_X = width / 8; - g_max_loc_Y = height / 16; -} - -void btext_clearscreen(void) -{ - unsigned long *base = (unsigned long *)calc_base(0, 0); - unsigned long width = ((dispDeviceRect[2] - dispDeviceRect[0]) * - (dispDeviceDepth >> 3)) >> 3; - int i,j; - - for (i=0; i<(dispDeviceRect[3] - dispDeviceRect[1]); i++) - { - unsigned long *ptr = base; - for(j=width; j; --j) - *(ptr++) = 0; - base += (dispDeviceRowBytes >> 3); - } -} - -#ifndef NO_SCROLL -static void scrollscreen(void) -{ - unsigned long *src = (unsigned long *)calc_base(0,16); - unsigned long *dst = (unsigned long *)calc_base(0,0); - unsigned long width = ((dispDeviceRect[2] - dispDeviceRect[0]) * - (dispDeviceDepth >> 3)) >> 3; - int i,j; - - for (i=0; i<(dispDeviceRect[3] - dispDeviceRect[1] - 16); i++) - { - unsigned long *src_ptr = src; - unsigned long *dst_ptr = dst; - for(j=width; j; --j) - *(dst_ptr++) = *(src_ptr++); - src += (dispDeviceRowBytes >> 3); - dst += (dispDeviceRowBytes >> 3); - } - for (i=0; i<16; i++) - { - unsigned long *dst_ptr = dst; - for(j=width; j; --j) - *(dst_ptr++) = 0; - dst += (dispDeviceRowBytes >> 3); - } -} -#endif /* ndef NO_SCROLL */ - -void btext_drawchar(char c) -{ - int cline = 0; -#ifdef NO_SCROLL - int x; -#endif - if (!boot_text_mapped) - return; - - switch (c) { - case '\b': - if (g_loc_X > 0) - --g_loc_X; - break; - case '\t': - g_loc_X = (g_loc_X & -8) + 8; - break; - case '\r': - g_loc_X = 0; - break; - case '\n': - g_loc_X = 0; - g_loc_Y++; - cline = 1; - break; - default: - draw_byte(c, g_loc_X++, g_loc_Y); - } - if (g_loc_X >= g_max_loc_X) { - g_loc_X = 0; - g_loc_Y++; - cline = 1; - } -#ifndef NO_SCROLL - while (g_loc_Y >= g_max_loc_Y) { - scrollscreen(); - g_loc_Y--; - } -#else - /* wrap around from bottom to top of screen so we don't - waste time scrolling each line. -- paulus. */ - if (g_loc_Y >= g_max_loc_Y) - g_loc_Y = 0; - if (cline) { - for (x = 0; x < g_max_loc_X; ++x) - draw_byte(' ', x, g_loc_Y); - } -#endif -} - -void btext_drawstring(const char *c) -{ - if (!boot_text_mapped) - return; - while (*c) - btext_drawchar(*c++); -} - -void btext_drawhex(unsigned long v) -{ - char *hex_table = "0123456789abcdef"; - - if (!boot_text_mapped) - return; - btext_drawchar(hex_table[(v >> 60) & 0x0000000FUL]); - btext_drawchar(hex_table[(v >> 56) & 0x0000000FUL]); - btext_drawchar(hex_table[(v >> 52) & 0x0000000FUL]); - btext_drawchar(hex_table[(v >> 48) & 0x0000000FUL]); - btext_drawchar(hex_table[(v >> 44) & 0x0000000FUL]); - btext_drawchar(hex_table[(v >> 40) & 0x0000000FUL]); - btext_drawchar(hex_table[(v >> 36) & 0x0000000FUL]); - btext_drawchar(hex_table[(v >> 32) & 0x0000000FUL]); - btext_drawchar(hex_table[(v >> 28) & 0x0000000FUL]); - btext_drawchar(hex_table[(v >> 24) & 0x0000000FUL]); - btext_drawchar(hex_table[(v >> 20) & 0x0000000FUL]); - btext_drawchar(hex_table[(v >> 16) & 0x0000000FUL]); - btext_drawchar(hex_table[(v >> 12) & 0x0000000FUL]); - btext_drawchar(hex_table[(v >> 8) & 0x0000000FUL]); - btext_drawchar(hex_table[(v >> 4) & 0x0000000FUL]); - btext_drawchar(hex_table[(v >> 0) & 0x0000000FUL]); - btext_drawchar(' '); -} - -static void draw_byte(unsigned char c, long locX, long locY) -{ - unsigned char *base = calc_base(locX << 3, locY << 4); - unsigned char *font = &vga_font[((unsigned int)c) * 16]; - int rb = dispDeviceRowBytes; - - switch(dispDeviceDepth) { - case 24: - case 32: - draw_byte_32(font, (unsigned int *)base, rb); - break; - case 15: - case 16: - draw_byte_16(font, (unsigned int *)base, rb); - break; - case 8: - draw_byte_8(font, (unsigned int *)base, rb); - break; - } -} - -static unsigned int expand_bits_8[16] = { - 0x00000000, - 0x000000ff, - 0x0000ff00, - 0x0000ffff, - 0x00ff0000, - 0x00ff00ff, - 0x00ffff00, - 0x00ffffff, - 0xff000000, - 0xff0000ff, - 0xff00ff00, - 0xff00ffff, - 0xffff0000, - 0xffff00ff, - 0xffffff00, - 0xffffffff -}; - -static unsigned int expand_bits_16[4] = { - 0x00000000, - 0x0000ffff, - 0xffff0000, - 0xffffffff -}; - - -static void draw_byte_32(unsigned char *font, unsigned int *base, int rb) -{ - int l, bits; - int fg = 0xFFFFFFFFUL; - int bg = 0x00000000UL; - - for (l = 0; l < 16; ++l) - { - bits = *font++; - base[0] = (-(bits >> 7) & fg) ^ bg; - base[1] = (-((bits >> 6) & 1) & fg) ^ bg; - base[2] = (-((bits >> 5) & 1) & fg) ^ bg; - base[3] = (-((bits >> 4) & 1) & fg) ^ bg; - base[4] = (-((bits >> 3) & 1) & fg) ^ bg; - base[5] = (-((bits >> 2) & 1) & fg) ^ bg; - base[6] = (-((bits >> 1) & 1) & fg) ^ bg; - base[7] = (-(bits & 1) & fg) ^ bg; - base = (unsigned int *) ((char *)base + rb); - } -} - -static void draw_byte_16(unsigned char *font, unsigned int *base, int rb) -{ - int l, bits; - int fg = 0xFFFFFFFFUL; - int bg = 0x00000000UL; - unsigned int *eb = (int *)expand_bits_16; - - for (l = 0; l < 16; ++l) - { - bits = *font++; - base[0] = (eb[bits >> 6] & fg) ^ bg; - base[1] = (eb[(bits >> 4) & 3] & fg) ^ bg; - base[2] = (eb[(bits >> 2) & 3] & fg) ^ bg; - base[3] = (eb[bits & 3] & fg) ^ bg; - base = (unsigned int *) ((char *)base + rb); - } -} - -static void draw_byte_8(unsigned char *font, unsigned int *base, int rb) -{ - int l, bits; - int fg = 0x0F0F0F0FUL; - int bg = 0x00000000UL; - unsigned int *eb = (int *)expand_bits_8; - - for (l = 0; l < 16; ++l) - { - bits = *font++; - base[0] = (eb[bits >> 4] & fg) ^ bg; - base[1] = (eb[bits & 0xf] & fg) ^ bg; - base = (unsigned int *) ((char *)base + rb); - } -} - -static unsigned char vga_font[cmapsz] = { -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0x81, 0xa5, 0x81, 0x81, 0xbd, -0x99, 0x81, 0x81, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0xff, -0xdb, 0xff, 0xff, 0xc3, 0xe7, 0xff, 0xff, 0x7e, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x6c, 0xfe, 0xfe, 0xfe, 0xfe, 0x7c, 0x38, 0x10, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x7c, 0xfe, -0x7c, 0x38, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, -0x3c, 0x3c, 0xe7, 0xe7, 0xe7, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x18, 0x3c, 0x7e, 0xff, 0xff, 0x7e, 0x18, 0x18, 0x3c, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x3c, -0x3c, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, -0xff, 0xff, 0xe7, 0xc3, 0xc3, 0xe7, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x66, 0x42, 0x42, 0x66, 0x3c, 0x00, -0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc3, 0x99, 0xbd, -0xbd, 0x99, 0xc3, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x1e, 0x0e, -0x1a, 0x32, 0x78, 0xcc, 0xcc, 0xcc, 0xcc, 0x78, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x3c, 0x66, 0x66, 0x66, 0x66, 0x3c, 0x18, 0x7e, 0x18, 0x18, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3f, 0x33, 0x3f, 0x30, 0x30, 0x30, -0x30, 0x70, 0xf0, 0xe0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7f, 0x63, -0x7f, 0x63, 0x63, 0x63, 0x63, 0x67, 0xe7, 0xe6, 0xc0, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x18, 0x18, 0xdb, 0x3c, 0xe7, 0x3c, 0xdb, 0x18, 0x18, -0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfe, 0xf8, -0xf0, 0xe0, 0xc0, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x06, 0x0e, -0x1e, 0x3e, 0xfe, 0x3e, 0x1e, 0x0e, 0x06, 0x02, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x18, 0x3c, 0x7e, 0x18, 0x18, 0x18, 0x7e, 0x3c, 0x18, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, -0x66, 0x00, 0x66, 0x66, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7f, 0xdb, -0xdb, 0xdb, 0x7b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x00, 0x00, 0x00, 0x00, -0x00, 0x7c, 0xc6, 0x60, 0x38, 0x6c, 0xc6, 0xc6, 0x6c, 0x38, 0x0c, 0xc6, -0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0xfe, 0xfe, 0xfe, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x3c, -0x7e, 0x18, 0x18, 0x18, 0x7e, 0x3c, 0x18, 0x7e, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x18, 0x3c, 0x7e, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, -0x18, 0x7e, 0x3c, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x18, 0x0c, 0xfe, 0x0c, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x60, 0xfe, 0x60, 0x30, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0xc0, -0xc0, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x24, 0x66, 0xff, 0x66, 0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x38, 0x7c, 0x7c, 0xfe, 0xfe, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xfe, 0x7c, 0x7c, -0x38, 0x38, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x18, 0x3c, 0x3c, 0x3c, 0x18, 0x18, 0x18, 0x00, 0x18, 0x18, -0x00, 0x00, 0x00, 0x00, 0x00, 0x66, 0x66, 0x66, 0x24, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x6c, -0x6c, 0xfe, 0x6c, 0x6c, 0x6c, 0xfe, 0x6c, 0x6c, 0x00, 0x00, 0x00, 0x00, -0x18, 0x18, 0x7c, 0xc6, 0xc2, 0xc0, 0x7c, 0x06, 0x06, 0x86, 0xc6, 0x7c, -0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc2, 0xc6, 0x0c, 0x18, -0x30, 0x60, 0xc6, 0x86, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0x6c, -0x6c, 0x38, 0x76, 0xdc, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, -0x00, 0x30, 0x30, 0x30, 0x60, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x18, 0x30, 0x30, 0x30, 0x30, -0x30, 0x30, 0x18, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x18, -0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x18, 0x30, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x66, 0x3c, 0xff, 0x3c, 0x66, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x7e, -0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x30, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x02, 0x06, 0x0c, 0x18, 0x30, 0x60, 0xc0, 0x80, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xce, 0xde, 0xf6, 0xe6, 0xc6, 0xc6, 0x7c, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x38, 0x78, 0x18, 0x18, 0x18, -0x18, 0x18, 0x18, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, -0x06, 0x0c, 0x18, 0x30, 0x60, 0xc0, 0xc6, 0xfe, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x7c, 0xc6, 0x06, 0x06, 0x3c, 0x06, 0x06, 0x06, 0xc6, 0x7c, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x1c, 0x3c, 0x6c, 0xcc, 0xfe, -0x0c, 0x0c, 0x0c, 0x1e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xc0, -0xc0, 0xc0, 0xfc, 0x06, 0x06, 0x06, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x38, 0x60, 0xc0, 0xc0, 0xfc, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xc6, 0x06, 0x06, 0x0c, 0x18, -0x30, 0x30, 0x30, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, -0xc6, 0xc6, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0x7e, 0x06, 0x06, 0x06, 0x0c, 0x78, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x00, 0x00, -0x00, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x18, 0x18, 0x00, 0x00, 0x00, 0x18, 0x18, 0x30, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x06, 0x0c, 0x18, 0x30, 0x60, 0x30, 0x18, 0x0c, 0x06, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0x00, 0x00, -0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, -0x30, 0x18, 0x0c, 0x06, 0x0c, 0x18, 0x30, 0x60, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x7c, 0xc6, 0xc6, 0x0c, 0x18, 0x18, 0x18, 0x00, 0x18, 0x18, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xde, 0xde, -0xde, 0xdc, 0xc0, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, -0x6c, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0xfc, 0x66, 0x66, 0x66, 0x7c, 0x66, 0x66, 0x66, 0x66, 0xfc, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x66, 0xc2, 0xc0, 0xc0, 0xc0, -0xc0, 0xc2, 0x66, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf8, 0x6c, -0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x6c, 0xf8, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0xfe, 0x66, 0x62, 0x68, 0x78, 0x68, 0x60, 0x62, 0x66, 0xfe, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0x66, 0x62, 0x68, 0x78, 0x68, -0x60, 0x60, 0x60, 0xf0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x66, -0xc2, 0xc0, 0xc0, 0xde, 0xc6, 0xc6, 0x66, 0x3a, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x18, 0x18, 0x18, 0x18, 0x18, -0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1e, 0x0c, -0x0c, 0x0c, 0x0c, 0x0c, 0xcc, 0xcc, 0xcc, 0x78, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0xe6, 0x66, 0x66, 0x6c, 0x78, 0x78, 0x6c, 0x66, 0x66, 0xe6, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x60, 0x60, 0x60, 0x60, 0x60, -0x60, 0x62, 0x66, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xe7, -0xff, 0xff, 0xdb, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0xc6, 0xe6, 0xf6, 0xfe, 0xde, 0xce, 0xc6, 0xc6, 0xc6, 0xc6, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, -0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfc, 0x66, -0x66, 0x66, 0x7c, 0x60, 0x60, 0x60, 0x60, 0xf0, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xd6, 0xde, 0x7c, -0x0c, 0x0e, 0x00, 0x00, 0x00, 0x00, 0xfc, 0x66, 0x66, 0x66, 0x7c, 0x6c, -0x66, 0x66, 0x66, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, -0xc6, 0x60, 0x38, 0x0c, 0x06, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0xff, 0xdb, 0x99, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, -0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xc3, -0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0x66, 0x3c, 0x18, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0xdb, 0xdb, 0xff, 0x66, 0x66, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xc3, 0x66, 0x3c, 0x18, 0x18, -0x3c, 0x66, 0xc3, 0xc3, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xc3, -0xc3, 0x66, 0x3c, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0xff, 0xc3, 0x86, 0x0c, 0x18, 0x30, 0x60, 0xc1, 0xc3, 0xff, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x30, 0x30, 0x30, 0x30, 0x30, -0x30, 0x30, 0x30, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, -0xc0, 0xe0, 0x70, 0x38, 0x1c, 0x0e, 0x06, 0x02, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x3c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x3c, -0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x6c, 0xc6, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x00, 0x00, -0x30, 0x30, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x78, 0x0c, 0x7c, -0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe0, 0x60, -0x60, 0x78, 0x6c, 0x66, 0x66, 0x66, 0x66, 0x7c, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xc0, 0xc0, 0xc0, 0xc6, 0x7c, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1c, 0x0c, 0x0c, 0x3c, 0x6c, 0xcc, -0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x7c, 0xc6, 0xfe, 0xc0, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x38, 0x6c, 0x64, 0x60, 0xf0, 0x60, 0x60, 0x60, 0x60, 0xf0, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x76, 0xcc, 0xcc, -0xcc, 0xcc, 0xcc, 0x7c, 0x0c, 0xcc, 0x78, 0x00, 0x00, 0x00, 0xe0, 0x60, -0x60, 0x6c, 0x76, 0x66, 0x66, 0x66, 0x66, 0xe6, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x18, 0x18, 0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x06, 0x00, 0x0e, 0x06, 0x06, -0x06, 0x06, 0x06, 0x06, 0x66, 0x66, 0x3c, 0x00, 0x00, 0x00, 0xe0, 0x60, -0x60, 0x66, 0x6c, 0x78, 0x78, 0x6c, 0x66, 0xe6, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe6, 0xff, 0xdb, -0xdb, 0xdb, 0xdb, 0xdb, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0xdc, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xdc, 0x66, 0x66, -0x66, 0x66, 0x66, 0x7c, 0x60, 0x60, 0xf0, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x76, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x7c, 0x0c, 0x0c, 0x1e, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0xdc, 0x76, 0x66, 0x60, 0x60, 0x60, 0xf0, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0x60, -0x38, 0x0c, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x30, -0x30, 0xfc, 0x30, 0x30, 0x30, 0x30, 0x36, 0x1c, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x76, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xc3, 0xc3, -0xc3, 0x66, 0x3c, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0xc3, 0xc3, 0xc3, 0xdb, 0xdb, 0xff, 0x66, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0x66, 0x3c, 0x18, 0x3c, 0x66, 0xc3, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0xc6, 0xc6, -0xc6, 0xc6, 0xc6, 0x7e, 0x06, 0x0c, 0xf8, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0xfe, 0xcc, 0x18, 0x30, 0x60, 0xc6, 0xfe, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x0e, 0x18, 0x18, 0x18, 0x70, 0x18, 0x18, 0x18, 0x18, 0x0e, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x00, 0x18, -0x18, 0x18, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x70, 0x18, -0x18, 0x18, 0x0e, 0x18, 0x18, 0x18, 0x18, 0x70, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x76, 0xdc, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x6c, 0xc6, -0xc6, 0xc6, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x66, -0xc2, 0xc0, 0xc0, 0xc0, 0xc2, 0x66, 0x3c, 0x0c, 0x06, 0x7c, 0x00, 0x00, -0x00, 0x00, 0xcc, 0x00, 0x00, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x76, -0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x18, 0x30, 0x00, 0x7c, 0xc6, 0xfe, -0xc0, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x6c, -0x00, 0x78, 0x0c, 0x7c, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0xcc, 0x00, 0x00, 0x78, 0x0c, 0x7c, 0xcc, 0xcc, 0xcc, 0x76, -0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x30, 0x18, 0x00, 0x78, 0x0c, 0x7c, -0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0x6c, 0x38, -0x00, 0x78, 0x0c, 0x7c, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x3c, 0x66, 0x60, 0x60, 0x66, 0x3c, 0x0c, 0x06, -0x3c, 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x6c, 0x00, 0x7c, 0xc6, 0xfe, -0xc0, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00, -0x00, 0x7c, 0xc6, 0xfe, 0xc0, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, -0x00, 0x60, 0x30, 0x18, 0x00, 0x7c, 0xc6, 0xfe, 0xc0, 0xc0, 0xc6, 0x7c, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x66, 0x00, 0x00, 0x38, 0x18, 0x18, -0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x3c, 0x66, -0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00, -0x00, 0x60, 0x30, 0x18, 0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, -0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00, 0x10, 0x38, 0x6c, 0xc6, 0xc6, -0xfe, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00, 0x38, 0x6c, 0x38, 0x00, -0x38, 0x6c, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00, -0x18, 0x30, 0x60, 0x00, 0xfe, 0x66, 0x60, 0x7c, 0x60, 0x60, 0x66, 0xfe, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x6e, 0x3b, 0x1b, -0x7e, 0xd8, 0xdc, 0x77, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x6c, -0xcc, 0xcc, 0xfe, 0xcc, 0xcc, 0xcc, 0xcc, 0xce, 0x00, 0x00, 0x00, 0x00, -0x00, 0x10, 0x38, 0x6c, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00, 0x00, 0x7c, 0xc6, 0xc6, -0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x30, 0x18, -0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, -0x00, 0x30, 0x78, 0xcc, 0x00, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x76, -0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x30, 0x18, 0x00, 0xcc, 0xcc, 0xcc, -0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00, -0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7e, 0x06, 0x0c, 0x78, 0x00, -0x00, 0xc6, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, -0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, -0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x7e, -0xc3, 0xc0, 0xc0, 0xc0, 0xc3, 0x7e, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, -0x00, 0x38, 0x6c, 0x64, 0x60, 0xf0, 0x60, 0x60, 0x60, 0x60, 0xe6, 0xfc, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0x66, 0x3c, 0x18, 0xff, 0x18, -0xff, 0x18, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfc, 0x66, 0x66, -0x7c, 0x62, 0x66, 0x6f, 0x66, 0x66, 0x66, 0xf3, 0x00, 0x00, 0x00, 0x00, -0x00, 0x0e, 0x1b, 0x18, 0x18, 0x18, 0x7e, 0x18, 0x18, 0x18, 0x18, 0x18, -0xd8, 0x70, 0x00, 0x00, 0x00, 0x18, 0x30, 0x60, 0x00, 0x78, 0x0c, 0x7c, -0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x18, 0x30, -0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00, -0x00, 0x18, 0x30, 0x60, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, -0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x30, 0x60, 0x00, 0xcc, 0xcc, 0xcc, -0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x76, 0xdc, -0x00, 0xdc, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x00, 0x00, 0x00, 0x00, -0x76, 0xdc, 0x00, 0xc6, 0xe6, 0xf6, 0xfe, 0xde, 0xce, 0xc6, 0xc6, 0xc6, -0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x6c, 0x6c, 0x3e, 0x00, 0x7e, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0x6c, 0x6c, -0x38, 0x00, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x30, 0x30, 0x00, 0x30, 0x30, 0x60, 0xc0, 0xc6, 0xc6, 0x7c, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xc0, -0xc0, 0xc0, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0xfe, 0x06, 0x06, 0x06, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0xc0, 0xc0, 0xc2, 0xc6, 0xcc, 0x18, 0x30, 0x60, 0xce, 0x9b, 0x06, -0x0c, 0x1f, 0x00, 0x00, 0x00, 0xc0, 0xc0, 0xc2, 0xc6, 0xcc, 0x18, 0x30, -0x66, 0xce, 0x96, 0x3e, 0x06, 0x06, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, -0x00, 0x18, 0x18, 0x18, 0x3c, 0x3c, 0x3c, 0x18, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x36, 0x6c, 0xd8, 0x6c, 0x36, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xd8, 0x6c, 0x36, -0x6c, 0xd8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x44, 0x11, 0x44, -0x11, 0x44, 0x11, 0x44, 0x11, 0x44, 0x11, 0x44, 0x11, 0x44, 0x11, 0x44, -0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa, -0x55, 0xaa, 0x55, 0xaa, 0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77, -0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77, 0x18, 0x18, 0x18, 0x18, -0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, -0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xf8, 0x18, 0x18, 0x18, 0x18, -0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xf8, 0x18, 0xf8, -0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x36, 0x36, 0x36, 0x36, -0x36, 0x36, 0x36, 0xf6, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0x36, 0x36, 0x36, 0x36, -0x36, 0x36, 0x36, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf8, 0x18, 0xf8, -0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x36, 0x36, 0x36, 0x36, -0x36, 0xf6, 0x06, 0xf6, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, -0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, -0x36, 0x36, 0x36, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0x06, 0xf6, -0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, -0x36, 0xf6, 0x06, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0xfe, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x18, 0xf8, 0x18, 0xf8, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0xf8, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, -0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x1f, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xff, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0xff, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, -0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x1f, 0x18, 0x18, 0x18, 0x18, -0x18, 0x18, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, -0x18, 0x18, 0x18, 0xff, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, -0x18, 0x18, 0x18, 0x18, 0x18, 0x1f, 0x18, 0x1f, 0x18, 0x18, 0x18, 0x18, -0x18, 0x18, 0x18, 0x18, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x37, -0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, -0x36, 0x37, 0x30, 0x3f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x3f, 0x30, 0x37, 0x36, 0x36, 0x36, 0x36, -0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0xf7, 0x00, 0xff, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0xff, 0x00, 0xf7, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, -0x36, 0x36, 0x36, 0x36, 0x36, 0x37, 0x30, 0x37, 0x36, 0x36, 0x36, 0x36, -0x36, 0x36, 0x36, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x00, 0xff, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x36, 0x36, 0x36, 0x36, -0x36, 0xf7, 0x00, 0xf7, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, -0x18, 0x18, 0x18, 0x18, 0x18, 0xff, 0x00, 0xff, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0xff, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0xff, 0x00, 0xff, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x36, 0x36, 0x36, 0x36, -0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x3f, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, -0x18, 0x1f, 0x18, 0x1f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x1f, 0x18, 0x1f, 0x18, 0x18, 0x18, 0x18, -0x18, 0x18, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3f, -0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, -0x36, 0x36, 0x36, 0xff, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, -0x18, 0x18, 0x18, 0x18, 0x18, 0xff, 0x18, 0xff, 0x18, 0x18, 0x18, 0x18, -0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xf8, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x1f, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, -0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, -0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf0, 0xf0, 0xf0, 0xf0, -0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, -0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, -0x0f, 0x0f, 0x0f, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x76, 0xdc, 0xd8, 0xd8, 0xd8, 0xdc, 0x76, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x78, 0xcc, 0xcc, 0xcc, 0xd8, 0xcc, 0xc6, 0xc6, 0xc6, 0xcc, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xc6, 0xc6, 0xc0, 0xc0, 0xc0, -0xc0, 0xc0, 0xc0, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0xfe, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0xfe, 0xc6, 0x60, 0x30, 0x18, 0x30, 0x60, 0xc6, 0xfe, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0xd8, 0xd8, -0xd8, 0xd8, 0xd8, 0x70, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x66, 0x66, 0x66, 0x66, 0x66, 0x7c, 0x60, 0x60, 0xc0, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x76, 0xdc, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0x18, 0x3c, 0x66, 0x66, -0x66, 0x3c, 0x18, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, -0x6c, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0x6c, 0x38, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x38, 0x6c, 0xc6, 0xc6, 0xc6, 0x6c, 0x6c, 0x6c, 0x6c, 0xee, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1e, 0x30, 0x18, 0x0c, 0x3e, 0x66, -0x66, 0x66, 0x66, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x7e, 0xdb, 0xdb, 0xdb, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x03, 0x06, 0x7e, 0xdb, 0xdb, 0xf3, 0x7e, 0x60, 0xc0, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1c, 0x30, 0x60, 0x60, 0x7c, 0x60, -0x60, 0x60, 0x30, 0x1c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, -0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0xfe, 0x00, 0x00, 0xfe, 0x00, 0x00, 0xfe, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x7e, 0x18, -0x18, 0x00, 0x00, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, -0x18, 0x0c, 0x06, 0x0c, 0x18, 0x30, 0x00, 0x7e, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x0c, 0x18, 0x30, 0x60, 0x30, 0x18, 0x0c, 0x00, 0x7e, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x1b, 0x1b, 0x1b, 0x18, 0x18, -0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, -0x18, 0x18, 0x18, 0x18, 0xd8, 0xd8, 0xd8, 0x70, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x00, 0x7e, 0x00, 0x18, 0x18, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x76, 0xdc, 0x00, -0x76, 0xdc, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0x6c, 0x6c, -0x38, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0f, 0x0c, 0x0c, -0x0c, 0x0c, 0x0c, 0xec, 0x6c, 0x6c, 0x3c, 0x1c, 0x00, 0x00, 0x00, 0x00, -0x00, 0xd8, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x70, 0xd8, 0x30, 0x60, 0xc8, 0xf8, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, -}; diff --git a/arch/ppc64/kernel/head.S b/arch/ppc64/kernel/head.S deleted file mode 100644 index 1c869ea72d28..000000000000 --- a/arch/ppc64/kernel/head.S +++ /dev/null @@ -1,2007 +0,0 @@ -/* - * arch/ppc64/kernel/head.S - * - * PowerPC version - * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) - * - * Rewritten by Cort Dougan (cort@cs.nmt.edu) for PReP - * Copyright (C) 1996 Cort Dougan <cort@cs.nmt.edu> - * Adapted for Power Macintosh by Paul Mackerras. - * Low-level exception handlers and MMU support - * rewritten by Paul Mackerras. - * Copyright (C) 1996 Paul Mackerras. - * - * Adapted for 64bit PowerPC by Dave Engebretsen, Peter Bergner, and - * Mike Corrigan {engebret|bergner|mikejc}@us.ibm.com - * - * This file contains the low-level support and setup for the - * PowerPC-64 platform, including trap and interrupt dispatch. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <linux/config.h> -#include <linux/threads.h> -#include <asm/processor.h> -#include <asm/page.h> -#include <asm/mmu.h> -#include <asm/ppc_asm.h> -#include <asm/asm-offsets.h> -#include <asm/bug.h> -#include <asm/cputable.h> -#include <asm/setup.h> -#include <asm/hvcall.h> -#include <asm/iseries/lpar_map.h> -#include <asm/thread_info.h> - -#ifdef CONFIG_PPC_ISERIES -#define DO_SOFT_DISABLE -#endif - -/* - * We layout physical memory as follows: - * 0x0000 - 0x00ff : Secondary processor spin code - * 0x0100 - 0x2fff : pSeries Interrupt prologs - * 0x3000 - 0x5fff : interrupt support, iSeries and common interrupt prologs - * 0x6000 - 0x6fff : Initial (CPU0) segment table - * 0x7000 - 0x7fff : FWNMI data area - * 0x8000 - : Early init and support code - */ - -/* - * SPRG Usage - * - * Register Definition - * - * SPRG0 reserved for hypervisor - * SPRG1 temp - used to save gpr - * SPRG2 temp - used to save gpr - * SPRG3 virt addr of paca - */ - -/* - * Entering into this code we make the following assumptions: - * For pSeries: - * 1. The MMU is off & open firmware is running in real mode. - * 2. The kernel is entered at __start - * - * For iSeries: - * 1. The MMU is on (as it always is for iSeries) - * 2. The kernel is entered at system_reset_iSeries - */ - - .text - .globl _stext -_stext: -#ifdef CONFIG_PPC_MULTIPLATFORM -_GLOBAL(__start) - /* NOP this out unconditionally */ -BEGIN_FTR_SECTION - b .__start_initialization_multiplatform -END_FTR_SECTION(0, 1) -#endif /* CONFIG_PPC_MULTIPLATFORM */ - - /* Catch branch to 0 in real mode */ - trap - -#ifdef CONFIG_PPC_ISERIES - /* - * At offset 0x20, there is a pointer to iSeries LPAR data. - * This is required by the hypervisor - */ - . = 0x20 - .llong hvReleaseData-KERNELBASE - - /* - * At offset 0x28 and 0x30 are offsets to the mschunks_map - * array (used by the iSeries LPAR debugger to do translation - * between physical addresses and absolute addresses) and - * to the pidhash table (also used by the debugger) - */ - .llong mschunks_map-KERNELBASE - .llong 0 /* pidhash-KERNELBASE SFRXXX */ - - /* Offset 0x38 - Pointer to start of embedded System.map */ - .globl embedded_sysmap_start -embedded_sysmap_start: - .llong 0 - /* Offset 0x40 - Pointer to end of embedded System.map */ - .globl embedded_sysmap_end -embedded_sysmap_end: - .llong 0 - -#endif /* CONFIG_PPC_ISERIES */ - - /* Secondary processors spin on this value until it goes to 1. */ - .globl __secondary_hold_spinloop -__secondary_hold_spinloop: - .llong 0x0 - - /* Secondary processors write this value with their cpu # */ - /* after they enter the spin loop immediately below. */ - .globl __secondary_hold_acknowledge -__secondary_hold_acknowledge: - .llong 0x0 - - . = 0x60 -/* - * The following code is used on pSeries to hold secondary processors - * in a spin loop after they have been freed from OpenFirmware, but - * before the bulk of the kernel has been relocated. This code - * is relocated to physical address 0x60 before prom_init is run. - * All of it must fit below the first exception vector at 0x100. - */ -_GLOBAL(__secondary_hold) - mfmsr r24 - ori r24,r24,MSR_RI - mtmsrd r24 /* RI on */ - - /* Grab our linux cpu number */ - mr r24,r3 - - /* Tell the master cpu we're here */ - /* Relocation is off & we are located at an address less */ - /* than 0x100, so only need to grab low order offset. */ - std r24,__secondary_hold_acknowledge@l(0) - sync - - /* All secondary cpus wait here until told to start. */ -100: ld r4,__secondary_hold_spinloop@l(0) - cmpdi 0,r4,1 - bne 100b - -#ifdef CONFIG_HMT - b .hmt_init -#else -#ifdef CONFIG_SMP - mr r3,r24 - b .pSeries_secondary_smp_init -#else - BUG_OPCODE -#endif -#endif - -/* This value is used to mark exception frames on the stack. */ - .section ".toc","aw" -exception_marker: - .tc ID_72656773_68657265[TC],0x7265677368657265 - .text - -/* - * The following macros define the code that appears as - * the prologue to each of the exception handlers. They - * are split into two parts to allow a single kernel binary - * to be used for pSeries and iSeries. - * LOL. One day... - paulus - */ - -/* - * We make as much of the exception code common between native - * exception handlers (including pSeries LPAR) and iSeries LPAR - * implementations as possible. - */ - -/* - * This is the start of the interrupt handlers for pSeries - * This code runs with relocation off. - */ -#define EX_R9 0 -#define EX_R10 8 -#define EX_R11 16 -#define EX_R12 24 -#define EX_R13 32 -#define EX_SRR0 40 -#define EX_DAR 48 -#define EX_DSISR 56 -#define EX_CCR 60 -#define EX_R3 64 -#define EX_LR 72 - -#define EXCEPTION_PROLOG_PSERIES(area, label) \ - mfspr r13,SPRN_SPRG3; /* get paca address into r13 */ \ - std r9,area+EX_R9(r13); /* save r9 - r12 */ \ - std r10,area+EX_R10(r13); \ - std r11,area+EX_R11(r13); \ - std r12,area+EX_R12(r13); \ - mfspr r9,SPRN_SPRG1; \ - std r9,area+EX_R13(r13); \ - mfcr r9; \ - clrrdi r12,r13,32; /* get high part of &label */ \ - mfmsr r10; \ - mfspr r11,SPRN_SRR0; /* save SRR0 */ \ - ori r12,r12,(label)@l; /* virt addr of handler */ \ - ori r10,r10,MSR_IR|MSR_DR|MSR_RI; \ - mtspr SPRN_SRR0,r12; \ - mfspr r12,SPRN_SRR1; /* and SRR1 */ \ - mtspr SPRN_SRR1,r10; \ - rfid; \ - b . /* prevent speculative execution */ - -/* - * This is the start of the interrupt handlers for iSeries - * This code runs with relocation on. - */ -#define EXCEPTION_PROLOG_ISERIES_1(area) \ - mfspr r13,SPRN_SPRG3; /* get paca address into r13 */ \ - std r9,area+EX_R9(r13); /* save r9 - r12 */ \ - std r10,area+EX_R10(r13); \ - std r11,area+EX_R11(r13); \ - std r12,area+EX_R12(r13); \ - mfspr r9,SPRN_SPRG1; \ - std r9,area+EX_R13(r13); \ - mfcr r9 - -#define EXCEPTION_PROLOG_ISERIES_2 \ - mfmsr r10; \ - ld r11,PACALPPACA+LPPACASRR0(r13); \ - ld r12,PACALPPACA+LPPACASRR1(r13); \ - ori r10,r10,MSR_RI; \ - mtmsrd r10,1 - -/* - * The common exception prolog is used for all except a few exceptions - * such as a segment miss on a kernel address. We have to be prepared - * to take another exception from the point where we first touch the - * kernel stack onwards. - * - * On entry r13 points to the paca, r9-r13 are saved in the paca, - * r9 contains the saved CR, r11 and r12 contain the saved SRR0 and - * SRR1, and relocation is on. - */ -#define EXCEPTION_PROLOG_COMMON(n, area) \ - andi. r10,r12,MSR_PR; /* See if coming from user */ \ - mr r10,r1; /* Save r1 */ \ - subi r1,r1,INT_FRAME_SIZE; /* alloc frame on kernel stack */ \ - beq- 1f; \ - ld r1,PACAKSAVE(r13); /* kernel stack to use */ \ -1: cmpdi cr1,r1,0; /* check if r1 is in userspace */ \ - bge- cr1,bad_stack; /* abort if it is */ \ - std r9,_CCR(r1); /* save CR in stackframe */ \ - std r11,_NIP(r1); /* save SRR0 in stackframe */ \ - std r12,_MSR(r1); /* save SRR1 in stackframe */ \ - std r10,0(r1); /* make stack chain pointer */ \ - std r0,GPR0(r1); /* save r0 in stackframe */ \ - std r10,GPR1(r1); /* save r1 in stackframe */ \ - std r2,GPR2(r1); /* save r2 in stackframe */ \ - SAVE_4GPRS(3, r1); /* save r3 - r6 in stackframe */ \ - SAVE_2GPRS(7, r1); /* save r7, r8 in stackframe */ \ - ld r9,area+EX_R9(r13); /* move r9, r10 to stackframe */ \ - ld r10,area+EX_R10(r13); \ - std r9,GPR9(r1); \ - std r10,GPR10(r1); \ - ld r9,area+EX_R11(r13); /* move r11 - r13 to stackframe */ \ - ld r10,area+EX_R12(r13); \ - ld r11,area+EX_R13(r13); \ - std r9,GPR11(r1); \ - std r10,GPR12(r1); \ - std r11,GPR13(r1); \ - ld r2,PACATOC(r13); /* get kernel TOC into r2 */ \ - mflr r9; /* save LR in stackframe */ \ - std r9,_LINK(r1); \ - mfctr r10; /* save CTR in stackframe */ \ - std r10,_CTR(r1); \ - mfspr r11,SPRN_XER; /* save XER in stackframe */ \ - std r11,_XER(r1); \ - li r9,(n)+1; \ - std r9,_TRAP(r1); /* set trap number */ \ - li r10,0; \ - ld r11,exception_marker@toc(r2); \ - std r10,RESULT(r1); /* clear regs->result */ \ - std r11,STACK_FRAME_OVERHEAD-16(r1); /* mark the frame */ - -/* - * Exception vectors. - */ -#define STD_EXCEPTION_PSERIES(n, label) \ - . = n; \ - .globl label##_pSeries; \ -label##_pSeries: \ - HMT_MEDIUM; \ - mtspr SPRN_SPRG1,r13; /* save r13 */ \ - RUNLATCH_ON(r13); \ - EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label##_common) - -#define STD_EXCEPTION_ISERIES(n, label, area) \ - .globl label##_iSeries; \ -label##_iSeries: \ - HMT_MEDIUM; \ - mtspr SPRN_SPRG1,r13; /* save r13 */ \ - RUNLATCH_ON(r13); \ - EXCEPTION_PROLOG_ISERIES_1(area); \ - EXCEPTION_PROLOG_ISERIES_2; \ - b label##_common - -#define MASKABLE_EXCEPTION_ISERIES(n, label) \ - .globl label##_iSeries; \ -label##_iSeries: \ - HMT_MEDIUM; \ - mtspr SPRN_SPRG1,r13; /* save r13 */ \ - RUNLATCH_ON(r13); \ - EXCEPTION_PROLOG_ISERIES_1(PACA_EXGEN); \ - lbz r10,PACAPROCENABLED(r13); \ - cmpwi 0,r10,0; \ - beq- label##_iSeries_masked; \ - EXCEPTION_PROLOG_ISERIES_2; \ - b label##_common; \ - -#ifdef DO_SOFT_DISABLE -#define DISABLE_INTS \ - lbz r10,PACAPROCENABLED(r13); \ - li r11,0; \ - std r10,SOFTE(r1); \ - mfmsr r10; \ - stb r11,PACAPROCENABLED(r13); \ - ori r10,r10,MSR_EE; \ - mtmsrd r10,1 - -#define ENABLE_INTS \ - lbz r10,PACAPROCENABLED(r13); \ - mfmsr r11; \ - std r10,SOFTE(r1); \ - ori r11,r11,MSR_EE; \ - mtmsrd r11,1 - -#else /* hard enable/disable interrupts */ -#define DISABLE_INTS - -#define ENABLE_INTS \ - ld r12,_MSR(r1); \ - mfmsr r11; \ - rlwimi r11,r12,0,MSR_EE; \ - mtmsrd r11,1 - -#endif - -#define STD_EXCEPTION_COMMON(trap, label, hdlr) \ - .align 7; \ - .globl label##_common; \ -label##_common: \ - EXCEPTION_PROLOG_COMMON(trap, PACA_EXGEN); \ - DISABLE_INTS; \ - bl .save_nvgprs; \ - addi r3,r1,STACK_FRAME_OVERHEAD; \ - bl hdlr; \ - b .ret_from_except - -#define STD_EXCEPTION_COMMON_LITE(trap, label, hdlr) \ - .align 7; \ - .globl label##_common; \ -label##_common: \ - EXCEPTION_PROLOG_COMMON(trap, PACA_EXGEN); \ - DISABLE_INTS; \ - addi r3,r1,STACK_FRAME_OVERHEAD; \ - bl hdlr; \ - b .ret_from_except_lite - -/* - * Start of pSeries system interrupt routines - */ - . = 0x100 - .globl __start_interrupts -__start_interrupts: - - STD_EXCEPTION_PSERIES(0x100, system_reset) - - . = 0x200 -_machine_check_pSeries: - HMT_MEDIUM - mtspr SPRN_SPRG1,r13 /* save r13 */ - RUNLATCH_ON(r13) - EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common) - - . = 0x300 - .globl data_access_pSeries -data_access_pSeries: - HMT_MEDIUM - mtspr SPRN_SPRG1,r13 -BEGIN_FTR_SECTION - mtspr SPRN_SPRG2,r12 - mfspr r13,SPRN_DAR - mfspr r12,SPRN_DSISR - srdi r13,r13,60 - rlwimi r13,r12,16,0x20 - mfcr r12 - cmpwi r13,0x2c - beq .do_stab_bolted_pSeries - mtcrf 0x80,r12 - mfspr r12,SPRN_SPRG2 -END_FTR_SECTION_IFCLR(CPU_FTR_SLB) - EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, data_access_common) - - . = 0x380 - .globl data_access_slb_pSeries -data_access_slb_pSeries: - HMT_MEDIUM - mtspr SPRN_SPRG1,r13 - RUNLATCH_ON(r13) - mfspr r13,SPRN_SPRG3 /* get paca address into r13 */ - std r3,PACA_EXSLB+EX_R3(r13) - mfspr r3,SPRN_DAR - std r9,PACA_EXSLB+EX_R9(r13) /* save r9 - r12 */ - mfcr r9 -#ifdef __DISABLED__ - /* Keep that around for when we re-implement dynamic VSIDs */ - cmpdi r3,0 - bge slb_miss_user_pseries -#endif /* __DISABLED__ */ - std r10,PACA_EXSLB+EX_R10(r13) - std r11,PACA_EXSLB+EX_R11(r13) - std r12,PACA_EXSLB+EX_R12(r13) - mfspr r10,SPRN_SPRG1 - std r10,PACA_EXSLB+EX_R13(r13) - mfspr r12,SPRN_SRR1 /* and SRR1 */ - b .slb_miss_realmode /* Rel. branch works in real mode */ - - STD_EXCEPTION_PSERIES(0x400, instruction_access) - - . = 0x480 - .globl instruction_access_slb_pSeries -instruction_access_slb_pSeries: - HMT_MEDIUM - mtspr SPRN_SPRG1,r13 - RUNLATCH_ON(r13) - mfspr r13,SPRN_SPRG3 /* get paca address into r13 */ - std r3,PACA_EXSLB+EX_R3(r13) - mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */ - std r9,PACA_EXSLB+EX_R9(r13) /* save r9 - r12 */ - mfcr r9 -#ifdef __DISABLED__ - /* Keep that around for when we re-implement dynamic VSIDs */ - cmpdi r3,0 - bge slb_miss_user_pseries -#endif /* __DISABLED__ */ - std r10,PACA_EXSLB+EX_R10(r13) - std r11,PACA_EXSLB+EX_R11(r13) - std r12,PACA_EXSLB+EX_R12(r13) - mfspr r10,SPRN_SPRG1 - std r10,PACA_EXSLB+EX_R13(r13) - mfspr r12,SPRN_SRR1 /* and SRR1 */ - b .slb_miss_realmode /* Rel. branch works in real mode */ - - STD_EXCEPTION_PSERIES(0x500, hardware_interrupt) - STD_EXCEPTION_PSERIES(0x600, alignment) - STD_EXCEPTION_PSERIES(0x700, program_check) - STD_EXCEPTION_PSERIES(0x800, fp_unavailable) - STD_EXCEPTION_PSERIES(0x900, decrementer) - STD_EXCEPTION_PSERIES(0xa00, trap_0a) - STD_EXCEPTION_PSERIES(0xb00, trap_0b) - - . = 0xc00 - .globl system_call_pSeries -system_call_pSeries: - HMT_MEDIUM - RUNLATCH_ON(r9) - mr r9,r13 - mfmsr r10 - mfspr r13,SPRN_SPRG3 - mfspr r11,SPRN_SRR0 - clrrdi r12,r13,32 - oris r12,r12,system_call_common@h - ori r12,r12,system_call_common@l - mtspr SPRN_SRR0,r12 - ori r10,r10,MSR_IR|MSR_DR|MSR_RI - mfspr r12,SPRN_SRR1 - mtspr SPRN_SRR1,r10 - rfid - b . /* prevent speculative execution */ - - STD_EXCEPTION_PSERIES(0xd00, single_step) - STD_EXCEPTION_PSERIES(0xe00, trap_0e) - - /* We need to deal with the Altivec unavailable exception - * here which is at 0xf20, thus in the middle of the - * prolog code of the PerformanceMonitor one. A little - * trickery is thus necessary - */ - . = 0xf00 - b performance_monitor_pSeries - - STD_EXCEPTION_PSERIES(0xf20, altivec_unavailable) - - STD_EXCEPTION_PSERIES(0x1300, instruction_breakpoint) - STD_EXCEPTION_PSERIES(0x1700, altivec_assist) - - . = 0x3000 - -/*** pSeries interrupt support ***/ - - /* moved from 0xf00 */ - STD_EXCEPTION_PSERIES(., performance_monitor) - - .align 7 -_GLOBAL(do_stab_bolted_pSeries) - mtcrf 0x80,r12 - mfspr r12,SPRN_SPRG2 - EXCEPTION_PROLOG_PSERIES(PACA_EXSLB, .do_stab_bolted) - -/* - * We have some room here we use that to put - * the peries slb miss user trampoline code so it's reasonably - * away from slb_miss_user_common to avoid problems with rfid - * - * This is used for when the SLB miss handler has to go virtual, - * which doesn't happen for now anymore but will once we re-implement - * dynamic VSIDs for shared page tables - */ -#ifdef __DISABLED__ -slb_miss_user_pseries: - std r10,PACA_EXGEN+EX_R10(r13) - std r11,PACA_EXGEN+EX_R11(r13) - std r12,PACA_EXGEN+EX_R12(r13) - mfspr r10,SPRG1 - ld r11,PACA_EXSLB+EX_R9(r13) - ld r12,PACA_EXSLB+EX_R3(r13) - std r10,PACA_EXGEN+EX_R13(r13) - std r11,PACA_EXGEN+EX_R9(r13) - std r12,PACA_EXGEN+EX_R3(r13) - clrrdi r12,r13,32 - mfmsr r10 - mfspr r11,SRR0 /* save SRR0 */ - ori r12,r12,slb_miss_user_common@l /* virt addr of handler */ - ori r10,r10,MSR_IR|MSR_DR|MSR_RI - mtspr SRR0,r12 - mfspr r12,SRR1 /* and SRR1 */ - mtspr SRR1,r10 - rfid - b . /* prevent spec. execution */ -#endif /* __DISABLED__ */ - -/* - * Vectors for the FWNMI option. Share common code. - */ - .globl system_reset_fwnmi -system_reset_fwnmi: - HMT_MEDIUM - mtspr SPRN_SPRG1,r13 /* save r13 */ - RUNLATCH_ON(r13) - EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common) - - .globl machine_check_fwnmi -machine_check_fwnmi: - HMT_MEDIUM - mtspr SPRN_SPRG1,r13 /* save r13 */ - RUNLATCH_ON(r13) - EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common) - -#ifdef CONFIG_PPC_ISERIES -/*** ISeries-LPAR interrupt handlers ***/ - - STD_EXCEPTION_ISERIES(0x200, machine_check, PACA_EXMC) - - .globl data_access_iSeries -data_access_iSeries: - mtspr SPRN_SPRG1,r13 -BEGIN_FTR_SECTION - mtspr SPRN_SPRG2,r12 - mfspr r13,SPRN_DAR - mfspr r12,SPRN_DSISR - srdi r13,r13,60 - rlwimi r13,r12,16,0x20 - mfcr r12 - cmpwi r13,0x2c - beq .do_stab_bolted_iSeries - mtcrf 0x80,r12 - mfspr r12,SPRN_SPRG2 -END_FTR_SECTION_IFCLR(CPU_FTR_SLB) - EXCEPTION_PROLOG_ISERIES_1(PACA_EXGEN) - EXCEPTION_PROLOG_ISERIES_2 - b data_access_common - -.do_stab_bolted_iSeries: - mtcrf 0x80,r12 - mfspr r12,SPRN_SPRG2 - EXCEPTION_PROLOG_ISERIES_1(PACA_EXSLB) - EXCEPTION_PROLOG_ISERIES_2 - b .do_stab_bolted - - .globl data_access_slb_iSeries -data_access_slb_iSeries: - mtspr SPRN_SPRG1,r13 /* save r13 */ - mfspr r13,SPRN_SPRG3 /* get paca address into r13 */ - std r3,PACA_EXSLB+EX_R3(r13) - mfspr r3,SPRN_DAR - std r9,PACA_EXSLB+EX_R9(r13) - mfcr r9 -#ifdef __DISABLED__ - cmpdi r3,0 - bge slb_miss_user_iseries -#endif - std r10,PACA_EXSLB+EX_R10(r13) - std r11,PACA_EXSLB+EX_R11(r13) - std r12,PACA_EXSLB+EX_R12(r13) - mfspr r10,SPRN_SPRG1 - std r10,PACA_EXSLB+EX_R13(r13) - ld r12,PACALPPACA+LPPACASRR1(r13); - b .slb_miss_realmode - - STD_EXCEPTION_ISERIES(0x400, instruction_access, PACA_EXGEN) - - .globl instruction_access_slb_iSeries -instruction_access_slb_iSeries: - mtspr SPRN_SPRG1,r13 /* save r13 */ - mfspr r13,SPRN_SPRG3 /* get paca address into r13 */ - std r3,PACA_EXSLB+EX_R3(r13) - ld r3,PACALPPACA+LPPACASRR0(r13) /* get SRR0 value */ - std r9,PACA_EXSLB+EX_R9(r13) - mfcr r9 -#ifdef __DISABLED__ - cmpdi r3,0 - bge .slb_miss_user_iseries -#endif - std r10,PACA_EXSLB+EX_R10(r13) - std r11,PACA_EXSLB+EX_R11(r13) - std r12,PACA_EXSLB+EX_R12(r13) - mfspr r10,SPRN_SPRG1 - std r10,PACA_EXSLB+EX_R13(r13) - ld r12,PACALPPACA+LPPACASRR1(r13); - b .slb_miss_realmode - -#ifdef __DISABLED__ -slb_miss_user_iseries: - std r10,PACA_EXGEN+EX_R10(r13) - std r11,PACA_EXGEN+EX_R11(r13) - std r12,PACA_EXGEN+EX_R12(r13) - mfspr r10,SPRG1 - ld r11,PACA_EXSLB+EX_R9(r13) - ld r12,PACA_EXSLB+EX_R3(r13) - std r10,PACA_EXGEN+EX_R13(r13) - std r11,PACA_EXGEN+EX_R9(r13) - std r12,PACA_EXGEN+EX_R3(r13) - EXCEPTION_PROLOG_ISERIES_2 - b slb_miss_user_common -#endif - - MASKABLE_EXCEPTION_ISERIES(0x500, hardware_interrupt) - STD_EXCEPTION_ISERIES(0x600, alignment, PACA_EXGEN) - STD_EXCEPTION_ISERIES(0x700, program_check, PACA_EXGEN) - STD_EXCEPTION_ISERIES(0x800, fp_unavailable, PACA_EXGEN) - MASKABLE_EXCEPTION_ISERIES(0x900, decrementer) - STD_EXCEPTION_ISERIES(0xa00, trap_0a, PACA_EXGEN) - STD_EXCEPTION_ISERIES(0xb00, trap_0b, PACA_EXGEN) - - .globl system_call_iSeries -system_call_iSeries: - mr r9,r13 - mfspr r13,SPRN_SPRG3 - EXCEPTION_PROLOG_ISERIES_2 - b system_call_common - - STD_EXCEPTION_ISERIES( 0xd00, single_step, PACA_EXGEN) - STD_EXCEPTION_ISERIES( 0xe00, trap_0e, PACA_EXGEN) - STD_EXCEPTION_ISERIES( 0xf00, performance_monitor, PACA_EXGEN) - - .globl system_reset_iSeries -system_reset_iSeries: - mfspr r13,SPRN_SPRG3 /* Get paca address */ - mfmsr r24 - ori r24,r24,MSR_RI - mtmsrd r24 /* RI on */ - lhz r24,PACAPACAINDEX(r13) /* Get processor # */ - cmpwi 0,r24,0 /* Are we processor 0? */ - beq .__start_initialization_iSeries /* Start up the first processor */ - mfspr r4,SPRN_CTRLF - li r5,CTRL_RUNLATCH /* Turn off the run light */ - andc r4,r4,r5 - mtspr SPRN_CTRLT,r4 - -1: - HMT_LOW -#ifdef CONFIG_SMP - lbz r23,PACAPROCSTART(r13) /* Test if this processor - * should start */ - sync - LOADADDR(r3,current_set) - sldi r28,r24,3 /* get current_set[cpu#] */ - ldx r3,r3,r28 - addi r1,r3,THREAD_SIZE - subi r1,r1,STACK_FRAME_OVERHEAD - - cmpwi 0,r23,0 - beq iSeries_secondary_smp_loop /* Loop until told to go */ - bne .__secondary_start /* Loop until told to go */ -iSeries_secondary_smp_loop: - /* Let the Hypervisor know we are alive */ - /* 8002 is a call to HvCallCfg::getLps, a harmless Hypervisor function */ - lis r3,0x8002 - rldicr r3,r3,32,15 /* r0 = (r3 << 32) & 0xffff000000000000 */ -#else /* CONFIG_SMP */ - /* Yield the processor. This is required for non-SMP kernels - which are running on multi-threaded machines. */ - lis r3,0x8000 - rldicr r3,r3,32,15 /* r3 = (r3 << 32) & 0xffff000000000000 */ - addi r3,r3,18 /* r3 = 0x8000000000000012 which is "yield" */ - li r4,0 /* "yield timed" */ - li r5,-1 /* "yield forever" */ -#endif /* CONFIG_SMP */ - li r0,-1 /* r0=-1 indicates a Hypervisor call */ - sc /* Invoke the hypervisor via a system call */ - mfspr r13,SPRN_SPRG3 /* Put r13 back ???? */ - b 1b /* If SMP not configured, secondaries - * loop forever */ - - .globl decrementer_iSeries_masked -decrementer_iSeries_masked: - li r11,1 - stb r11,PACALPPACA+LPPACADECRINT(r13) - lwz r12,PACADEFAULTDECR(r13) - mtspr SPRN_DEC,r12 - /* fall through */ - - .globl hardware_interrupt_iSeries_masked -hardware_interrupt_iSeries_masked: - mtcrf 0x80,r9 /* Restore regs */ - ld r11,PACALPPACA+LPPACASRR0(r13) - ld r12,PACALPPACA+LPPACASRR1(r13) - mtspr SPRN_SRR0,r11 - mtspr SPRN_SRR1,r12 - ld r9,PACA_EXGEN+EX_R9(r13) - ld r10,PACA_EXGEN+EX_R10(r13) - ld r11,PACA_EXGEN+EX_R11(r13) - ld r12,PACA_EXGEN+EX_R12(r13) - ld r13,PACA_EXGEN+EX_R13(r13) - rfid - b . /* prevent speculative execution */ -#endif /* CONFIG_PPC_ISERIES */ - -/*** Common interrupt handlers ***/ - - STD_EXCEPTION_COMMON(0x100, system_reset, .system_reset_exception) - - /* - * Machine check is different because we use a different - * save area: PACA_EXMC instead of PACA_EXGEN. - */ - .align 7 - .globl machine_check_common -machine_check_common: - EXCEPTION_PROLOG_COMMON(0x200, PACA_EXMC) - DISABLE_INTS - bl .save_nvgprs - addi r3,r1,STACK_FRAME_OVERHEAD - bl .machine_check_exception - b .ret_from_except - - STD_EXCEPTION_COMMON_LITE(0x900, decrementer, .timer_interrupt) - STD_EXCEPTION_COMMON(0xa00, trap_0a, .unknown_exception) - STD_EXCEPTION_COMMON(0xb00, trap_0b, .unknown_exception) - STD_EXCEPTION_COMMON(0xd00, single_step, .single_step_exception) - STD_EXCEPTION_COMMON(0xe00, trap_0e, .unknown_exception) - STD_EXCEPTION_COMMON(0xf00, performance_monitor, .performance_monitor_exception) - STD_EXCEPTION_COMMON(0x1300, instruction_breakpoint, .instruction_breakpoint_exception) -#ifdef CONFIG_ALTIVEC - STD_EXCEPTION_COMMON(0x1700, altivec_assist, .altivec_assist_exception) -#else - STD_EXCEPTION_COMMON(0x1700, altivec_assist, .unknown_exception) -#endif - -/* - * Here we have detected that the kernel stack pointer is bad. - * R9 contains the saved CR, r13 points to the paca, - * r10 contains the (bad) kernel stack pointer, - * r11 and r12 contain the saved SRR0 and SRR1. - * We switch to using an emergency stack, save the registers there, - * and call kernel_bad_stack(), which panics. - */ -bad_stack: - ld r1,PACAEMERGSP(r13) - subi r1,r1,64+INT_FRAME_SIZE - std r9,_CCR(r1) - std r10,GPR1(r1) - std r11,_NIP(r1) - std r12,_MSR(r1) - mfspr r11,SPRN_DAR - mfspr r12,SPRN_DSISR - std r11,_DAR(r1) - std r12,_DSISR(r1) - mflr r10 - mfctr r11 - mfxer r12 - std r10,_LINK(r1) - std r11,_CTR(r1) - std r12,_XER(r1) - SAVE_GPR(0,r1) - SAVE_GPR(2,r1) - SAVE_4GPRS(3,r1) - SAVE_2GPRS(7,r1) - SAVE_10GPRS(12,r1) - SAVE_10GPRS(22,r1) - addi r11,r1,INT_FRAME_SIZE - std r11,0(r1) - li r12,0 - std r12,0(r11) - ld r2,PACATOC(r13) -1: addi r3,r1,STACK_FRAME_OVERHEAD - bl .kernel_bad_stack - b 1b - -/* - * Return from an exception with minimal checks. - * The caller is assumed to have done EXCEPTION_PROLOG_COMMON. - * If interrupts have been enabled, or anything has been - * done that might have changed the scheduling status of - * any task or sent any task a signal, you should use - * ret_from_except or ret_from_except_lite instead of this. - */ - .globl fast_exception_return -fast_exception_return: - ld r12,_MSR(r1) - ld r11,_NIP(r1) - andi. r3,r12,MSR_RI /* check if RI is set */ - beq- unrecov_fer - ld r3,_CCR(r1) - ld r4,_LINK(r1) - ld r5,_CTR(r1) - ld r6,_XER(r1) - mtcr r3 - mtlr r4 - mtctr r5 - mtxer r6 - REST_GPR(0, r1) - REST_8GPRS(2, r1) - - mfmsr r10 - clrrdi r10,r10,2 /* clear RI (LE is 0 already) */ - mtmsrd r10,1 - - mtspr SPRN_SRR1,r12 - mtspr SPRN_SRR0,r11 - REST_4GPRS(10, r1) - ld r1,GPR1(r1) - rfid - b . /* prevent speculative execution */ - -unrecov_fer: - bl .save_nvgprs -1: addi r3,r1,STACK_FRAME_OVERHEAD - bl .unrecoverable_exception - b 1b - -/* - * Here r13 points to the paca, r9 contains the saved CR, - * SRR0 and SRR1 are saved in r11 and r12, - * r9 - r13 are saved in paca->exgen. - */ - .align 7 - .globl data_access_common -data_access_common: - RUNLATCH_ON(r10) /* It wont fit in the 0x300 handler */ - mfspr r10,SPRN_DAR - std r10,PACA_EXGEN+EX_DAR(r13) - mfspr r10,SPRN_DSISR - stw r10,PACA_EXGEN+EX_DSISR(r13) - EXCEPTION_PROLOG_COMMON(0x300, PACA_EXGEN) - ld r3,PACA_EXGEN+EX_DAR(r13) - lwz r4,PACA_EXGEN+EX_DSISR(r13) - li r5,0x300 - b .do_hash_page /* Try to handle as hpte fault */ - - .align 7 - .globl instruction_access_common -instruction_access_common: - EXCEPTION_PROLOG_COMMON(0x400, PACA_EXGEN) - ld r3,_NIP(r1) - andis. r4,r12,0x5820 - li r5,0x400 - b .do_hash_page /* Try to handle as hpte fault */ - -/* - * Here is the common SLB miss user that is used when going to virtual - * mode for SLB misses, that is currently not used - */ -#ifdef __DISABLED__ - .align 7 - .globl slb_miss_user_common -slb_miss_user_common: - mflr r10 - std r3,PACA_EXGEN+EX_DAR(r13) - stw r9,PACA_EXGEN+EX_CCR(r13) - std r10,PACA_EXGEN+EX_LR(r13) - std r11,PACA_EXGEN+EX_SRR0(r13) - bl .slb_allocate_user - - ld r10,PACA_EXGEN+EX_LR(r13) - ld r3,PACA_EXGEN+EX_R3(r13) - lwz r9,PACA_EXGEN+EX_CCR(r13) - ld r11,PACA_EXGEN+EX_SRR0(r13) - mtlr r10 - beq- slb_miss_fault - - andi. r10,r12,MSR_RI /* check for unrecoverable exception */ - beq- unrecov_user_slb - mfmsr r10 - -.machine push -.machine "power4" - mtcrf 0x80,r9 -.machine pop - - clrrdi r10,r10,2 /* clear RI before setting SRR0/1 */ - mtmsrd r10,1 - - mtspr SRR0,r11 - mtspr SRR1,r12 - - ld r9,PACA_EXGEN+EX_R9(r13) - ld r10,PACA_EXGEN+EX_R10(r13) - ld r11,PACA_EXGEN+EX_R11(r13) - ld r12,PACA_EXGEN+EX_R12(r13) - ld r13,PACA_EXGEN+EX_R13(r13) - rfid - b . - -slb_miss_fault: - EXCEPTION_PROLOG_COMMON(0x380, PACA_EXGEN) - ld r4,PACA_EXGEN+EX_DAR(r13) - li r5,0 - std r4,_DAR(r1) - std r5,_DSISR(r1) - b .handle_page_fault - -unrecov_user_slb: - EXCEPTION_PROLOG_COMMON(0x4200, PACA_EXGEN) - DISABLE_INTS - bl .save_nvgprs -1: addi r3,r1,STACK_FRAME_OVERHEAD - bl .unrecoverable_exception - b 1b - -#endif /* __DISABLED__ */ - - -/* - * r13 points to the PACA, r9 contains the saved CR, - * r12 contain the saved SRR1, SRR0 is still ready for return - * r3 has the faulting address - * r9 - r13 are saved in paca->exslb. - * r3 is saved in paca->slb_r3 - * We assume we aren't going to take any exceptions during this procedure. - */ -_GLOBAL(slb_miss_realmode) - mflr r10 - - stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */ - std r10,PACA_EXSLB+EX_LR(r13) /* save LR */ - - bl .slb_allocate_realmode - - /* All done -- return from exception. */ - - ld r10,PACA_EXSLB+EX_LR(r13) - ld r3,PACA_EXSLB+EX_R3(r13) - lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */ -#ifdef CONFIG_PPC_ISERIES - ld r11,PACALPPACA+LPPACASRR0(r13) /* get SRR0 value */ -#endif /* CONFIG_PPC_ISERIES */ - - mtlr r10 - - andi. r10,r12,MSR_RI /* check for unrecoverable exception */ - beq- unrecov_slb - -.machine push -.machine "power4" - mtcrf 0x80,r9 - mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */ -.machine pop - -#ifdef CONFIG_PPC_ISERIES - mtspr SPRN_SRR0,r11 - mtspr SPRN_SRR1,r12 -#endif /* CONFIG_PPC_ISERIES */ - ld r9,PACA_EXSLB+EX_R9(r13) - ld r10,PACA_EXSLB+EX_R10(r13) - ld r11,PACA_EXSLB+EX_R11(r13) - ld r12,PACA_EXSLB+EX_R12(r13) - ld r13,PACA_EXSLB+EX_R13(r13) - rfid - b . /* prevent speculative execution */ - -unrecov_slb: - EXCEPTION_PROLOG_COMMON(0x4100, PACA_EXSLB) - DISABLE_INTS - bl .save_nvgprs -1: addi r3,r1,STACK_FRAME_OVERHEAD - bl .unrecoverable_exception - b 1b - - .align 7 - .globl hardware_interrupt_common - .globl hardware_interrupt_entry -hardware_interrupt_common: - EXCEPTION_PROLOG_COMMON(0x500, PACA_EXGEN) -hardware_interrupt_entry: - DISABLE_INTS - addi r3,r1,STACK_FRAME_OVERHEAD - bl .do_IRQ - b .ret_from_except_lite - - .align 7 - .globl alignment_common -alignment_common: - mfspr r10,SPRN_DAR - std r10,PACA_EXGEN+EX_DAR(r13) - mfspr r10,SPRN_DSISR - stw r10,PACA_EXGEN+EX_DSISR(r13) - EXCEPTION_PROLOG_COMMON(0x600, PACA_EXGEN) - ld r3,PACA_EXGEN+EX_DAR(r13) - lwz r4,PACA_EXGEN+EX_DSISR(r13) - std r3,_DAR(r1) - std r4,_DSISR(r1) - bl .save_nvgprs - addi r3,r1,STACK_FRAME_OVERHEAD - ENABLE_INTS - bl .alignment_exception - b .ret_from_except - - .align 7 - .globl program_check_common -program_check_common: - EXCEPTION_PROLOG_COMMON(0x700, PACA_EXGEN) - bl .save_nvgprs - addi r3,r1,STACK_FRAME_OVERHEAD - ENABLE_INTS - bl .program_check_exception - b .ret_from_except - - .align 7 - .globl fp_unavailable_common -fp_unavailable_common: - EXCEPTION_PROLOG_COMMON(0x800, PACA_EXGEN) - bne .load_up_fpu /* if from user, just load it up */ - bl .save_nvgprs - addi r3,r1,STACK_FRAME_OVERHEAD - ENABLE_INTS - bl .kernel_fp_unavailable_exception - BUG_OPCODE - - .align 7 - .globl altivec_unavailable_common -altivec_unavailable_common: - EXCEPTION_PROLOG_COMMON(0xf20, PACA_EXGEN) -#ifdef CONFIG_ALTIVEC -BEGIN_FTR_SECTION - bne .load_up_altivec /* if from user, just load it up */ -END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) -#endif - bl .save_nvgprs - addi r3,r1,STACK_FRAME_OVERHEAD - ENABLE_INTS - bl .altivec_unavailable_exception - b .ret_from_except - -#ifdef CONFIG_ALTIVEC -/* - * load_up_altivec(unused, unused, tsk) - * Disable VMX for the task which had it previously, - * and save its vector registers in its thread_struct. - * Enables the VMX for use in the kernel on return. - * On SMP we know the VMX is free, since we give it up every - * switch (ie, no lazy save of the vector registers). - * On entry: r13 == 'current' && last_task_used_altivec != 'current' - */ -_STATIC(load_up_altivec) - mfmsr r5 /* grab the current MSR */ - oris r5,r5,MSR_VEC@h - mtmsrd r5 /* enable use of VMX now */ - isync - -/* - * For SMP, we don't do lazy VMX switching because it just gets too - * horrendously complex, especially when a task switches from one CPU - * to another. Instead we call giveup_altvec in switch_to. - * VRSAVE isn't dealt with here, that is done in the normal context - * switch code. Note that we could rely on vrsave value to eventually - * avoid saving all of the VREGs here... - */ -#ifndef CONFIG_SMP - ld r3,last_task_used_altivec@got(r2) - ld r4,0(r3) - cmpdi 0,r4,0 - beq 1f - /* Save VMX state to last_task_used_altivec's THREAD struct */ - addi r4,r4,THREAD - SAVE_32VRS(0,r5,r4) - mfvscr vr0 - li r10,THREAD_VSCR - stvx vr0,r10,r4 - /* Disable VMX for last_task_used_altivec */ - ld r5,PT_REGS(r4) - ld r4,_MSR-STACK_FRAME_OVERHEAD(r5) - lis r6,MSR_VEC@h - andc r4,r4,r6 - std r4,_MSR-STACK_FRAME_OVERHEAD(r5) -1: -#endif /* CONFIG_SMP */ - /* Hack: if we get an altivec unavailable trap with VRSAVE - * set to all zeros, we assume this is a broken application - * that fails to set it properly, and thus we switch it to - * all 1's - */ - mfspr r4,SPRN_VRSAVE - cmpdi 0,r4,0 - bne+ 1f - li r4,-1 - mtspr SPRN_VRSAVE,r4 -1: - /* enable use of VMX after return */ - ld r4,PACACURRENT(r13) - addi r5,r4,THREAD /* Get THREAD */ - oris r12,r12,MSR_VEC@h - std r12,_MSR(r1) - li r4,1 - li r10,THREAD_VSCR - stw r4,THREAD_USED_VR(r5) - lvx vr0,r10,r5 - mtvscr vr0 - REST_32VRS(0,r4,r5) -#ifndef CONFIG_SMP - /* Update last_task_used_math to 'current' */ - subi r4,r5,THREAD /* Back to 'current' */ - std r4,0(r3) -#endif /* CONFIG_SMP */ - /* restore registers and return */ - b fast_exception_return -#endif /* CONFIG_ALTIVEC */ - -/* - * Hash table stuff - */ - .align 7 -_GLOBAL(do_hash_page) - std r3,_DAR(r1) - std r4,_DSISR(r1) - - andis. r0,r4,0xa450 /* weird error? */ - bne- .handle_page_fault /* if not, try to insert a HPTE */ -BEGIN_FTR_SECTION - andis. r0,r4,0x0020 /* Is it a segment table fault? */ - bne- .do_ste_alloc /* If so handle it */ -END_FTR_SECTION_IFCLR(CPU_FTR_SLB) - - /* - * We need to set the _PAGE_USER bit if MSR_PR is set or if we are - * accessing a userspace segment (even from the kernel). We assume - * kernel addresses always have the high bit set. - */ - rlwinm r4,r4,32-25+9,31-9,31-9 /* DSISR_STORE -> _PAGE_RW */ - rotldi r0,r3,15 /* Move high bit into MSR_PR posn */ - orc r0,r12,r0 /* MSR_PR | ~high_bit */ - rlwimi r4,r0,32-13,30,30 /* becomes _PAGE_USER access bit */ - ori r4,r4,1 /* add _PAGE_PRESENT */ - rlwimi r4,r5,22+2,31-2,31-2 /* Set _PAGE_EXEC if trap is 0x400 */ - - /* - * On iSeries, we soft-disable interrupts here, then - * hard-enable interrupts so that the hash_page code can spin on - * the hash_table_lock without problems on a shared processor. - */ - DISABLE_INTS - - /* - * r3 contains the faulting address - * r4 contains the required access permissions - * r5 contains the trap number - * - * at return r3 = 0 for success - */ - bl .hash_page /* build HPTE if possible */ - cmpdi r3,0 /* see if hash_page succeeded */ - -#ifdef DO_SOFT_DISABLE - /* - * If we had interrupts soft-enabled at the point where the - * DSI/ISI occurred, and an interrupt came in during hash_page, - * handle it now. - * We jump to ret_from_except_lite rather than fast_exception_return - * because ret_from_except_lite will check for and handle pending - * interrupts if necessary. - */ - beq .ret_from_except_lite - /* For a hash failure, we don't bother re-enabling interrupts */ - ble- 12f - - /* - * hash_page couldn't handle it, set soft interrupt enable back - * to what it was before the trap. Note that .local_irq_restore - * handles any interrupts pending at this point. - */ - ld r3,SOFTE(r1) - bl .local_irq_restore - b 11f -#else - beq fast_exception_return /* Return from exception on success */ - ble- 12f /* Failure return from hash_page */ - - /* fall through */ -#endif - -/* Here we have a page fault that hash_page can't handle. */ -_GLOBAL(handle_page_fault) - ENABLE_INTS -11: ld r4,_DAR(r1) - ld r5,_DSISR(r1) - addi r3,r1,STACK_FRAME_OVERHEAD - bl .do_page_fault - cmpdi r3,0 - beq+ .ret_from_except_lite - bl .save_nvgprs - mr r5,r3 - addi r3,r1,STACK_FRAME_OVERHEAD - lwz r4,_DAR(r1) - bl .bad_page_fault - b .ret_from_except - -/* We have a page fault that hash_page could handle but HV refused - * the PTE insertion - */ -12: bl .save_nvgprs - addi r3,r1,STACK_FRAME_OVERHEAD - lwz r4,_DAR(r1) - bl .low_hash_fault - b .ret_from_except - - /* here we have a segment miss */ -_GLOBAL(do_ste_alloc) - bl .ste_allocate /* try to insert stab entry */ - cmpdi r3,0 - beq+ fast_exception_return - b .handle_page_fault - -/* - * r13 points to the PACA, r9 contains the saved CR, - * r11 and r12 contain the saved SRR0 and SRR1. - * r9 - r13 are saved in paca->exslb. - * We assume we aren't going to take any exceptions during this procedure. - * We assume (DAR >> 60) == 0xc. - */ - .align 7 -_GLOBAL(do_stab_bolted) - stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */ - std r11,PACA_EXSLB+EX_SRR0(r13) /* save SRR0 in exc. frame */ - - /* Hash to the primary group */ - ld r10,PACASTABVIRT(r13) - mfspr r11,SPRN_DAR - srdi r11,r11,28 - rldimi r10,r11,7,52 /* r10 = first ste of the group */ - - /* Calculate VSID */ - /* This is a kernel address, so protovsid = ESID */ - ASM_VSID_SCRAMBLE(r11, r9) - rldic r9,r11,12,16 /* r9 = vsid << 12 */ - - /* Search the primary group for a free entry */ -1: ld r11,0(r10) /* Test valid bit of the current ste */ - andi. r11,r11,0x80 - beq 2f - addi r10,r10,16 - andi. r11,r10,0x70 - bne 1b - - /* Stick for only searching the primary group for now. */ - /* At least for now, we use a very simple random castout scheme */ - /* Use the TB as a random number ; OR in 1 to avoid entry 0 */ - mftb r11 - rldic r11,r11,4,57 /* r11 = (r11 << 4) & 0x70 */ - ori r11,r11,0x10 - - /* r10 currently points to an ste one past the group of interest */ - /* make it point to the randomly selected entry */ - subi r10,r10,128 - or r10,r10,r11 /* r10 is the entry to invalidate */ - - isync /* mark the entry invalid */ - ld r11,0(r10) - rldicl r11,r11,56,1 /* clear the valid bit */ - rotldi r11,r11,8 - std r11,0(r10) - sync - - clrrdi r11,r11,28 /* Get the esid part of the ste */ - slbie r11 - -2: std r9,8(r10) /* Store the vsid part of the ste */ - eieio - - mfspr r11,SPRN_DAR /* Get the new esid */ - clrrdi r11,r11,28 /* Permits a full 32b of ESID */ - ori r11,r11,0x90 /* Turn on valid and kp */ - std r11,0(r10) /* Put new entry back into the stab */ - - sync - - /* All done -- return from exception. */ - lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */ - ld r11,PACA_EXSLB+EX_SRR0(r13) /* get saved SRR0 */ - - andi. r10,r12,MSR_RI - beq- unrecov_slb - - mtcrf 0x80,r9 /* restore CR */ - - mfmsr r10 - clrrdi r10,r10,2 - mtmsrd r10,1 - - mtspr SPRN_SRR0,r11 - mtspr SPRN_SRR1,r12 - ld r9,PACA_EXSLB+EX_R9(r13) - ld r10,PACA_EXSLB+EX_R10(r13) - ld r11,PACA_EXSLB+EX_R11(r13) - ld r12,PACA_EXSLB+EX_R12(r13) - ld r13,PACA_EXSLB+EX_R13(r13) - rfid - b . /* prevent speculative execution */ - -/* - * Space for CPU0's segment table. - * - * On iSeries, the hypervisor must fill in at least one entry before - * we get control (with relocate on). The address is give to the hv - * as a page number (see xLparMap in lpardata.c), so this must be at a - * fixed address (the linker can't compute (u64)&initial_stab >> - * PAGE_SHIFT). - */ - . = STAB0_PHYS_ADDR /* 0x6000 */ - .globl initial_stab -initial_stab: - .space 4096 - -/* - * Data area reserved for FWNMI option. - * This address (0x7000) is fixed by the RPA. - */ - .= 0x7000 - .globl fwnmi_data_area -fwnmi_data_area: - - /* iSeries does not use the FWNMI stuff, so it is safe to put - * this here, even if we later allow kernels that will boot on - * both pSeries and iSeries */ -#ifdef CONFIG_PPC_ISERIES - . = LPARMAP_PHYS -#include "lparmap.s" -/* - * This ".text" is here for old compilers that generate a trailing - * .note section when compiling .c files to .s - */ - .text -#endif /* CONFIG_PPC_ISERIES */ - - . = 0x8000 - -/* - * On pSeries, secondary processors spin in the following code. - * At entry, r3 = this processor's number (physical cpu id) - */ -_GLOBAL(pSeries_secondary_smp_init) - mr r24,r3 - - /* turn on 64-bit mode */ - bl .enable_64b_mode - isync - - /* Copy some CPU settings from CPU 0 */ - bl .__restore_cpu_setup - - /* Set up a paca value for this processor. Since we have the - * physical cpu id in r24, we need to search the pacas to find - * which logical id maps to our physical one. - */ - LOADADDR(r13, paca) /* Get base vaddr of paca array */ - li r5,0 /* logical cpu id */ -1: lhz r6,PACAHWCPUID(r13) /* Load HW procid from paca */ - cmpw r6,r24 /* Compare to our id */ - beq 2f - addi r13,r13,PACA_SIZE /* Loop to next PACA on miss */ - addi r5,r5,1 - cmpwi r5,NR_CPUS - blt 1b - - mr r3,r24 /* not found, copy phys to r3 */ - b .kexec_wait /* next kernel might do better */ - -2: mtspr SPRN_SPRG3,r13 /* Save vaddr of paca in SPRG3 */ - /* From now on, r24 is expected to be logical cpuid */ - mr r24,r5 -3: HMT_LOW - lbz r23,PACAPROCSTART(r13) /* Test if this processor should */ - /* start. */ - sync - - /* Create a temp kernel stack for use before relocation is on. */ - ld r1,PACAEMERGSP(r13) - subi r1,r1,STACK_FRAME_OVERHEAD - - cmpwi 0,r23,0 -#ifdef CONFIG_SMP - bne .__secondary_start -#endif - b 3b /* Loop until told to go */ - -#ifdef CONFIG_PPC_ISERIES -_STATIC(__start_initialization_iSeries) - /* Clear out the BSS */ - LOADADDR(r11,__bss_stop) - LOADADDR(r8,__bss_start) - sub r11,r11,r8 /* bss size */ - addi r11,r11,7 /* round up to an even double word */ - rldicl. r11,r11,61,3 /* shift right by 3 */ - beq 4f - addi r8,r8,-8 - li r0,0 - mtctr r11 /* zero this many doublewords */ -3: stdu r0,8(r8) - bdnz 3b -4: - LOADADDR(r1,init_thread_union) - addi r1,r1,THREAD_SIZE - li r0,0 - stdu r0,-STACK_FRAME_OVERHEAD(r1) - - LOADADDR(r3,cpu_specs) - LOADADDR(r4,cur_cpu_spec) - li r5,0 - bl .identify_cpu - - LOADADDR(r2,__toc_start) - addi r2,r2,0x4000 - addi r2,r2,0x4000 - - bl .iSeries_early_setup - bl .early_setup - - /* relocation is on at this point */ - - b .start_here_common -#endif /* CONFIG_PPC_ISERIES */ - -#ifdef CONFIG_PPC_MULTIPLATFORM - -_STATIC(__mmu_off) - mfmsr r3 - andi. r0,r3,MSR_IR|MSR_DR - beqlr - andc r3,r3,r0 - mtspr SPRN_SRR0,r4 - mtspr SPRN_SRR1,r3 - sync - rfid - b . /* prevent speculative execution */ - - -/* - * Here is our main kernel entry point. We support currently 2 kind of entries - * depending on the value of r5. - * - * r5 != NULL -> OF entry, we go to prom_init, "legacy" parameter content - * in r3...r7 - * - * r5 == NULL -> kexec style entry. r3 is a physical pointer to the - * DT block, r4 is a physical pointer to the kernel itself - * - */ -_GLOBAL(__start_initialization_multiplatform) - /* - * Are we booted from a PROM Of-type client-interface ? - */ - cmpldi cr0,r5,0 - bne .__boot_from_prom /* yes -> prom */ - - /* Save parameters */ - mr r31,r3 - mr r30,r4 - - /* Make sure we are running in 64 bits mode */ - bl .enable_64b_mode - - /* Setup some critical 970 SPRs before switching MMU off */ - bl .__970_cpu_preinit - - /* cpu # */ - li r24,0 - - /* Switch off MMU if not already */ - LOADADDR(r4, .__after_prom_start - KERNELBASE) - add r4,r4,r30 - bl .__mmu_off - b .__after_prom_start - -_STATIC(__boot_from_prom) - /* Save parameters */ - mr r31,r3 - mr r30,r4 - mr r29,r5 - mr r28,r6 - mr r27,r7 - - /* Make sure we are running in 64 bits mode */ - bl .enable_64b_mode - - /* put a relocation offset into r3 */ - bl .reloc_offset - - LOADADDR(r2,__toc_start) - addi r2,r2,0x4000 - addi r2,r2,0x4000 - - /* Relocate the TOC from a virt addr to a real addr */ - sub r2,r2,r3 - - /* Restore parameters */ - mr r3,r31 - mr r4,r30 - mr r5,r29 - mr r6,r28 - mr r7,r27 - - /* Do all of the interaction with OF client interface */ - bl .prom_init - /* We never return */ - trap - -/* - * At this point, r3 contains the physical address we are running at, - * returned by prom_init() - */ -_STATIC(__after_prom_start) - -/* - * We need to run with __start at physical address 0. - * This will leave some code in the first 256B of - * real memory, which are reserved for software use. - * The remainder of the first page is loaded with the fixed - * interrupt vectors. The next two pages are filled with - * unknown exception placeholders. - * - * Note: This process overwrites the OF exception vectors. - * r26 == relocation offset - * r27 == KERNELBASE - */ - bl .reloc_offset - mr r26,r3 - SET_REG_TO_CONST(r27,KERNELBASE) - - li r3,0 /* target addr */ - - // XXX FIXME: Use phys returned by OF (r30) - sub r4,r27,r26 /* source addr */ - /* current address of _start */ - /* i.e. where we are running */ - /* the source addr */ - - LOADADDR(r5,copy_to_here) /* # bytes of memory to copy */ - sub r5,r5,r27 - - li r6,0x100 /* Start offset, the first 0x100 */ - /* bytes were copied earlier. */ - - bl .copy_and_flush /* copy the first n bytes */ - /* this includes the code being */ - /* executed here. */ - - LOADADDR(r0, 4f) /* Jump to the copy of this code */ - mtctr r0 /* that we just made/relocated */ - bctr - -4: LOADADDR(r5,klimit) - sub r5,r5,r26 - ld r5,0(r5) /* get the value of klimit */ - sub r5,r5,r27 - bl .copy_and_flush /* copy the rest */ - b .start_here_multiplatform - -#endif /* CONFIG_PPC_MULTIPLATFORM */ - -/* - * Copy routine used to copy the kernel to start at physical address 0 - * and flush and invalidate the caches as needed. - * r3 = dest addr, r4 = source addr, r5 = copy limit, r6 = start offset - * on exit, r3, r4, r5 are unchanged, r6 is updated to be >= r5. - * - * Note: this routine *only* clobbers r0, r6 and lr - */ -_GLOBAL(copy_and_flush) - addi r5,r5,-8 - addi r6,r6,-8 -4: li r0,16 /* Use the least common */ - /* denominator cache line */ - /* size. This results in */ - /* extra cache line flushes */ - /* but operation is correct. */ - /* Can't get cache line size */ - /* from NACA as it is being */ - /* moved too. */ - - mtctr r0 /* put # words/line in ctr */ -3: addi r6,r6,8 /* copy a cache line */ - ldx r0,r6,r4 - stdx r0,r6,r3 - bdnz 3b - dcbst r6,r3 /* write it to memory */ - sync - icbi r6,r3 /* flush the icache line */ - cmpld 0,r6,r5 - blt 4b - sync - addi r5,r5,8 - addi r6,r6,8 - blr - -.align 8 -copy_to_here: - -#ifdef CONFIG_SMP -#ifdef CONFIG_PPC_PMAC -/* - * On PowerMac, secondary processors starts from the reset vector, which - * is temporarily turned into a call to one of the functions below. - */ - .section ".text"; - .align 2 ; - - .globl __secondary_start_pmac_0 -__secondary_start_pmac_0: - /* NB the entries for cpus 0, 1, 2 must each occupy 8 bytes. */ - li r24,0 - b 1f - li r24,1 - b 1f - li r24,2 - b 1f - li r24,3 -1: - -_GLOBAL(pmac_secondary_start) - /* turn on 64-bit mode */ - bl .enable_64b_mode - isync - - /* Copy some CPU settings from CPU 0 */ - bl .__restore_cpu_setup - - /* pSeries do that early though I don't think we really need it */ - mfmsr r3 - ori r3,r3,MSR_RI - mtmsrd r3 /* RI on */ - - /* Set up a paca value for this processor. */ - LOADADDR(r4, paca) /* Get base vaddr of paca array */ - mulli r13,r24,PACA_SIZE /* Calculate vaddr of right paca */ - add r13,r13,r4 /* for this processor. */ - mtspr SPRN_SPRG3,r13 /* Save vaddr of paca in SPRG3 */ - - /* Create a temp kernel stack for use before relocation is on. */ - ld r1,PACAEMERGSP(r13) - subi r1,r1,STACK_FRAME_OVERHEAD - - b .__secondary_start - -#endif /* CONFIG_PPC_PMAC */ - -/* - * This function is called after the master CPU has released the - * secondary processors. The execution environment is relocation off. - * The paca for this processor has the following fields initialized at - * this point: - * 1. Processor number - * 2. Segment table pointer (virtual address) - * On entry the following are set: - * r1 = stack pointer. vaddr for iSeries, raddr (temp stack) for pSeries - * r24 = cpu# (in Linux terms) - * r13 = paca virtual address - * SPRG3 = paca virtual address - */ -_GLOBAL(__secondary_start) - - HMT_MEDIUM /* Set thread priority to MEDIUM */ - - ld r2,PACATOC(r13) - - /* Do early setup for that CPU */ - bl .early_setup_secondary - - /* Initialize the kernel stack. Just a repeat for iSeries. */ - LOADADDR(r3,current_set) - sldi r28,r24,3 /* get current_set[cpu#] */ - ldx r1,r3,r28 - addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD - std r1,PACAKSAVE(r13) - - li r7,0 - mtlr r7 - - /* enable MMU and jump to start_secondary */ - LOADADDR(r3,.start_secondary_prolog) - SET_REG_TO_CONST(r4, MSR_KERNEL) -#ifdef DO_SOFT_DISABLE - ori r4,r4,MSR_EE -#endif - mtspr SPRN_SRR0,r3 - mtspr SPRN_SRR1,r4 - rfid - b . /* prevent speculative execution */ - -/* - * Running with relocation on at this point. All we want to do is - * zero the stack back-chain pointer before going into C code. - */ -_GLOBAL(start_secondary_prolog) - li r3,0 - std r3,0(r1) /* Zero the stack frame pointer */ - bl .start_secondary -#endif - -/* - * This subroutine clobbers r11 and r12 - */ -_GLOBAL(enable_64b_mode) - mfmsr r11 /* grab the current MSR */ - li r12,1 - rldicr r12,r12,MSR_SF_LG,(63-MSR_SF_LG) - or r11,r11,r12 - li r12,1 - rldicr r12,r12,MSR_ISF_LG,(63-MSR_ISF_LG) - or r11,r11,r12 - mtmsrd r11 - isync - blr - -#ifdef CONFIG_PPC_MULTIPLATFORM -/* - * This is where the main kernel code starts. - */ -_STATIC(start_here_multiplatform) - /* get a new offset, now that the kernel has moved. */ - bl .reloc_offset - mr r26,r3 - - /* Clear out the BSS. It may have been done in prom_init, - * already but that's irrelevant since prom_init will soon - * be detached from the kernel completely. Besides, we need - * to clear it now for kexec-style entry. - */ - LOADADDR(r11,__bss_stop) - LOADADDR(r8,__bss_start) - sub r11,r11,r8 /* bss size */ - addi r11,r11,7 /* round up to an even double word */ - rldicl. r11,r11,61,3 /* shift right by 3 */ - beq 4f - addi r8,r8,-8 - li r0,0 - mtctr r11 /* zero this many doublewords */ -3: stdu r0,8(r8) - bdnz 3b -4: - - mfmsr r6 - ori r6,r6,MSR_RI - mtmsrd r6 /* RI on */ - -#ifdef CONFIG_HMT - /* Start up the second thread on cpu 0 */ - mfspr r3,SPRN_PVR - srwi r3,r3,16 - cmpwi r3,0x34 /* Pulsar */ - beq 90f - cmpwi r3,0x36 /* Icestar */ - beq 90f - cmpwi r3,0x37 /* SStar */ - beq 90f - b 91f /* HMT not supported */ -90: li r3,0 - bl .hmt_start_secondary -91: -#endif - - /* The following gets the stack and TOC set up with the regs */ - /* pointing to the real addr of the kernel stack. This is */ - /* all done to support the C function call below which sets */ - /* up the htab. This is done because we have relocated the */ - /* kernel but are still running in real mode. */ - - LOADADDR(r3,init_thread_union) - sub r3,r3,r26 - - /* set up a stack pointer (physical address) */ - addi r1,r3,THREAD_SIZE - li r0,0 - stdu r0,-STACK_FRAME_OVERHEAD(r1) - - /* set up the TOC (physical address) */ - LOADADDR(r2,__toc_start) - addi r2,r2,0x4000 - addi r2,r2,0x4000 - sub r2,r2,r26 - - LOADADDR(r3,cpu_specs) - sub r3,r3,r26 - LOADADDR(r4,cur_cpu_spec) - sub r4,r4,r26 - mr r5,r26 - bl .identify_cpu - - /* Save some low level config HIDs of CPU0 to be copied to - * other CPUs later on, or used for suspend/resume - */ - bl .__save_cpu_setup - sync - - /* Setup a valid physical PACA pointer in SPRG3 for early_setup - * note that boot_cpuid can always be 0 nowadays since there is - * nowhere it can be initialized differently before we reach this - * code - */ - LOADADDR(r27, boot_cpuid) - sub r27,r27,r26 - lwz r27,0(r27) - - LOADADDR(r24, paca) /* Get base vaddr of paca array */ - mulli r13,r27,PACA_SIZE /* Calculate vaddr of right paca */ - add r13,r13,r24 /* for this processor. */ - sub r13,r13,r26 /* convert to physical addr */ - mtspr SPRN_SPRG3,r13 /* PPPBBB: Temp... -Peter */ - - /* Do very early kernel initializations, including initial hash table, - * stab and slb setup before we turn on relocation. */ - - /* Restore parameters passed from prom_init/kexec */ - mr r3,r31 - bl .early_setup - - LOADADDR(r3,.start_here_common) - SET_REG_TO_CONST(r4, MSR_KERNEL) - mtspr SPRN_SRR0,r3 - mtspr SPRN_SRR1,r4 - rfid - b . /* prevent speculative execution */ -#endif /* CONFIG_PPC_MULTIPLATFORM */ - - /* This is where all platforms converge execution */ -_STATIC(start_here_common) - /* relocation is on at this point */ - - /* The following code sets up the SP and TOC now that we are */ - /* running with translation enabled. */ - - LOADADDR(r3,init_thread_union) - - /* set up the stack */ - addi r1,r3,THREAD_SIZE - li r0,0 - stdu r0,-STACK_FRAME_OVERHEAD(r1) - - /* Apply the CPUs-specific fixups (nop out sections not relevant - * to this CPU - */ - li r3,0 - bl .do_cpu_ftr_fixups - - LOADADDR(r26, boot_cpuid) - lwz r26,0(r26) - - LOADADDR(r24, paca) /* Get base vaddr of paca array */ - mulli r13,r26,PACA_SIZE /* Calculate vaddr of right paca */ - add r13,r13,r24 /* for this processor. */ - mtspr SPRN_SPRG3,r13 - - /* ptr to current */ - LOADADDR(r4,init_task) - std r4,PACACURRENT(r13) - - /* Load the TOC */ - ld r2,PACATOC(r13) - std r1,PACAKSAVE(r13) - - bl .setup_system - - /* Load up the kernel context */ -5: -#ifdef DO_SOFT_DISABLE - li r5,0 - stb r5,PACAPROCENABLED(r13) /* Soft Disabled */ - mfmsr r5 - ori r5,r5,MSR_EE /* Hard Enabled */ - mtmsrd r5 -#endif - - bl .start_kernel - -_GLOBAL(hmt_init) -#ifdef CONFIG_HMT - LOADADDR(r5, hmt_thread_data) - mfspr r7,SPRN_PVR - srwi r7,r7,16 - cmpwi r7,0x34 /* Pulsar */ - beq 90f - cmpwi r7,0x36 /* Icestar */ - beq 91f - cmpwi r7,0x37 /* SStar */ - beq 91f - b 101f -90: mfspr r6,SPRN_PIR - andi. r6,r6,0x1f - b 92f -91: mfspr r6,SPRN_PIR - andi. r6,r6,0x3ff -92: sldi r4,r24,3 - stwx r6,r5,r4 - bl .hmt_start_secondary - b 101f - -__hmt_secondary_hold: - LOADADDR(r5, hmt_thread_data) - clrldi r5,r5,4 - li r7,0 - mfspr r6,SPRN_PIR - mfspr r8,SPRN_PVR - srwi r8,r8,16 - cmpwi r8,0x34 - bne 93f - andi. r6,r6,0x1f - b 103f -93: andi. r6,r6,0x3f - -103: lwzx r8,r5,r7 - cmpw r8,r6 - beq 104f - addi r7,r7,8 - b 103b - -104: addi r7,r7,4 - lwzx r9,r5,r7 - mr r24,r9 -101: -#endif - mr r3,r24 - b .pSeries_secondary_smp_init - -#ifdef CONFIG_HMT -_GLOBAL(hmt_start_secondary) - LOADADDR(r4,__hmt_secondary_hold) - clrldi r4,r4,4 - mtspr SPRN_NIADORM, r4 - mfspr r4, SPRN_MSRDORM - li r5, -65 - and r4, r4, r5 - mtspr SPRN_MSRDORM, r4 - lis r4,0xffef - ori r4,r4,0x7403 - mtspr SPRN_TSC, r4 - li r4,0x1f4 - mtspr SPRN_TST, r4 - mfspr r4, SPRN_HID0 - ori r4, r4, 0x1 - mtspr SPRN_HID0, r4 - mfspr r4, SPRN_CTRLF - oris r4, r4, 0x40 - mtspr SPRN_CTRLT, r4 - blr -#endif - -/* - * We put a few things here that have to be page-aligned. - * This stuff goes at the beginning of the bss, which is page-aligned. - */ - .section ".bss" - - .align PAGE_SHIFT - - .globl empty_zero_page -empty_zero_page: - .space PAGE_SIZE - - .globl swapper_pg_dir -swapper_pg_dir: - .space PAGE_SIZE - -/* - * This space gets a copy of optional info passed to us by the bootstrap - * Used to pass parameters into the kernel like root=/dev/sda1, etc. - */ - .globl cmd_line -cmd_line: - .space COMMAND_LINE_SIZE diff --git a/arch/ppc64/kernel/misc.S b/arch/ppc64/kernel/misc.S deleted file mode 100644 index 5e089deb0a2b..000000000000 --- a/arch/ppc64/kernel/misc.S +++ /dev/null @@ -1,940 +0,0 @@ -/* - * arch/ppc/kernel/misc.S - * - * - * - * This file contains miscellaneous low-level functions. - * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) - * - * Largely rewritten by Cort Dougan (cort@cs.nmt.edu) - * and Paul Mackerras. - * Adapted for iSeries by Mike Corrigan (mikejc@us.ibm.com) - * PPC64 updates by Dave Engebretsen (engebret@us.ibm.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - */ - -#include <linux/config.h> -#include <linux/sys.h> -#include <asm/unistd.h> -#include <asm/errno.h> -#include <asm/processor.h> -#include <asm/page.h> -#include <asm/cache.h> -#include <asm/ppc_asm.h> -#include <asm/asm-offsets.h> -#include <asm/cputable.h> -#include <asm/thread_info.h> - - .text - -/* - * Returns (address we were linked at) - (address we are running at) - * for use before the text and data are mapped to KERNELBASE. - */ - -_GLOBAL(reloc_offset) - mflr r0 - bl 1f -1: mflr r3 - LOADADDR(r4,1b) - sub r3,r4,r3 - mtlr r0 - blr - -_GLOBAL(get_msr) - mfmsr r3 - blr - -_GLOBAL(get_dar) - mfdar r3 - blr - -_GLOBAL(get_srr0) - mfsrr0 r3 - blr - -_GLOBAL(get_srr1) - mfsrr1 r3 - blr - -_GLOBAL(get_sp) - mr r3,r1 - blr - -#ifdef CONFIG_IRQSTACKS -_GLOBAL(call_do_softirq) - mflr r0 - std r0,16(r1) - stdu r1,THREAD_SIZE-112(r3) - mr r1,r3 - bl .__do_softirq - ld r1,0(r1) - ld r0,16(r1) - mtlr r0 - blr - -_GLOBAL(call___do_IRQ) - mflr r0 - std r0,16(r1) - stdu r1,THREAD_SIZE-112(r5) - mr r1,r5 - bl .__do_IRQ - ld r1,0(r1) - ld r0,16(r1) - mtlr r0 - blr -#endif /* CONFIG_IRQSTACKS */ - - /* - * To be called by C code which needs to do some operations with MMU - * disabled. Note that interrupts have to be disabled by the caller - * prior to calling us. The code called _MUST_ be in the RMO of course - * and part of the linear mapping as we don't attempt to translate the - * stack pointer at all. The function is called with the stack switched - * to this CPU emergency stack - * - * prototype is void *call_with_mmu_off(void *func, void *data); - * - * the called function is expected to be of the form - * - * void *called(void *data); - */ -_GLOBAL(call_with_mmu_off) - mflr r0 /* get link, save it on stackframe */ - std r0,16(r1) - mr r1,r5 /* save old stack ptr */ - ld r1,PACAEMERGSP(r13) /* get emerg. stack */ - subi r1,r1,STACK_FRAME_OVERHEAD - std r0,16(r1) /* save link on emerg. stack */ - std r5,0(r1) /* save old stack ptr in backchain */ - ld r3,0(r3) /* get to real function ptr (assume same TOC) */ - bl 2f /* we need LR to return, continue at label 2 */ - - ld r0,16(r1) /* we return here from the call, get LR and */ - ld r1,0(r1) /* .. old stack ptr */ - mtspr SPRN_SRR0,r0 /* and get back to virtual mode with these */ - mfmsr r4 - ori r4,r4,MSR_IR|MSR_DR - mtspr SPRN_SRR1,r4 - rfid - -2: mtspr SPRN_SRR0,r3 /* coming from above, enter real mode */ - mr r3,r4 /* get parameter */ - mfmsr r0 - ori r0,r0,MSR_IR|MSR_DR - xori r0,r0,MSR_IR|MSR_DR - mtspr SPRN_SRR1,r0 - rfid - - - .section ".toc","aw" -PPC64_CACHES: - .tc ppc64_caches[TC],ppc64_caches - .section ".text" - -/* - * Write any modified data cache blocks out to memory - * and invalidate the corresponding instruction cache blocks. - * - * flush_icache_range(unsigned long start, unsigned long stop) - * - * flush all bytes from start through stop-1 inclusive - */ - -_KPROBE(__flush_icache_range) - -/* - * Flush the data cache to memory - * - * Different systems have different cache line sizes - * and in some cases i-cache and d-cache line sizes differ from - * each other. - */ - ld r10,PPC64_CACHES@toc(r2) - lwz r7,DCACHEL1LINESIZE(r10)/* Get cache line size */ - addi r5,r7,-1 - andc r6,r3,r5 /* round low to line bdy */ - subf r8,r6,r4 /* compute length */ - add r8,r8,r5 /* ensure we get enough */ - lwz r9,DCACHEL1LOGLINESIZE(r10) /* Get log-2 of cache line size */ - srw. r8,r8,r9 /* compute line count */ - beqlr /* nothing to do? */ - mtctr r8 -1: dcbst 0,r6 - add r6,r6,r7 - bdnz 1b - sync - -/* Now invalidate the instruction cache */ - - lwz r7,ICACHEL1LINESIZE(r10) /* Get Icache line size */ - addi r5,r7,-1 - andc r6,r3,r5 /* round low to line bdy */ - subf r8,r6,r4 /* compute length */ - add r8,r8,r5 - lwz r9,ICACHEL1LOGLINESIZE(r10) /* Get log-2 of Icache line size */ - srw. r8,r8,r9 /* compute line count */ - beqlr /* nothing to do? */ - mtctr r8 -2: icbi 0,r6 - add r6,r6,r7 - bdnz 2b - isync - blr - - .text -/* - * Like above, but only do the D-cache. - * - * flush_dcache_range(unsigned long start, unsigned long stop) - * - * flush all bytes from start to stop-1 inclusive - */ -_GLOBAL(flush_dcache_range) - -/* - * Flush the data cache to memory - * - * Different systems have different cache line sizes - */ - ld r10,PPC64_CACHES@toc(r2) - lwz r7,DCACHEL1LINESIZE(r10) /* Get dcache line size */ - addi r5,r7,-1 - andc r6,r3,r5 /* round low to line bdy */ - subf r8,r6,r4 /* compute length */ - add r8,r8,r5 /* ensure we get enough */ - lwz r9,DCACHEL1LOGLINESIZE(r10) /* Get log-2 of dcache line size */ - srw. r8,r8,r9 /* compute line count */ - beqlr /* nothing to do? */ - mtctr r8 -0: dcbst 0,r6 - add r6,r6,r7 - bdnz 0b - sync - blr - -/* - * Like above, but works on non-mapped physical addresses. - * Use only for non-LPAR setups ! It also assumes real mode - * is cacheable. Used for flushing out the DART before using - * it as uncacheable memory - * - * flush_dcache_phys_range(unsigned long start, unsigned long stop) - * - * flush all bytes from start to stop-1 inclusive - */ -_GLOBAL(flush_dcache_phys_range) - ld r10,PPC64_CACHES@toc(r2) - lwz r7,DCACHEL1LINESIZE(r10) /* Get dcache line size */ - addi r5,r7,-1 - andc r6,r3,r5 /* round low to line bdy */ - subf r8,r6,r4 /* compute length */ - add r8,r8,r5 /* ensure we get enough */ - lwz r9,DCACHEL1LOGLINESIZE(r10) /* Get log-2 of dcache line size */ - srw. r8,r8,r9 /* compute line count */ - beqlr /* nothing to do? */ - mfmsr r5 /* Disable MMU Data Relocation */ - ori r0,r5,MSR_DR - xori r0,r0,MSR_DR - sync - mtmsr r0 - sync - isync - mtctr r8 -0: dcbst 0,r6 - add r6,r6,r7 - bdnz 0b - sync - isync - mtmsr r5 /* Re-enable MMU Data Relocation */ - sync - isync - blr - -_GLOBAL(flush_inval_dcache_range) - ld r10,PPC64_CACHES@toc(r2) - lwz r7,DCACHEL1LINESIZE(r10) /* Get dcache line size */ - addi r5,r7,-1 - andc r6,r3,r5 /* round low to line bdy */ - subf r8,r6,r4 /* compute length */ - add r8,r8,r5 /* ensure we get enough */ - lwz r9,DCACHEL1LOGLINESIZE(r10)/* Get log-2 of dcache line size */ - srw. r8,r8,r9 /* compute line count */ - beqlr /* nothing to do? */ - sync - isync - mtctr r8 -0: dcbf 0,r6 - add r6,r6,r7 - bdnz 0b - sync - isync - blr - - -/* - * Flush a particular page from the data cache to RAM. - * Note: this is necessary because the instruction cache does *not* - * snoop from the data cache. - * - * void __flush_dcache_icache(void *page) - */ -_GLOBAL(__flush_dcache_icache) -/* - * Flush the data cache to memory - * - * Different systems have different cache line sizes - */ - -/* Flush the dcache */ - ld r7,PPC64_CACHES@toc(r2) - clrrdi r3,r3,PAGE_SHIFT /* Page align */ - lwz r4,DCACHEL1LINESPERPAGE(r7) /* Get # dcache lines per page */ - lwz r5,DCACHEL1LINESIZE(r7) /* Get dcache line size */ - mr r6,r3 - mtctr r4 -0: dcbst 0,r6 - add r6,r6,r5 - bdnz 0b - sync - -/* Now invalidate the icache */ - - lwz r4,ICACHEL1LINESPERPAGE(r7) /* Get # icache lines per page */ - lwz r5,ICACHEL1LINESIZE(r7) /* Get icache line size */ - mtctr r4 -1: icbi 0,r3 - add r3,r3,r5 - bdnz 1b - isync - blr - -/* - * I/O string operations - * - * insb(port, buf, len) - * outsb(port, buf, len) - * insw(port, buf, len) - * outsw(port, buf, len) - * insl(port, buf, len) - * outsl(port, buf, len) - * insw_ns(port, buf, len) - * outsw_ns(port, buf, len) - * insl_ns(port, buf, len) - * outsl_ns(port, buf, len) - * - * The *_ns versions don't do byte-swapping. - */ -_GLOBAL(_insb) - cmpwi 0,r5,0 - mtctr r5 - subi r4,r4,1 - blelr- -00: lbz r5,0(r3) - eieio - stbu r5,1(r4) - bdnz 00b - twi 0,r5,0 - isync - blr - -_GLOBAL(_outsb) - cmpwi 0,r5,0 - mtctr r5 - subi r4,r4,1 - blelr- -00: lbzu r5,1(r4) - stb r5,0(r3) - bdnz 00b - sync - blr - -_GLOBAL(_insw) - cmpwi 0,r5,0 - mtctr r5 - subi r4,r4,2 - blelr- -00: lhbrx r5,0,r3 - eieio - sthu r5,2(r4) - bdnz 00b - twi 0,r5,0 - isync - blr - -_GLOBAL(_outsw) - cmpwi 0,r5,0 - mtctr r5 - subi r4,r4,2 - blelr- -00: lhzu r5,2(r4) - sthbrx r5,0,r3 - bdnz 00b - sync - blr - -_GLOBAL(_insl) - cmpwi 0,r5,0 - mtctr r5 - subi r4,r4,4 - blelr- -00: lwbrx r5,0,r3 - eieio - stwu r5,4(r4) - bdnz 00b - twi 0,r5,0 - isync - blr - -_GLOBAL(_outsl) - cmpwi 0,r5,0 - mtctr r5 - subi r4,r4,4 - blelr- -00: lwzu r5,4(r4) - stwbrx r5,0,r3 - bdnz 00b - sync - blr - -/* _GLOBAL(ide_insw) now in drivers/ide/ide-iops.c */ -_GLOBAL(_insw_ns) - cmpwi 0,r5,0 - mtctr r5 - subi r4,r4,2 - blelr- -00: lhz r5,0(r3) - eieio - sthu r5,2(r4) - bdnz 00b - twi 0,r5,0 - isync - blr - -/* _GLOBAL(ide_outsw) now in drivers/ide/ide-iops.c */ -_GLOBAL(_outsw_ns) - cmpwi 0,r5,0 - mtctr r5 - subi r4,r4,2 - blelr- -00: lhzu r5,2(r4) - sth r5,0(r3) - bdnz 00b - sync - blr - -_GLOBAL(_insl_ns) - cmpwi 0,r5,0 - mtctr r5 - subi r4,r4,4 - blelr- -00: lwz r5,0(r3) - eieio - stwu r5,4(r4) - bdnz 00b - twi 0,r5,0 - isync - blr - -_GLOBAL(_outsl_ns) - cmpwi 0,r5,0 - mtctr r5 - subi r4,r4,4 - blelr- -00: lwzu r5,4(r4) - stw r5,0(r3) - bdnz 00b - sync - blr - -/* - * identify_cpu and calls setup_cpu - * In: r3 = base of the cpu_specs array - * r4 = address of cur_cpu_spec - * r5 = relocation offset - */ -_GLOBAL(identify_cpu) - mfpvr r7 -1: - lwz r8,CPU_SPEC_PVR_MASK(r3) - and r8,r8,r7 - lwz r9,CPU_SPEC_PVR_VALUE(r3) - cmplw 0,r9,r8 - beq 1f - addi r3,r3,CPU_SPEC_ENTRY_SIZE - b 1b -1: - add r0,r3,r5 - std r0,0(r4) - ld r4,CPU_SPEC_SETUP(r3) - sub r4,r4,r5 - ld r4,0(r4) - sub r4,r4,r5 - mtctr r4 - /* Calling convention for cpu setup is r3=offset, r4=cur_cpu_spec */ - mr r4,r3 - mr r3,r5 - bctr - -/* - * do_cpu_ftr_fixups - goes through the list of CPU feature fixups - * and writes nop's over sections of code that don't apply for this cpu. - * r3 = data offset (not changed) - */ -_GLOBAL(do_cpu_ftr_fixups) - /* Get CPU 0 features */ - LOADADDR(r6,cur_cpu_spec) - sub r6,r6,r3 - ld r4,0(r6) - sub r4,r4,r3 - ld r4,CPU_SPEC_FEATURES(r4) - /* Get the fixup table */ - LOADADDR(r6,__start___ftr_fixup) - sub r6,r6,r3 - LOADADDR(r7,__stop___ftr_fixup) - sub r7,r7,r3 - /* Do the fixup */ -1: cmpld r6,r7 - bgelr - addi r6,r6,32 - ld r8,-32(r6) /* mask */ - and r8,r8,r4 - ld r9,-24(r6) /* value */ - cmpld r8,r9 - beq 1b - ld r8,-16(r6) /* section begin */ - ld r9,-8(r6) /* section end */ - subf. r9,r8,r9 - beq 1b - /* write nops over the section of code */ - /* todo: if large section, add a branch at the start of it */ - srwi r9,r9,2 - mtctr r9 - sub r8,r8,r3 - lis r0,0x60000000@h /* nop */ -3: stw r0,0(r8) - andi. r10,r4,CPU_FTR_SPLIT_ID_CACHE@l - beq 2f - dcbst 0,r8 /* suboptimal, but simpler */ - sync - icbi 0,r8 -2: addi r8,r8,4 - bdnz 3b - sync /* additional sync needed on g4 */ - isync - b 1b - -#if defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_MAPLE) -/* - * Do an IO access in real mode - */ -_GLOBAL(real_readb) - mfmsr r7 - ori r0,r7,MSR_DR - xori r0,r0,MSR_DR - sync - mtmsrd r0 - sync - isync - mfspr r6,SPRN_HID4 - rldicl r5,r6,32,0 - ori r5,r5,0x100 - rldicl r5,r5,32,0 - sync - mtspr SPRN_HID4,r5 - isync - slbia - isync - lbz r3,0(r3) - sync - mtspr SPRN_HID4,r6 - isync - slbia - isync - mtmsrd r7 - sync - isync - blr - -/* - * Do an IO access in real mode - */ -_GLOBAL(real_writeb) - mfmsr r7 - ori r0,r7,MSR_DR - xori r0,r0,MSR_DR - sync - mtmsrd r0 - sync - isync - mfspr r6,SPRN_HID4 - rldicl r5,r6,32,0 - ori r5,r5,0x100 - rldicl r5,r5,32,0 - sync - mtspr SPRN_HID4,r5 - isync - slbia - isync - stb r3,0(r4) - sync - mtspr SPRN_HID4,r6 - isync - slbia - isync - mtmsrd r7 - sync - isync - blr -#endif /* defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_MAPLE) */ - -/* - * SCOM access functions for 970 (FX only for now) - * - * unsigned long scom970_read(unsigned int address); - * void scom970_write(unsigned int address, unsigned long value); - * - * The address passed in is the 24 bits register address. This code - * is 970 specific and will not check the status bits, so you should - * know what you are doing. - */ -_GLOBAL(scom970_read) - /* interrupts off */ - mfmsr r4 - ori r0,r4,MSR_EE - xori r0,r0,MSR_EE - mtmsrd r0,1 - - /* rotate 24 bits SCOM address 8 bits left and mask out it's low 8 bits - * (including parity). On current CPUs they must be 0'd, - * and finally or in RW bit - */ - rlwinm r3,r3,8,0,15 - ori r3,r3,0x8000 - - /* do the actual scom read */ - sync - mtspr SPRN_SCOMC,r3 - isync - mfspr r3,SPRN_SCOMD - isync - mfspr r0,SPRN_SCOMC - isync - - /* XXX: fixup result on some buggy 970's (ouch ! we lost a bit, bah - * that's the best we can do). Not implemented yet as we don't use - * the scom on any of the bogus CPUs yet, but may have to be done - * ultimately - */ - - /* restore interrupts */ - mtmsrd r4,1 - blr - - -_GLOBAL(scom970_write) - /* interrupts off */ - mfmsr r5 - ori r0,r5,MSR_EE - xori r0,r0,MSR_EE - mtmsrd r0,1 - - /* rotate 24 bits SCOM address 8 bits left and mask out it's low 8 bits - * (including parity). On current CPUs they must be 0'd. - */ - - rlwinm r3,r3,8,0,15 - - sync - mtspr SPRN_SCOMD,r4 /* write data */ - isync - mtspr SPRN_SCOMC,r3 /* write command */ - isync - mfspr 3,SPRN_SCOMC - isync - - /* restore interrupts */ - mtmsrd r5,1 - blr - - -/* - * Create a kernel thread - * kernel_thread(fn, arg, flags) - */ -_GLOBAL(kernel_thread) - std r29,-24(r1) - std r30,-16(r1) - stdu r1,-STACK_FRAME_OVERHEAD(r1) - mr r29,r3 - mr r30,r4 - ori r3,r5,CLONE_VM /* flags */ - oris r3,r3,(CLONE_UNTRACED>>16) - li r4,0 /* new sp (unused) */ - li r0,__NR_clone - sc - cmpdi 0,r3,0 /* parent or child? */ - bne 1f /* return if parent */ - li r0,0 - stdu r0,-STACK_FRAME_OVERHEAD(r1) - ld r2,8(r29) - ld r29,0(r29) - mtlr r29 /* fn addr in lr */ - mr r3,r30 /* load arg and call fn */ - blrl - li r0,__NR_exit /* exit after child exits */ - li r3,0 - sc -1: addi r1,r1,STACK_FRAME_OVERHEAD - ld r29,-24(r1) - ld r30,-16(r1) - blr - -/* - * disable_kernel_fp() - * Disable the FPU. - */ -_GLOBAL(disable_kernel_fp) - mfmsr r3 - rldicl r0,r3,(63-MSR_FP_LG),1 - rldicl r3,r0,(MSR_FP_LG+1),0 - mtmsrd r3 /* disable use of fpu now */ - isync - blr - -#ifdef CONFIG_ALTIVEC - -#if 0 /* this has no callers for now */ -/* - * disable_kernel_altivec() - * Disable the VMX. - */ -_GLOBAL(disable_kernel_altivec) - mfmsr r3 - rldicl r0,r3,(63-MSR_VEC_LG),1 - rldicl r3,r0,(MSR_VEC_LG+1),0 - mtmsrd r3 /* disable use of VMX now */ - isync - blr -#endif /* 0 */ - -/* - * giveup_altivec(tsk) - * Disable VMX for the task given as the argument, - * and save the vector registers in its thread_struct. - * Enables the VMX for use in the kernel on return. - */ -_GLOBAL(giveup_altivec) - mfmsr r5 - oris r5,r5,MSR_VEC@h - mtmsrd r5 /* enable use of VMX now */ - isync - cmpdi 0,r3,0 - beqlr- /* if no previous owner, done */ - addi r3,r3,THREAD /* want THREAD of task */ - ld r5,PT_REGS(r3) - cmpdi 0,r5,0 - SAVE_32VRS(0,r4,r3) - mfvscr vr0 - li r4,THREAD_VSCR - stvx vr0,r4,r3 - beq 1f - ld r4,_MSR-STACK_FRAME_OVERHEAD(r5) - lis r3,MSR_VEC@h - andc r4,r4,r3 /* disable FP for previous task */ - std r4,_MSR-STACK_FRAME_OVERHEAD(r5) -1: -#ifndef CONFIG_SMP - li r5,0 - ld r4,last_task_used_altivec@got(r2) - std r5,0(r4) -#endif /* CONFIG_SMP */ - blr - -#endif /* CONFIG_ALTIVEC */ - -_GLOBAL(__setup_cpu_power3) - blr - -_GLOBAL(execve) - li r0,__NR_execve - sc - bnslr - neg r3,r3 - blr - -/* kexec_wait(phys_cpu) - * - * wait for the flag to change, indicating this kernel is going away but - * the slave code for the next one is at addresses 0 to 100. - * - * This is used by all slaves. - * - * Physical (hardware) cpu id should be in r3. - */ -_GLOBAL(kexec_wait) - bl 1f -1: mflr r5 - addi r5,r5,kexec_flag-1b - -99: HMT_LOW -#ifdef CONFIG_KEXEC /* use no memory without kexec */ - lwz r4,0(r5) - cmpwi 0,r4,0 - bnea 0x60 -#endif - b 99b - -/* this can be in text because we won't change it until we are - * running in real anyways - */ -kexec_flag: - .long 0 - - -#ifdef CONFIG_KEXEC - -/* kexec_smp_wait(void) - * - * call with interrupts off - * note: this is a terminal routine, it does not save lr - * - * get phys id from paca - * set paca id to -1 to say we got here - * switch to real mode - * join other cpus in kexec_wait(phys_id) - */ -_GLOBAL(kexec_smp_wait) - lhz r3,PACAHWCPUID(r13) - li r4,-1 - sth r4,PACAHWCPUID(r13) /* let others know we left */ - bl real_mode - b .kexec_wait - -/* - * switch to real mode (turn mmu off) - * we use the early kernel trick that the hardware ignores bits - * 0 and 1 (big endian) of the effective address in real mode - * - * don't overwrite r3 here, it is live for kexec_wait above. - */ -real_mode: /* assume normal blr return */ -1: li r9,MSR_RI - li r10,MSR_DR|MSR_IR - mflr r11 /* return address to SRR0 */ - mfmsr r12 - andc r9,r12,r9 - andc r10,r12,r10 - - mtmsrd r9,1 - mtspr SPRN_SRR1,r10 - mtspr SPRN_SRR0,r11 - rfid - - -/* - * kexec_sequence(newstack, start, image, control, clear_all()) - * - * does the grungy work with stack switching and real mode switches - * also does simple calls to other code - */ - -_GLOBAL(kexec_sequence) - mflr r0 - std r0,16(r1) - - /* switch stacks to newstack -- &kexec_stack.stack */ - stdu r1,THREAD_SIZE-112(r3) - mr r1,r3 - - li r0,0 - std r0,16(r1) - - /* save regs for local vars on new stack. - * yes, we won't go back, but ... - */ - std r31,-8(r1) - std r30,-16(r1) - std r29,-24(r1) - std r28,-32(r1) - std r27,-40(r1) - std r26,-48(r1) - std r25,-56(r1) - - stdu r1,-112-64(r1) - - /* save args into preserved regs */ - mr r31,r3 /* newstack (both) */ - mr r30,r4 /* start (real) */ - mr r29,r5 /* image (virt) */ - mr r28,r6 /* control, unused */ - mr r27,r7 /* clear_all() fn desc */ - mr r26,r8 /* spare */ - lhz r25,PACAHWCPUID(r13) /* get our phys cpu from paca */ - - /* disable interrupts, we are overwriting kernel data next */ - mfmsr r3 - rlwinm r3,r3,0,17,15 - mtmsrd r3,1 - - /* copy dest pages, flush whole dest image */ - mr r3,r29 - bl .kexec_copy_flush /* (image) */ - - /* turn off mmu */ - bl real_mode - - /* clear out hardware hash page table and tlb */ - ld r5,0(r27) /* deref function descriptor */ - mtctr r5 - bctrl /* ppc_md.hash_clear_all(void); */ - -/* - * kexec image calling is: - * the first 0x100 bytes of the entry point are copied to 0 - * - * all slaves branch to slave = 0x60 (absolute) - * slave(phys_cpu_id); - * - * master goes to start = entry point - * start(phys_cpu_id, start, 0); - * - * - * a wrapper is needed to call existing kernels, here is an approximate - * description of one method: - * - * v2: (2.6.10) - * start will be near the boot_block (maybe 0x100 bytes before it?) - * it will have a 0x60, which will b to boot_block, where it will wait - * and 0 will store phys into struct boot-block and load r3 from there, - * copy kernel 0-0x100 and tell slaves to back down to 0x60 again - * - * v1: (2.6.9) - * boot block will have all cpus scanning device tree to see if they - * are the boot cpu ????? - * other device tree differences (prop sizes, va vs pa, etc)... - */ - - /* copy 0x100 bytes starting at start to 0 */ - li r3,0 - mr r4,r30 - li r5,0x100 - li r6,0 - bl .copy_and_flush /* (dest, src, copy limit, start offset) */ -1: /* assume normal blr return */ - - /* release other cpus to the new kernel secondary start at 0x60 */ - mflr r5 - li r6,1 - stw r6,kexec_flag-1b(5) - mr r3,r25 # my phys cpu - mr r4,r30 # start, aka phys mem offset - mtlr 4 - li r5,0 - blr /* image->start(physid, image->start, 0); */ -#endif /* CONFIG_KEXEC */ diff --git a/arch/ppc64/kernel/ppc_ksyms.c b/arch/ppc64/kernel/ppc_ksyms.c deleted file mode 100644 index 84006e26342c..000000000000 --- a/arch/ppc64/kernel/ppc_ksyms.c +++ /dev/null @@ -1,76 +0,0 @@ -/* - * c 2001 PPC 64 Team, IBM Corp - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ -#include <linux/config.h> -#include <linux/module.h> -#include <linux/string.h> -#include <linux/console.h> -#include <net/checksum.h> - -#include <asm/processor.h> -#include <asm/uaccess.h> -#include <asm/io.h> -#include <asm/system.h> -#include <asm/hw_irq.h> -#include <asm/abs_addr.h> -#include <asm/cacheflush.h> - -EXPORT_SYMBOL(strcpy); -EXPORT_SYMBOL(strncpy); -EXPORT_SYMBOL(strcat); -EXPORT_SYMBOL(strncat); -EXPORT_SYMBOL(strchr); -EXPORT_SYMBOL(strrchr); -EXPORT_SYMBOL(strpbrk); -EXPORT_SYMBOL(strstr); -EXPORT_SYMBOL(strlen); -EXPORT_SYMBOL(strnlen); -EXPORT_SYMBOL(strcmp); -EXPORT_SYMBOL(strncmp); - -EXPORT_SYMBOL(csum_partial); -EXPORT_SYMBOL(csum_partial_copy_generic); -EXPORT_SYMBOL(ip_fast_csum); -EXPORT_SYMBOL(csum_tcpudp_magic); - -EXPORT_SYMBOL(__copy_tofrom_user); -EXPORT_SYMBOL(__clear_user); -EXPORT_SYMBOL(__strncpy_from_user); -EXPORT_SYMBOL(__strnlen_user); - -EXPORT_SYMBOL(reloc_offset); - -EXPORT_SYMBOL(_insb); -EXPORT_SYMBOL(_outsb); -EXPORT_SYMBOL(_insw); -EXPORT_SYMBOL(_outsw); -EXPORT_SYMBOL(_insl); -EXPORT_SYMBOL(_outsl); -EXPORT_SYMBOL(_insw_ns); -EXPORT_SYMBOL(_outsw_ns); -EXPORT_SYMBOL(_insl_ns); -EXPORT_SYMBOL(_outsl_ns); - -EXPORT_SYMBOL(kernel_thread); - -EXPORT_SYMBOL(giveup_fpu); -#ifdef CONFIG_ALTIVEC -EXPORT_SYMBOL(giveup_altivec); -#endif -EXPORT_SYMBOL(__flush_icache_range); -EXPORT_SYMBOL(flush_dcache_range); - -EXPORT_SYMBOL(memcpy); -EXPORT_SYMBOL(memset); -EXPORT_SYMBOL(memmove); -EXPORT_SYMBOL(memscan); -EXPORT_SYMBOL(memcmp); -EXPORT_SYMBOL(memchr); - -EXPORT_SYMBOL(timer_interrupt); -EXPORT_SYMBOL(console_drivers); diff --git a/arch/ppc64/kernel/prom.c b/arch/ppc64/kernel/prom.c deleted file mode 100644 index 47cc26e78957..000000000000 --- a/arch/ppc64/kernel/prom.c +++ /dev/null @@ -1,1956 +0,0 @@ -/* - * - * - * Procedures for interfacing to Open Firmware. - * - * Paul Mackerras August 1996. - * Copyright (C) 1996 Paul Mackerras. - * - * Adapted for 64bit PowerPC by Dave Engebretsen and Peter Bergner. - * {engebret|bergner}@us.ibm.com - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#undef DEBUG - -#include <stdarg.h> -#include <linux/config.h> -#include <linux/kernel.h> -#include <linux/string.h> -#include <linux/init.h> -#include <linux/threads.h> -#include <linux/spinlock.h> -#include <linux/types.h> -#include <linux/pci.h> -#include <linux/stringify.h> -#include <linux/delay.h> -#include <linux/initrd.h> -#include <linux/bitops.h> -#include <linux/module.h> -#include <linux/module.h> - -#include <asm/prom.h> -#include <asm/rtas.h> -#include <asm/lmb.h> -#include <asm/abs_addr.h> -#include <asm/page.h> -#include <asm/processor.h> -#include <asm/irq.h> -#include <asm/io.h> -#include <asm/smp.h> -#include <asm/system.h> -#include <asm/mmu.h> -#include <asm/pgtable.h> -#include <asm/pci.h> -#include <asm/iommu.h> -#include <asm/btext.h> -#include <asm/sections.h> -#include <asm/machdep.h> -#include <asm/pSeries_reconfig.h> - -#ifdef DEBUG -#define DBG(fmt...) udbg_printf(fmt) -#else -#define DBG(fmt...) -#endif - -struct pci_reg_property { - struct pci_address addr; - u32 size_hi; - u32 size_lo; -}; - -struct isa_reg_property { - u32 space; - u32 address; - u32 size; -}; - - -typedef int interpret_func(struct device_node *, unsigned long *, - int, int, int); - -extern struct rtas_t rtas; -extern struct lmb lmb; -extern unsigned long klimit; -extern unsigned long memory_limit; - -static int __initdata dt_root_addr_cells; -static int __initdata dt_root_size_cells; -static int __initdata iommu_is_off; -int __initdata iommu_force_on; -unsigned long tce_alloc_start, tce_alloc_end; - -typedef u32 cell_t; - -#if 0 -static struct boot_param_header *initial_boot_params __initdata; -#else -struct boot_param_header *initial_boot_params; -#endif - -static struct device_node *allnodes = NULL; - -/* use when traversing tree through the allnext, child, sibling, - * or parent members of struct device_node. - */ -static DEFINE_RWLOCK(devtree_lock); - -/* export that to outside world */ -struct device_node *of_chosen; - -/* - * Wrapper for allocating memory for various data that needs to be - * attached to device nodes as they are processed at boot or when - * added to the device tree later (e.g. DLPAR). At boot there is - * already a region reserved so we just increment *mem_start by size; - * otherwise we call kmalloc. - */ -static void * prom_alloc(unsigned long size, unsigned long *mem_start) -{ - unsigned long tmp; - - if (!mem_start) - return kmalloc(size, GFP_KERNEL); - - tmp = *mem_start; - *mem_start += size; - return (void *)tmp; -} - -/* - * Find the device_node with a given phandle. - */ -static struct device_node * find_phandle(phandle ph) -{ - struct device_node *np; - - for (np = allnodes; np != 0; np = np->allnext) - if (np->linux_phandle == ph) - return np; - return NULL; -} - -/* - * Find the interrupt parent of a node. - */ -static struct device_node * __devinit intr_parent(struct device_node *p) -{ - phandle *parp; - - parp = (phandle *) get_property(p, "interrupt-parent", NULL); - if (parp == NULL) - return p->parent; - return find_phandle(*parp); -} - -/* - * Find out the size of each entry of the interrupts property - * for a node. - */ -int __devinit prom_n_intr_cells(struct device_node *np) -{ - struct device_node *p; - unsigned int *icp; - - for (p = np; (p = intr_parent(p)) != NULL; ) { - icp = (unsigned int *) - get_property(p, "#interrupt-cells", NULL); - if (icp != NULL) - return *icp; - if (get_property(p, "interrupt-controller", NULL) != NULL - || get_property(p, "interrupt-map", NULL) != NULL) { - printk("oops, node %s doesn't have #interrupt-cells\n", - p->full_name); - return 1; - } - } -#ifdef DEBUG_IRQ - printk("prom_n_intr_cells failed for %s\n", np->full_name); -#endif - return 1; -} - -/* - * Map an interrupt from a device up to the platform interrupt - * descriptor. - */ -static int __devinit map_interrupt(unsigned int **irq, struct device_node **ictrler, - struct device_node *np, unsigned int *ints, - int nintrc) -{ - struct device_node *p, *ipar; - unsigned int *imap, *imask, *ip; - int i, imaplen, match; - int newintrc = 0, newaddrc = 0; - unsigned int *reg; - int naddrc; - - reg = (unsigned int *) get_property(np, "reg", NULL); - naddrc = prom_n_addr_cells(np); - p = intr_parent(np); - while (p != NULL) { - if (get_property(p, "interrupt-controller", NULL) != NULL) - /* this node is an interrupt controller, stop here */ - break; - imap = (unsigned int *) - get_property(p, "interrupt-map", &imaplen); - if (imap == NULL) { - p = intr_parent(p); - continue; - } - imask = (unsigned int *) - get_property(p, "interrupt-map-mask", NULL); - if (imask == NULL) { - printk("oops, %s has interrupt-map but no mask\n", - p->full_name); - return 0; - } - imaplen /= sizeof(unsigned int); - match = 0; - ipar = NULL; - while (imaplen > 0 && !match) { - /* check the child-interrupt field */ - match = 1; - for (i = 0; i < naddrc && match; ++i) - match = ((reg[i] ^ imap[i]) & imask[i]) == 0; - for (; i < naddrc + nintrc && match; ++i) - match = ((ints[i-naddrc] ^ imap[i]) & imask[i]) == 0; - imap += naddrc + nintrc; - imaplen -= naddrc + nintrc; - /* grab the interrupt parent */ - ipar = find_phandle((phandle) *imap++); - --imaplen; - if (ipar == NULL) { - printk("oops, no int parent %x in map of %s\n", - imap[-1], p->full_name); - return 0; - } - /* find the parent's # addr and intr cells */ - ip = (unsigned int *) - get_property(ipar, "#interrupt-cells", NULL); - if (ip == NULL) { - printk("oops, no #interrupt-cells on %s\n", - ipar->full_name); - return 0; - } - newintrc = *ip; - ip = (unsigned int *) - get_property(ipar, "#address-cells", NULL); - newaddrc = (ip == NULL)? 0: *ip; - imap += newaddrc + newintrc; - imaplen -= newaddrc + newintrc; - } - if (imaplen < 0) { - printk("oops, error decoding int-map on %s, len=%d\n", - p->full_name, imaplen); - return 0; - } - if (!match) { -#ifdef DEBUG_IRQ - printk("oops, no match in %s int-map for %s\n", - p->full_name, np->full_name); -#endif - return 0; - } - p = ipar; - naddrc = newaddrc; - nintrc = newintrc; - ints = imap - nintrc; - reg = ints - naddrc; - } - if (p == NULL) { -#ifdef DEBUG_IRQ - printk("hmmm, int tree for %s doesn't have ctrler\n", - np->full_name); -#endif - return 0; - } - *irq = ints; - *ictrler = p; - return nintrc; -} - -static int __devinit finish_node_interrupts(struct device_node *np, - unsigned long *mem_start, - int measure_only) -{ - unsigned int *ints; - int intlen, intrcells, intrcount; - int i, j, n; - unsigned int *irq, virq; - struct device_node *ic; - - ints = (unsigned int *) get_property(np, "interrupts", &intlen); - if (ints == NULL) - return 0; - intrcells = prom_n_intr_cells(np); - intlen /= intrcells * sizeof(unsigned int); - - np->intrs = prom_alloc(intlen * sizeof(*(np->intrs)), mem_start); - if (!np->intrs) - return -ENOMEM; - - if (measure_only) - return 0; - - intrcount = 0; - for (i = 0; i < intlen; ++i, ints += intrcells) { - n = map_interrupt(&irq, &ic, np, ints, intrcells); - if (n <= 0) - continue; - - /* don't map IRQ numbers under a cascaded 8259 controller */ - if (ic && device_is_compatible(ic, "chrp,iic")) { - np->intrs[intrcount].line = irq[0]; - } else { - virq = virt_irq_create_mapping(irq[0]); - if (virq == NO_IRQ) { - printk(KERN_CRIT "Could not allocate interrupt" - " number for %s\n", np->full_name); - continue; - } - np->intrs[intrcount].line = irq_offset_up(virq); - } - - /* We offset irq numbers for the u3 MPIC by 128 in PowerMac */ - if (_machine == PLATFORM_POWERMAC && ic && ic->parent) { - char *name = get_property(ic->parent, "name", NULL); - if (name && !strcmp(name, "u3")) - np->intrs[intrcount].line += 128; - else if (!(name && !strcmp(name, "mac-io"))) - /* ignore other cascaded controllers, such as - the k2-sata-root */ - break; - } - np->intrs[intrcount].sense = 1; - if (n > 1) - np->intrs[intrcount].sense = irq[1]; - if (n > 2) { - printk("hmmm, got %d intr cells for %s:", n, - np->full_name); - for (j = 0; j < n; ++j) - printk(" %d", irq[j]); - printk("\n"); - } - ++intrcount; - } - np->n_intrs = intrcount; - - return 0; -} - -static int __devinit interpret_pci_props(struct device_node *np, - unsigned long *mem_start, - int naddrc, int nsizec, - int measure_only) -{ - struct address_range *adr; - struct pci_reg_property *pci_addrs; - int i, l, n_addrs; - - pci_addrs = (struct pci_reg_property *) - get_property(np, "assigned-addresses", &l); - if (!pci_addrs) - return 0; - - n_addrs = l / sizeof(*pci_addrs); - - adr = prom_alloc(n_addrs * sizeof(*adr), mem_start); - if (!adr) - return -ENOMEM; - - if (measure_only) - return 0; - - np->addrs = adr; - np->n_addrs = n_addrs; - - for (i = 0; i < n_addrs; i++) { - adr[i].space = pci_addrs[i].addr.a_hi; - adr[i].address = pci_addrs[i].addr.a_lo | - ((u64)pci_addrs[i].addr.a_mid << 32); - adr[i].size = pci_addrs[i].size_lo; - } - - return 0; -} - -static int __init interpret_dbdma_props(struct device_node *np, - unsigned long *mem_start, - int naddrc, int nsizec, - int measure_only) -{ - struct reg_property32 *rp; - struct address_range *adr; - unsigned long base_address; - int i, l; - struct device_node *db; - - base_address = 0; - if (!measure_only) { - for (db = np->parent; db != NULL; db = db->parent) { - if (!strcmp(db->type, "dbdma") && db->n_addrs != 0) { - base_address = db->addrs[0].address; - break; - } - } - } - - rp = (struct reg_property32 *) get_property(np, "reg", &l); - if (rp != 0 && l >= sizeof(struct reg_property32)) { - i = 0; - adr = (struct address_range *) (*mem_start); - while ((l -= sizeof(struct reg_property32)) >= 0) { - if (!measure_only) { - adr[i].space = 2; - adr[i].address = rp[i].address + base_address; - adr[i].size = rp[i].size; - } - ++i; - } - np->addrs = adr; - np->n_addrs = i; - (*mem_start) += i * sizeof(struct address_range); - } - - return 0; -} - -static int __init interpret_macio_props(struct device_node *np, - unsigned long *mem_start, - int naddrc, int nsizec, - int measure_only) -{ - struct reg_property32 *rp; - struct address_range *adr; - unsigned long base_address; - int i, l; - struct device_node *db; - - base_address = 0; - if (!measure_only) { - for (db = np->parent; db != NULL; db = db->parent) { - if (!strcmp(db->type, "mac-io") && db->n_addrs != 0) { - base_address = db->addrs[0].address; - break; - } - } - } - - rp = (struct reg_property32 *) get_property(np, "reg", &l); - if (rp != 0 && l >= sizeof(struct reg_property32)) { - i = 0; - adr = (struct address_range *) (*mem_start); - while ((l -= sizeof(struct reg_property32)) >= 0) { - if (!measure_only) { - adr[i].space = 2; - adr[i].address = rp[i].address + base_address; - adr[i].size = rp[i].size; - } - ++i; - } - np->addrs = adr; - np->n_addrs = i; - (*mem_start) += i * sizeof(struct address_range); - } - - return 0; -} - -static int __init interpret_isa_props(struct device_node *np, - unsigned long *mem_start, - int naddrc, int nsizec, - int measure_only) -{ - struct isa_reg_property *rp; - struct address_range *adr; - int i, l; - - rp = (struct isa_reg_property *) get_property(np, "reg", &l); - if (rp != 0 && l >= sizeof(struct isa_reg_property)) { - i = 0; - adr = (struct address_range *) (*mem_start); - while ((l -= sizeof(struct isa_reg_property)) >= 0) { - if (!measure_only) { - adr[i].space = rp[i].space; - adr[i].address = rp[i].address; - adr[i].size = rp[i].size; - } - ++i; - } - np->addrs = adr; - np->n_addrs = i; - (*mem_start) += i * sizeof(struct address_range); - } - - return 0; -} - -static int __init interpret_root_props(struct device_node *np, - unsigned long *mem_start, - int naddrc, int nsizec, - int measure_only) -{ - struct address_range *adr; - int i, l; - unsigned int *rp; - int rpsize = (naddrc + nsizec) * sizeof(unsigned int); - - rp = (unsigned int *) get_property(np, "reg", &l); - if (rp != 0 && l >= rpsize) { - i = 0; - adr = (struct address_range *) (*mem_start); - while ((l -= rpsize) >= 0) { - if (!measure_only) { - adr[i].space = 0; - adr[i].address = rp[naddrc - 1]; - adr[i].size = rp[naddrc + nsizec - 1]; - } - ++i; - rp += naddrc + nsizec; - } - np->addrs = adr; - np->n_addrs = i; - (*mem_start) += i * sizeof(struct address_range); - } - - return 0; -} - -static int __devinit finish_node(struct device_node *np, - unsigned long *mem_start, - interpret_func *ifunc, - int naddrc, int nsizec, - int measure_only) -{ - struct device_node *child; - int *ip, rc = 0; - - /* get the device addresses and interrupts */ - if (ifunc != NULL) - rc = ifunc(np, mem_start, naddrc, nsizec, measure_only); - if (rc) - goto out; - - rc = finish_node_interrupts(np, mem_start, measure_only); - if (rc) - goto out; - - /* Look for #address-cells and #size-cells properties. */ - ip = (int *) get_property(np, "#address-cells", NULL); - if (ip != NULL) - naddrc = *ip; - ip = (int *) get_property(np, "#size-cells", NULL); - if (ip != NULL) - nsizec = *ip; - - if (!strcmp(np->name, "device-tree") || np->parent == NULL) - ifunc = interpret_root_props; - else if (np->type == 0) - ifunc = NULL; - else if (!strcmp(np->type, "pci") || !strcmp(np->type, "vci")) - ifunc = interpret_pci_props; - else if (!strcmp(np->type, "dbdma")) - ifunc = interpret_dbdma_props; - else if (!strcmp(np->type, "mac-io") || ifunc == interpret_macio_props) - ifunc = interpret_macio_props; - else if (!strcmp(np->type, "isa")) - ifunc = interpret_isa_props; - else if (!strcmp(np->name, "uni-n") || !strcmp(np->name, "u3")) - ifunc = interpret_root_props; - else if (!((ifunc == interpret_dbdma_props - || ifunc == interpret_macio_props) - && (!strcmp(np->type, "escc") - || !strcmp(np->type, "media-bay")))) - ifunc = NULL; - - for (child = np->child; child != NULL; child = child->sibling) { - rc = finish_node(child, mem_start, ifunc, - naddrc, nsizec, measure_only); - if (rc) - goto out; - } -out: - return rc; -} - -/** - * finish_device_tree is called once things are running normally - * (i.e. with text and data mapped to the address they were linked at). - * It traverses the device tree and fills in some of the additional, - * fields in each node like {n_}addrs and {n_}intrs, the virt interrupt - * mapping is also initialized at this point. - */ -void __init finish_device_tree(void) -{ - unsigned long start, end, size = 0; - - DBG(" -> finish_device_tree\n"); - - if (ppc64_interrupt_controller == IC_INVALID) { - DBG("failed to configure interrupt controller type\n"); - panic("failed to configure interrupt controller type\n"); - } - - /* Initialize virtual IRQ map */ - virt_irq_init(); - - /* - * Finish device-tree (pre-parsing some properties etc...) - * We do this in 2 passes. One with "measure_only" set, which - * will only measure the amount of memory needed, then we can - * allocate that memory, and call finish_node again. However, - * we must be careful as most routines will fail nowadays when - * prom_alloc() returns 0, so we must make sure our first pass - * doesn't start at 0. We pre-initialize size to 16 for that - * reason and then remove those additional 16 bytes - */ - size = 16; - finish_node(allnodes, &size, NULL, 0, 0, 1); - size -= 16; - end = start = (unsigned long)abs_to_virt(lmb_alloc(size, 128)); - finish_node(allnodes, &end, NULL, 0, 0, 0); - BUG_ON(end != start + size); - - DBG(" <- finish_device_tree\n"); -} - -#ifdef DEBUG -#define printk udbg_printf -#endif - -static inline char *find_flat_dt_string(u32 offset) -{ - return ((char *)initial_boot_params) + - initial_boot_params->off_dt_strings + offset; -} - -/** - * This function is used to scan the flattened device-tree, it is - * used to extract the memory informations at boot before we can - * unflatten the tree - */ -int __init of_scan_flat_dt(int (*it)(unsigned long node, - const char *uname, int depth, - void *data), - void *data) -{ - unsigned long p = ((unsigned long)initial_boot_params) + - initial_boot_params->off_dt_struct; - int rc = 0; - int depth = -1; - - do { - u32 tag = *((u32 *)p); - char *pathp; - - p += 4; - if (tag == OF_DT_END_NODE) { - depth --; - continue; - } - if (tag == OF_DT_NOP) - continue; - if (tag == OF_DT_END) - break; - if (tag == OF_DT_PROP) { - u32 sz = *((u32 *)p); - p += 8; - if (initial_boot_params->version < 0x10) - p = _ALIGN(p, sz >= 8 ? 8 : 4); - p += sz; - p = _ALIGN(p, 4); - continue; - } - if (tag != OF_DT_BEGIN_NODE) { - printk(KERN_WARNING "Invalid tag %x scanning flattened" - " device tree !\n", tag); - return -EINVAL; - } - depth++; - pathp = (char *)p; - p = _ALIGN(p + strlen(pathp) + 1, 4); - if ((*pathp) == '/') { - char *lp, *np; - for (lp = NULL, np = pathp; *np; np++) - if ((*np) == '/') - lp = np+1; - if (lp != NULL) - pathp = lp; - } - rc = it(p, pathp, depth, data); - if (rc != 0) - break; - } while(1); - - return rc; -} - -/** - * This function can be used within scan_flattened_dt callback to get - * access to properties - */ -void* __init of_get_flat_dt_prop(unsigned long node, const char *name, - unsigned long *size) -{ - unsigned long p = node; - - do { - u32 tag = *((u32 *)p); - u32 sz, noff; - const char *nstr; - - p += 4; - if (tag == OF_DT_NOP) - continue; - if (tag != OF_DT_PROP) - return NULL; - - sz = *((u32 *)p); - noff = *((u32 *)(p + 4)); - p += 8; - if (initial_boot_params->version < 0x10) - p = _ALIGN(p, sz >= 8 ? 8 : 4); - - nstr = find_flat_dt_string(noff); - if (nstr == NULL) { - printk(KERN_WARNING "Can't find property index" - " name !\n"); - return NULL; - } - if (strcmp(name, nstr) == 0) { - if (size) - *size = sz; - return (void *)p; - } - p += sz; - p = _ALIGN(p, 4); - } while(1); -} - -static void *__init unflatten_dt_alloc(unsigned long *mem, unsigned long size, - unsigned long align) -{ - void *res; - - *mem = _ALIGN(*mem, align); - res = (void *)*mem; - *mem += size; - - return res; -} - -static unsigned long __init unflatten_dt_node(unsigned long mem, - unsigned long *p, - struct device_node *dad, - struct device_node ***allnextpp, - unsigned long fpsize) -{ - struct device_node *np; - struct property *pp, **prev_pp = NULL; - char *pathp; - u32 tag; - unsigned int l, allocl; - int has_name = 0; - int new_format = 0; - - tag = *((u32 *)(*p)); - if (tag != OF_DT_BEGIN_NODE) { - printk("Weird tag at start of node: %x\n", tag); - return mem; - } - *p += 4; - pathp = (char *)*p; - l = allocl = strlen(pathp) + 1; - *p = _ALIGN(*p + l, 4); - - /* version 0x10 has a more compact unit name here instead of the full - * path. we accumulate the full path size using "fpsize", we'll rebuild - * it later. We detect this because the first character of the name is - * not '/'. - */ - if ((*pathp) != '/') { - new_format = 1; - if (fpsize == 0) { - /* root node: special case. fpsize accounts for path - * plus terminating zero. root node only has '/', so - * fpsize should be 2, but we want to avoid the first - * level nodes to have two '/' so we use fpsize 1 here - */ - fpsize = 1; - allocl = 2; - } else { - /* account for '/' and path size minus terminal 0 - * already in 'l' - */ - fpsize += l; - allocl = fpsize; - } - } - - - np = unflatten_dt_alloc(&mem, sizeof(struct device_node) + allocl, - __alignof__(struct device_node)); - if (allnextpp) { - memset(np, 0, sizeof(*np)); - np->full_name = ((char*)np) + sizeof(struct device_node); - if (new_format) { - char *p = np->full_name; - /* rebuild full path for new format */ - if (dad && dad->parent) { - strcpy(p, dad->full_name); -#ifdef DEBUG - if ((strlen(p) + l + 1) != allocl) { - DBG("%s: p: %d, l: %d, a: %d\n", - pathp, strlen(p), l, allocl); - } -#endif - p += strlen(p); - } - *(p++) = '/'; - memcpy(p, pathp, l); - } else - memcpy(np->full_name, pathp, l); - prev_pp = &np->properties; - **allnextpp = np; - *allnextpp = &np->allnext; - if (dad != NULL) { - np->parent = dad; - /* we temporarily use the next field as `last_child'*/ - if (dad->next == 0) - dad->child = np; - else - dad->next->sibling = np; - dad->next = np; - } - kref_init(&np->kref); - } - while(1) { - u32 sz, noff; - char *pname; - - tag = *((u32 *)(*p)); - if (tag == OF_DT_NOP) { - *p += 4; - continue; - } - if (tag != OF_DT_PROP) - break; - *p += 4; - sz = *((u32 *)(*p)); - noff = *((u32 *)((*p) + 4)); - *p += 8; - if (initial_boot_params->version < 0x10) - *p = _ALIGN(*p, sz >= 8 ? 8 : 4); - - pname = find_flat_dt_string(noff); - if (pname == NULL) { - printk("Can't find property name in list !\n"); - break; - } - if (strcmp(pname, "name") == 0) - has_name = 1; - l = strlen(pname) + 1; - pp = unflatten_dt_alloc(&mem, sizeof(struct property), - __alignof__(struct property)); - if (allnextpp) { - if (strcmp(pname, "linux,phandle") == 0) { - np->node = *((u32 *)*p); - if (np->linux_phandle == 0) - np->linux_phandle = np->node; - } - if (strcmp(pname, "ibm,phandle") == 0) - np->linux_phandle = *((u32 *)*p); - pp->name = pname; - pp->length = sz; - pp->value = (void *)*p; - *prev_pp = pp; - prev_pp = &pp->next; - } - *p = _ALIGN((*p) + sz, 4); - } - /* with version 0x10 we may not have the name property, recreate - * it here from the unit name if absent - */ - if (!has_name) { - char *p = pathp, *ps = pathp, *pa = NULL; - int sz; - - while (*p) { - if ((*p) == '@') - pa = p; - if ((*p) == '/') - ps = p + 1; - p++; - } - if (pa < ps) - pa = p; - sz = (pa - ps) + 1; - pp = unflatten_dt_alloc(&mem, sizeof(struct property) + sz, - __alignof__(struct property)); - if (allnextpp) { - pp->name = "name"; - pp->length = sz; - pp->value = (unsigned char *)(pp + 1); - *prev_pp = pp; - prev_pp = &pp->next; - memcpy(pp->value, ps, sz - 1); - ((char *)pp->value)[sz - 1] = 0; - DBG("fixed up name for %s -> %s\n", pathp, pp->value); - } - } - if (allnextpp) { - *prev_pp = NULL; - np->name = get_property(np, "name", NULL); - np->type = get_property(np, "device_type", NULL); - - if (!np->name) - np->name = "<NULL>"; - if (!np->type) - np->type = "<NULL>"; - } - while (tag == OF_DT_BEGIN_NODE) { - mem = unflatten_dt_node(mem, p, np, allnextpp, fpsize); - tag = *((u32 *)(*p)); - } - if (tag != OF_DT_END_NODE) { - printk("Weird tag at end of node: %x\n", tag); - return mem; - } - *p += 4; - return mem; -} - - -/** - * unflattens the device-tree passed by the firmware, creating the - * tree of struct device_node. It also fills the "name" and "type" - * pointers of the nodes so the normal device-tree walking functions - * can be used (this used to be done by finish_device_tree) - */ -void __init unflatten_device_tree(void) -{ - unsigned long start, mem, size; - struct device_node **allnextp = &allnodes; - char *p = NULL; - int l = 0; - - DBG(" -> unflatten_device_tree()\n"); - - /* First pass, scan for size */ - start = ((unsigned long)initial_boot_params) + - initial_boot_params->off_dt_struct; - size = unflatten_dt_node(0, &start, NULL, NULL, 0); - size = (size | 3) + 1; - - DBG(" size is %lx, allocating...\n", size); - - /* Allocate memory for the expanded device tree */ - mem = lmb_alloc(size + 4, __alignof__(struct device_node)); - if (!mem) { - DBG("Couldn't allocate memory with lmb_alloc()!\n"); - panic("Couldn't allocate memory with lmb_alloc()!\n"); - } - mem = (unsigned long)abs_to_virt(mem); - - ((u32 *)mem)[size / 4] = 0xdeadbeef; - - DBG(" unflattening...\n", mem); - - /* Second pass, do actual unflattening */ - start = ((unsigned long)initial_boot_params) + - initial_boot_params->off_dt_struct; - unflatten_dt_node(mem, &start, NULL, &allnextp, 0); - if (*((u32 *)start) != OF_DT_END) - printk(KERN_WARNING "Weird tag at end of tree: %08x\n", *((u32 *)start)); - if (((u32 *)mem)[size / 4] != 0xdeadbeef) - printk(KERN_WARNING "End of tree marker overwritten: %08x\n", - ((u32 *)mem)[size / 4] ); - *allnextp = NULL; - - /* Get pointer to OF "/chosen" node for use everywhere */ - of_chosen = of_find_node_by_path("/chosen"); - - /* Retreive command line */ - if (of_chosen != NULL) { - p = (char *)get_property(of_chosen, "bootargs", &l); - if (p != NULL && l > 0) - strlcpy(cmd_line, p, min(l, COMMAND_LINE_SIZE)); - } -#ifdef CONFIG_CMDLINE - if (l == 0 || (l == 1 && (*p) == 0)) - strlcpy(cmd_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE); -#endif /* CONFIG_CMDLINE */ - - DBG("Command line is: %s\n", cmd_line); - - DBG(" <- unflatten_device_tree()\n"); -} - - -static int __init early_init_dt_scan_cpus(unsigned long node, - const char *uname, int depth, void *data) -{ - char *type = of_get_flat_dt_prop(node, "device_type", NULL); - u32 *prop; - unsigned long size; - - /* We are scanning "cpu" nodes only */ - if (type == NULL || strcmp(type, "cpu") != 0) - return 0; - - if (initial_boot_params && initial_boot_params->version >= 2) { - /* version 2 of the kexec param format adds the phys cpuid - * of booted proc. - */ - boot_cpuid_phys = initial_boot_params->boot_cpuid_phys; - boot_cpuid = 0; - } else { - /* Check if it's the boot-cpu, set it's hw index in paca now */ - if (of_get_flat_dt_prop(node, "linux,boot-cpu", NULL) - != NULL) { - u32 *prop = of_get_flat_dt_prop(node, "reg", NULL); - set_hard_smp_processor_id(0, prop == NULL ? 0 : *prop); - boot_cpuid_phys = get_hard_smp_processor_id(0); - } - } - -#ifdef CONFIG_ALTIVEC - /* Check if we have a VMX and eventually update CPU features */ - prop = (u32 *)of_get_flat_dt_prop(node, "ibm,vmx", NULL); - if (prop && (*prop) > 0) { - cur_cpu_spec->cpu_features |= CPU_FTR_ALTIVEC; - cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_ALTIVEC; - } - - /* Same goes for Apple's "altivec" property */ - prop = (u32 *)of_get_flat_dt_prop(node, "altivec", NULL); - if (prop) { - cur_cpu_spec->cpu_features |= CPU_FTR_ALTIVEC; - cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_ALTIVEC; - } -#endif /* CONFIG_ALTIVEC */ - - /* - * Check for an SMT capable CPU and set the CPU feature. We do - * this by looking at the size of the ibm,ppc-interrupt-server#s - * property - */ - prop = (u32 *)of_get_flat_dt_prop(node, "ibm,ppc-interrupt-server#s", - &size); - cur_cpu_spec->cpu_features &= ~CPU_FTR_SMT; - if (prop && ((size / sizeof(u32)) > 1)) - cur_cpu_spec->cpu_features |= CPU_FTR_SMT; - - return 0; -} - -static int __init early_init_dt_scan_chosen(unsigned long node, - const char *uname, int depth, void *data) -{ - u32 *prop; - u64 *prop64; - - DBG("search \"chosen\", depth: %d, uname: %s\n", depth, uname); - - if (depth != 1 || strcmp(uname, "chosen") != 0) - return 0; - - /* get platform type */ - prop = (u32 *)of_get_flat_dt_prop(node, "linux,platform", NULL); - if (prop == NULL) - return 0; - _machine = *prop; - - /* check if iommu is forced on or off */ - if (of_get_flat_dt_prop(node, "linux,iommu-off", NULL) != NULL) - iommu_is_off = 1; - if (of_get_flat_dt_prop(node, "linux,iommu-force-on", NULL) != NULL) - iommu_force_on = 1; - - prop64 = (u64*)of_get_flat_dt_prop(node, "linux,memory-limit", NULL); - if (prop64) - memory_limit = *prop64; - - prop64 = (u64*)of_get_flat_dt_prop(node, "linux,tce-alloc-start",NULL); - if (prop64) - tce_alloc_start = *prop64; - - prop64 = (u64*)of_get_flat_dt_prop(node, "linux,tce-alloc-end", NULL); - if (prop64) - tce_alloc_end = *prop64; - -#ifdef CONFIG_PPC_RTAS - /* To help early debugging via the front panel, we retreive a minimal - * set of RTAS infos now if available - */ - { - u64 *basep, *entryp; - - basep = (u64*)of_get_flat_dt_prop(node, - "linux,rtas-base", NULL); - entryp = (u64*)of_get_flat_dt_prop(node, - "linux,rtas-entry", NULL); - prop = (u32*)of_get_flat_dt_prop(node, - "linux,rtas-size", NULL); - if (basep && entryp && prop) { - rtas.base = *basep; - rtas.entry = *entryp; - rtas.size = *prop; - } - } -#endif /* CONFIG_PPC_RTAS */ - - /* break now */ - return 1; -} - -static int __init early_init_dt_scan_root(unsigned long node, - const char *uname, int depth, void *data) -{ - u32 *prop; - - if (depth != 0) - return 0; - - prop = (u32 *)of_get_flat_dt_prop(node, "#size-cells", NULL); - dt_root_size_cells = (prop == NULL) ? 1 : *prop; - DBG("dt_root_size_cells = %x\n", dt_root_size_cells); - - prop = (u32 *)of_get_flat_dt_prop(node, "#address-cells", NULL); - dt_root_addr_cells = (prop == NULL) ? 2 : *prop; - DBG("dt_root_addr_cells = %x\n", dt_root_addr_cells); - - /* break now */ - return 1; -} - -static unsigned long __init dt_mem_next_cell(int s, cell_t **cellp) -{ - cell_t *p = *cellp; - unsigned long r = 0; - - /* Ignore more than 2 cells */ - while (s > 2) { - p++; - s--; - } - while (s) { - r <<= 32; - r |= *(p++); - s--; - } - - *cellp = p; - return r; -} - - -static int __init early_init_dt_scan_memory(unsigned long node, - const char *uname, int depth, void *data) -{ - char *type = of_get_flat_dt_prop(node, "device_type", NULL); - cell_t *reg, *endp; - unsigned long l; - - /* We are scanning "memory" nodes only */ - if (type == NULL || strcmp(type, "memory") != 0) - return 0; - - reg = (cell_t *)of_get_flat_dt_prop(node, "reg", &l); - if (reg == NULL) - return 0; - - endp = reg + (l / sizeof(cell_t)); - - DBG("memory scan node %s ..., reg size %ld, data: %x %x %x %x, ...\n", - uname, l, reg[0], reg[1], reg[2], reg[3]); - - while ((endp - reg) >= (dt_root_addr_cells + dt_root_size_cells)) { - unsigned long base, size; - - base = dt_mem_next_cell(dt_root_addr_cells, ®); - size = dt_mem_next_cell(dt_root_size_cells, ®); - - if (size == 0) - continue; - DBG(" - %lx , %lx\n", base, size); - if (iommu_is_off) { - if (base >= 0x80000000ul) - continue; - if ((base + size) > 0x80000000ul) - size = 0x80000000ul - base; - } - lmb_add(base, size); - } - return 0; -} - -static void __init early_reserve_mem(void) -{ - u64 base, size; - u64 *reserve_map = (u64 *)(((unsigned long)initial_boot_params) + - initial_boot_params->off_mem_rsvmap); - while (1) { - base = *(reserve_map++); - size = *(reserve_map++); - if (size == 0) - break; - DBG("reserving: %lx -> %lx\n", base, size); - lmb_reserve(base, size); - } - -#if 0 - DBG("memory reserved, lmbs :\n"); - lmb_dump_all(); -#endif -} - -void __init early_init_devtree(void *params) -{ - DBG(" -> early_init_devtree()\n"); - - /* Setup flat device-tree pointer */ - initial_boot_params = params; - - /* Retreive various informations from the /chosen node of the - * device-tree, including the platform type, initrd location and - * size, TCE reserve, and more ... - */ - of_scan_flat_dt(early_init_dt_scan_chosen, NULL); - - /* Scan memory nodes and rebuild LMBs */ - lmb_init(); - of_scan_flat_dt(early_init_dt_scan_root, NULL); - of_scan_flat_dt(early_init_dt_scan_memory, NULL); - lmb_enforce_memory_limit(memory_limit); - lmb_analyze(); - lmb_reserve(0, __pa(klimit)); - - /* Reserve LMB regions used by kernel, initrd, dt, etc... */ - early_reserve_mem(); - - DBG("Scanning CPUs ...\n"); - - /* Retreive hash table size from flattened tree plus other - * CPU related informations (altivec support, boot CPU ID, ...) - */ - of_scan_flat_dt(early_init_dt_scan_cpus, NULL); - - DBG(" <- early_init_devtree()\n"); -} - -#undef printk - -int -prom_n_addr_cells(struct device_node* np) -{ - int* ip; - do { - if (np->parent) - np = np->parent; - ip = (int *) get_property(np, "#address-cells", NULL); - if (ip != NULL) - return *ip; - } while (np->parent); - /* No #address-cells property for the root node, default to 1 */ - return 1; -} -EXPORT_SYMBOL_GPL(prom_n_addr_cells); - -int -prom_n_size_cells(struct device_node* np) -{ - int* ip; - do { - if (np->parent) - np = np->parent; - ip = (int *) get_property(np, "#size-cells", NULL); - if (ip != NULL) - return *ip; - } while (np->parent); - /* No #size-cells property for the root node, default to 1 */ - return 1; -} -EXPORT_SYMBOL_GPL(prom_n_size_cells); - -/** - * Work out the sense (active-low level / active-high edge) - * of each interrupt from the device tree. - */ -void __init prom_get_irq_senses(unsigned char *senses, int off, int max) -{ - struct device_node *np; - int i, j; - - /* default to level-triggered */ - memset(senses, 1, max - off); - - for (np = allnodes; np != 0; np = np->allnext) { - for (j = 0; j < np->n_intrs; j++) { - i = np->intrs[j].line; - if (i >= off && i < max) - senses[i-off] = np->intrs[j].sense ? - IRQ_SENSE_LEVEL | IRQ_POLARITY_NEGATIVE : - IRQ_SENSE_EDGE | IRQ_POLARITY_POSITIVE; - } - } -} - -/** - * Construct and return a list of the device_nodes with a given name. - */ -struct device_node * -find_devices(const char *name) -{ - struct device_node *head, **prevp, *np; - - prevp = &head; - for (np = allnodes; np != 0; np = np->allnext) { - if (np->name != 0 && strcasecmp(np->name, name) == 0) { - *prevp = np; - prevp = &np->next; - } - } - *prevp = NULL; - return head; -} -EXPORT_SYMBOL(find_devices); - -/** - * Construct and return a list of the device_nodes with a given type. - */ -struct device_node * -find_type_devices(const char *type) -{ - struct device_node *head, **prevp, *np; - - prevp = &head; - for (np = allnodes; np != 0; np = np->allnext) { - if (np->type != 0 && strcasecmp(np->type, type) == 0) { - *prevp = np; - prevp = &np->next; - } - } - *prevp = NULL; - return head; -} -EXPORT_SYMBOL(find_type_devices); - -/** - * Returns all nodes linked together - */ -struct device_node * -find_all_nodes(void) -{ - struct device_node *head, **prevp, *np; - - prevp = &head; - for (np = allnodes; np != 0; np = np->allnext) { - *prevp = np; - prevp = &np->next; - } - *prevp = NULL; - return head; -} -EXPORT_SYMBOL(find_all_nodes); - -/** Checks if the given "compat" string matches one of the strings in - * the device's "compatible" property - */ -int -device_is_compatible(struct device_node *device, const char *compat) -{ - const char* cp; - int cplen, l; - - cp = (char *) get_property(device, "compatible", &cplen); - if (cp == NULL) - return 0; - while (cplen > 0) { - if (strncasecmp(cp, compat, strlen(compat)) == 0) - return 1; - l = strlen(cp) + 1; - cp += l; - cplen -= l; - } - - return 0; -} -EXPORT_SYMBOL(device_is_compatible); - - -/** - * Indicates whether the root node has a given value in its - * compatible property. - */ -int -machine_is_compatible(const char *compat) -{ - struct device_node *root; - int rc = 0; - - root = of_find_node_by_path("/"); - if (root) { - rc = device_is_compatible(root, compat); - of_node_put(root); - } - return rc; -} -EXPORT_SYMBOL(machine_is_compatible); - -/** - * Construct and return a list of the device_nodes with a given type - * and compatible property. - */ -struct device_node * -find_compatible_devices(const char *type, const char *compat) -{ - struct device_node *head, **prevp, *np; - - prevp = &head; - for (np = allnodes; np != 0; np = np->allnext) { - if (type != NULL - && !(np->type != 0 && strcasecmp(np->type, type) == 0)) - continue; - if (device_is_compatible(np, compat)) { - *prevp = np; - prevp = &np->next; - } - } - *prevp = NULL; - return head; -} -EXPORT_SYMBOL(find_compatible_devices); - -/** - * Find the device_node with a given full_name. - */ -struct device_node * -find_path_device(const char *path) -{ - struct device_node *np; - - for (np = allnodes; np != 0; np = np->allnext) - if (np->full_name != 0 && strcasecmp(np->full_name, path) == 0) - return np; - return NULL; -} -EXPORT_SYMBOL(find_path_device); - -/******* - * - * New implementation of the OF "find" APIs, return a refcounted - * object, call of_node_put() when done. The device tree and list - * are protected by a rw_lock. - * - * Note that property management will need some locking as well, - * this isn't dealt with yet. - * - *******/ - -/** - * of_find_node_by_name - Find a node by its "name" property - * @from: The node to start searching from or NULL, the node - * you pass will not be searched, only the next one - * will; typically, you pass what the previous call - * returned. of_node_put() will be called on it - * @name: The name string to match against - * - * Returns a node pointer with refcount incremented, use - * of_node_put() on it when done. - */ -struct device_node *of_find_node_by_name(struct device_node *from, - const char *name) -{ - struct device_node *np; - - read_lock(&devtree_lock); - np = from ? from->allnext : allnodes; - for (; np != 0; np = np->allnext) - if (np->name != 0 && strcasecmp(np->name, name) == 0 - && of_node_get(np)) - break; - if (from) - of_node_put(from); - read_unlock(&devtree_lock); - return np; -} -EXPORT_SYMBOL(of_find_node_by_name); - -/** - * of_find_node_by_type - Find a node by its "device_type" property - * @from: The node to start searching from or NULL, the node - * you pass will not be searched, only the next one - * will; typically, you pass what the previous call - * returned. of_node_put() will be called on it - * @name: The type string to match against - * - * Returns a node pointer with refcount incremented, use - * of_node_put() on it when done. - */ -struct device_node *of_find_node_by_type(struct device_node *from, - const char *type) -{ - struct device_node *np; - - read_lock(&devtree_lock); - np = from ? from->allnext : allnodes; - for (; np != 0; np = np->allnext) - if (np->type != 0 && strcasecmp(np->type, type) == 0 - && of_node_get(np)) - break; - if (from) - of_node_put(from); - read_unlock(&devtree_lock); - return np; -} -EXPORT_SYMBOL(of_find_node_by_type); - -/** - * of_find_compatible_node - Find a node based on type and one of the - * tokens in its "compatible" property - * @from: The node to start searching from or NULL, the node - * you pass will not be searched, only the next one - * will; typically, you pass what the previous call - * returned. of_node_put() will be called on it - * @type: The type string to match "device_type" or NULL to ignore - * @compatible: The string to match to one of the tokens in the device - * "compatible" list. - * - * Returns a node pointer with refcount incremented, use - * of_node_put() on it when done. - */ -struct device_node *of_find_compatible_node(struct device_node *from, - const char *type, const char *compatible) -{ - struct device_node *np; - - read_lock(&devtree_lock); - np = from ? from->allnext : allnodes; - for (; np != 0; np = np->allnext) { - if (type != NULL - && !(np->type != 0 && strcasecmp(np->type, type) == 0)) - continue; - if (device_is_compatible(np, compatible) && of_node_get(np)) - break; - } - if (from) - of_node_put(from); - read_unlock(&devtree_lock); - return np; -} -EXPORT_SYMBOL(of_find_compatible_node); - -/** - * of_find_node_by_path - Find a node matching a full OF path - * @path: The full path to match - * - * Returns a node pointer with refcount incremented, use - * of_node_put() on it when done. - */ -struct device_node *of_find_node_by_path(const char *path) -{ - struct device_node *np = allnodes; - - read_lock(&devtree_lock); - for (; np != 0; np = np->allnext) { - if (np->full_name != 0 && strcasecmp(np->full_name, path) == 0 - && of_node_get(np)) - break; - } - read_unlock(&devtree_lock); - return np; -} -EXPORT_SYMBOL(of_find_node_by_path); - -/** - * of_find_node_by_phandle - Find a node given a phandle - * @handle: phandle of the node to find - * - * Returns a node pointer with refcount incremented, use - * of_node_put() on it when done. - */ -struct device_node *of_find_node_by_phandle(phandle handle) -{ - struct device_node *np; - - read_lock(&devtree_lock); - for (np = allnodes; np != 0; np = np->allnext) - if (np->linux_phandle == handle) - break; - if (np) - of_node_get(np); - read_unlock(&devtree_lock); - return np; -} -EXPORT_SYMBOL(of_find_node_by_phandle); - -/** - * of_find_all_nodes - Get next node in global list - * @prev: Previous node or NULL to start iteration - * of_node_put() will be called on it - * - * Returns a node pointer with refcount incremented, use - * of_node_put() on it when done. - */ -struct device_node *of_find_all_nodes(struct device_node *prev) -{ - struct device_node *np; - - read_lock(&devtree_lock); - np = prev ? prev->allnext : allnodes; - for (; np != 0; np = np->allnext) - if (of_node_get(np)) - break; - if (prev) - of_node_put(prev); - read_unlock(&devtree_lock); - return np; -} -EXPORT_SYMBOL(of_find_all_nodes); - -/** - * of_get_parent - Get a node's parent if any - * @node: Node to get parent - * - * Returns a node pointer with refcount incremented, use - * of_node_put() on it when done. - */ -struct device_node *of_get_parent(const struct device_node *node) -{ - struct device_node *np; - - if (!node) - return NULL; - - read_lock(&devtree_lock); - np = of_node_get(node->parent); - read_unlock(&devtree_lock); - return np; -} -EXPORT_SYMBOL(of_get_parent); - -/** - * of_get_next_child - Iterate a node childs - * @node: parent node - * @prev: previous child of the parent node, or NULL to get first - * - * Returns a node pointer with refcount incremented, use - * of_node_put() on it when done. - */ -struct device_node *of_get_next_child(const struct device_node *node, - struct device_node *prev) -{ - struct device_node *next; - - read_lock(&devtree_lock); - next = prev ? prev->sibling : node->child; - for (; next != 0; next = next->sibling) - if (of_node_get(next)) - break; - if (prev) - of_node_put(prev); - read_unlock(&devtree_lock); - return next; -} -EXPORT_SYMBOL(of_get_next_child); - -/** - * of_node_get - Increment refcount of a node - * @node: Node to inc refcount, NULL is supported to - * simplify writing of callers - * - * Returns node. - */ -struct device_node *of_node_get(struct device_node *node) -{ - if (node) - kref_get(&node->kref); - return node; -} -EXPORT_SYMBOL(of_node_get); - -static inline struct device_node * kref_to_device_node(struct kref *kref) -{ - return container_of(kref, struct device_node, kref); -} - -/** - * of_node_release - release a dynamically allocated node - * @kref: kref element of the node to be released - * - * In of_node_put() this function is passed to kref_put() - * as the destructor. - */ -static void of_node_release(struct kref *kref) -{ - struct device_node *node = kref_to_device_node(kref); - struct property *prop = node->properties; - - if (!OF_IS_DYNAMIC(node)) - return; - while (prop) { - struct property *next = prop->next; - kfree(prop->name); - kfree(prop->value); - kfree(prop); - prop = next; - } - kfree(node->intrs); - kfree(node->addrs); - kfree(node->full_name); - kfree(node->data); - kfree(node); -} - -/** - * of_node_put - Decrement refcount of a node - * @node: Node to dec refcount, NULL is supported to - * simplify writing of callers - * - */ -void of_node_put(struct device_node *node) -{ - if (node) - kref_put(&node->kref, of_node_release); -} -EXPORT_SYMBOL(of_node_put); - -/* - * Fix up the uninitialized fields in a new device node: - * name, type, n_addrs, addrs, n_intrs, intrs, and pci-specific fields - * - * A lot of boot-time code is duplicated here, because functions such - * as finish_node_interrupts, interpret_pci_props, etc. cannot use the - * slab allocator. - * - * This should probably be split up into smaller chunks. - */ - -static int of_finish_dynamic_node(struct device_node *node, - unsigned long *unused1, int unused2, - int unused3, int unused4) -{ - struct device_node *parent = of_get_parent(node); - int err = 0; - phandle *ibm_phandle; - - node->name = get_property(node, "name", NULL); - node->type = get_property(node, "device_type", NULL); - - if (!parent) { - err = -ENODEV; - goto out; - } - - /* We don't support that function on PowerMac, at least - * not yet - */ - if (_machine == PLATFORM_POWERMAC) - return -ENODEV; - - /* fix up new node's linux_phandle field */ - if ((ibm_phandle = (unsigned int *)get_property(node, "ibm,phandle", NULL))) - node->linux_phandle = *ibm_phandle; - -out: - of_node_put(parent); - return err; -} - -/* - * Plug a device node into the tree and global list. - */ -void of_attach_node(struct device_node *np) -{ - write_lock(&devtree_lock); - np->sibling = np->parent->child; - np->allnext = allnodes; - np->parent->child = np; - allnodes = np; - write_unlock(&devtree_lock); -} - -/* - * "Unplug" a node from the device tree. The caller must hold - * a reference to the node. The memory associated with the node - * is not freed until its refcount goes to zero. - */ -void of_detach_node(const struct device_node *np) -{ - struct device_node *parent; - - write_lock(&devtree_lock); - - parent = np->parent; - - if (allnodes == np) - allnodes = np->allnext; - else { - struct device_node *prev; - for (prev = allnodes; - prev->allnext != np; - prev = prev->allnext) - ; - prev->allnext = np->allnext; - } - - if (parent->child == np) - parent->child = np->sibling; - else { - struct device_node *prevsib; - for (prevsib = np->parent->child; - prevsib->sibling != np; - prevsib = prevsib->sibling) - ; - prevsib->sibling = np->sibling; - } - - write_unlock(&devtree_lock); -} - -static int prom_reconfig_notifier(struct notifier_block *nb, unsigned long action, void *node) -{ - int err; - - switch (action) { - case PSERIES_RECONFIG_ADD: - err = finish_node(node, NULL, of_finish_dynamic_node, 0, 0, 0); - if (err < 0) { - printk(KERN_ERR "finish_node returned %d\n", err); - err = NOTIFY_BAD; - } - break; - default: - err = NOTIFY_DONE; - break; - } - return err; -} - -static struct notifier_block prom_reconfig_nb = { - .notifier_call = prom_reconfig_notifier, - .priority = 10, /* This one needs to run first */ -}; - -static int __init prom_reconfig_setup(void) -{ - return pSeries_reconfig_notifier_register(&prom_reconfig_nb); -} -__initcall(prom_reconfig_setup); - -/* - * Find a property with a given name for a given node - * and return the value. - */ -unsigned char * -get_property(struct device_node *np, const char *name, int *lenp) -{ - struct property *pp; - - for (pp = np->properties; pp != 0; pp = pp->next) - if (strcmp(pp->name, name) == 0) { - if (lenp != 0) - *lenp = pp->length; - return pp->value; - } - return NULL; -} -EXPORT_SYMBOL(get_property); - -/* - * Add a property to a node. - */ -int -prom_add_property(struct device_node* np, struct property* prop) -{ - struct property **next; - - prop->next = NULL; - write_lock(&devtree_lock); - next = &np->properties; - while (*next) { - if (strcmp(prop->name, (*next)->name) == 0) { - /* duplicate ! don't insert it */ - write_unlock(&devtree_lock); - return -1; - } - next = &(*next)->next; - } - *next = prop; - write_unlock(&devtree_lock); - - /* try to add to proc as well if it was initialized */ - if (np->pde) - proc_device_tree_add_prop(np->pde, prop); - - return 0; -} - -#if 0 -void -print_properties(struct device_node *np) -{ - struct property *pp; - char *cp; - int i, n; - - for (pp = np->properties; pp != 0; pp = pp->next) { - printk(KERN_INFO "%s", pp->name); - for (i = strlen(pp->name); i < 16; ++i) - printk(" "); - cp = (char *) pp->value; - for (i = pp->length; i > 0; --i, ++cp) - if ((i > 1 && (*cp < 0x20 || *cp > 0x7e)) - || (i == 1 && *cp != 0)) - break; - if (i == 0 && pp->length > 1) { - /* looks like a string */ - printk(" %s\n", (char *) pp->value); - } else { - /* dump it in hex */ - n = pp->length; - if (n > 64) - n = 64; - if (pp->length % 4 == 0) { - unsigned int *p = (unsigned int *) pp->value; - - n /= 4; - for (i = 0; i < n; ++i) { - if (i != 0 && (i % 4) == 0) - printk("\n "); - printk(" %08x", *p++); - } - } else { - unsigned char *bp = pp->value; - - for (i = 0; i < n; ++i) { - if (i != 0 && (i % 16) == 0) - printk("\n "); - printk(" %02x", *bp++); - } - } - printk("\n"); - if (pp->length > 64) - printk(" ... (length = %d)\n", - pp->length); - } - } -} -#endif - - - - - - - - - - diff --git a/arch/ppc64/kernel/prom_init.c b/arch/ppc64/kernel/prom_init.c deleted file mode 100644 index 6375f40b23db..000000000000 --- a/arch/ppc64/kernel/prom_init.c +++ /dev/null @@ -1,2051 +0,0 @@ -/* - * - * - * Procedures for interfacing to Open Firmware. - * - * Paul Mackerras August 1996. - * Copyright (C) 1996 Paul Mackerras. - * - * Adapted for 64bit PowerPC by Dave Engebretsen and Peter Bergner. - * {engebret|bergner}@us.ibm.com - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#undef DEBUG_PROM - -#include <stdarg.h> -#include <linux/config.h> -#include <linux/kernel.h> -#include <linux/string.h> -#include <linux/init.h> -#include <linux/threads.h> -#include <linux/spinlock.h> -#include <linux/types.h> -#include <linux/pci.h> -#include <linux/proc_fs.h> -#include <linux/stringify.h> -#include <linux/delay.h> -#include <linux/initrd.h> -#include <linux/bitops.h> -#include <asm/prom.h> -#include <asm/rtas.h> -#include <asm/abs_addr.h> -#include <asm/page.h> -#include <asm/processor.h> -#include <asm/irq.h> -#include <asm/io.h> -#include <asm/smp.h> -#include <asm/system.h> -#include <asm/mmu.h> -#include <asm/pgtable.h> -#include <asm/pci.h> -#include <asm/iommu.h> -#include <asm/btext.h> -#include <asm/sections.h> -#include <asm/machdep.h> - -#ifdef CONFIG_LOGO_LINUX_CLUT224 -#include <linux/linux_logo.h> -extern const struct linux_logo logo_linux_clut224; -#endif - -/* - * Properties whose value is longer than this get excluded from our - * copy of the device tree. This value does need to be big enough to - * ensure that we don't lose things like the interrupt-map property - * on a PCI-PCI bridge. - */ -#define MAX_PROPERTY_LENGTH (1UL * 1024 * 1024) - -/* - * Eventually bump that one up - */ -#define DEVTREE_CHUNK_SIZE 0x100000 - -/* - * This is the size of the local memory reserve map that gets copied - * into the boot params passed to the kernel. That size is totally - * flexible as the kernel just reads the list until it encounters an - * entry with size 0, so it can be changed without breaking binary - * compatibility - */ -#define MEM_RESERVE_MAP_SIZE 8 - -/* - * prom_init() is called very early on, before the kernel text - * and data have been mapped to KERNELBASE. At this point the code - * is running at whatever address it has been loaded at, so - * references to extern and static variables must be relocated - * explicitly. The procedure reloc_offset() returns the address - * we're currently running at minus the address we were linked at. - * (Note that strings count as static variables.) - * - * Because OF may have mapped I/O devices into the area starting at - * KERNELBASE, particularly on CHRP machines, we can't safely call - * OF once the kernel has been mapped to KERNELBASE. Therefore all - * OF calls should be done within prom_init(), and prom_init() - * and all routines called within it must be careful to relocate - * references as necessary. - * - * Note that the bss is cleared *after* prom_init runs, so we have - * to make sure that any static or extern variables it accesses - * are put in the data segment. - */ - - -#define PROM_BUG() do { \ - prom_printf("kernel BUG at %s line 0x%x!\n", \ - RELOC(__FILE__), __LINE__); \ - __asm__ __volatile__(".long " BUG_ILLEGAL_INSTR); \ -} while (0) - -#ifdef DEBUG_PROM -#define prom_debug(x...) prom_printf(x) -#else -#define prom_debug(x...) -#endif - - -typedef u32 prom_arg_t; - -struct prom_args { - u32 service; - u32 nargs; - u32 nret; - prom_arg_t args[10]; - prom_arg_t *rets; /* Pointer to return values in args[16]. */ -}; - -struct prom_t { - unsigned long entry; - ihandle root; - ihandle chosen; - int cpu; - ihandle stdout; - ihandle disp_node; - struct prom_args args; - unsigned long version; - unsigned long root_size_cells; - unsigned long root_addr_cells; -}; - -struct pci_reg_property { - struct pci_address addr; - u32 size_hi; - u32 size_lo; -}; - -struct mem_map_entry { - u64 base; - u64 size; -}; - -typedef u32 cell_t; - -extern void __start(unsigned long r3, unsigned long r4, unsigned long r5); - -extern void enter_prom(struct prom_args *args, unsigned long entry); -extern void copy_and_flush(unsigned long dest, unsigned long src, - unsigned long size, unsigned long offset); - -extern unsigned long klimit; - -/* prom structure */ -static struct prom_t __initdata prom; - -#define PROM_SCRATCH_SIZE 256 - -static char __initdata of_stdout_device[256]; -static char __initdata prom_scratch[PROM_SCRATCH_SIZE]; - -static unsigned long __initdata dt_header_start; -static unsigned long __initdata dt_struct_start, dt_struct_end; -static unsigned long __initdata dt_string_start, dt_string_end; - -static unsigned long __initdata prom_initrd_start, prom_initrd_end; - -static int __initdata iommu_force_on; -static int __initdata ppc64_iommu_off; -static int __initdata of_platform; - -static char __initdata prom_cmd_line[COMMAND_LINE_SIZE]; - -static unsigned long __initdata prom_memory_limit; -static unsigned long __initdata prom_tce_alloc_start; -static unsigned long __initdata prom_tce_alloc_end; - -static unsigned long __initdata alloc_top; -static unsigned long __initdata alloc_top_high; -static unsigned long __initdata alloc_bottom; -static unsigned long __initdata rmo_top; -static unsigned long __initdata ram_top; - -static struct mem_map_entry __initdata mem_reserve_map[MEM_RESERVE_MAP_SIZE]; -static int __initdata mem_reserve_cnt; - -static cell_t __initdata regbuf[1024]; - - -#define MAX_CPU_THREADS 2 - -/* TO GO */ -#ifdef CONFIG_HMT -struct { - unsigned int pir; - unsigned int threadid; -} hmt_thread_data[NR_CPUS]; -#endif /* CONFIG_HMT */ - -/* - * This are used in calls to call_prom. The 4th and following - * arguments to call_prom should be 32-bit values. 64 bit values - * are truncated to 32 bits (and fortunately don't get interpreted - * as two arguments). - */ -#define ADDR(x) (u32) ((unsigned long)(x) - offset) - -/* - * Error results ... some OF calls will return "-1" on error, some - * will return 0, some will return either. To simplify, here are - * macros to use with any ihandle or phandle return value to check if - * it is valid - */ - -#define PROM_ERROR (-1u) -#define PHANDLE_VALID(p) ((p) != 0 && (p) != PROM_ERROR) -#define IHANDLE_VALID(i) ((i) != 0 && (i) != PROM_ERROR) - - -/* This is the one and *ONLY* place where we actually call open - * firmware from, since we need to make sure we're running in 32b - * mode when we do. We switch back to 64b mode upon return. - */ - -static int __init call_prom(const char *service, int nargs, int nret, ...) -{ - int i; - unsigned long offset = reloc_offset(); - struct prom_t *_prom = PTRRELOC(&prom); - va_list list; - - _prom->args.service = ADDR(service); - _prom->args.nargs = nargs; - _prom->args.nret = nret; - _prom->args.rets = (prom_arg_t *)&(_prom->args.args[nargs]); - - va_start(list, nret); - for (i=0; i < nargs; i++) - _prom->args.args[i] = va_arg(list, prom_arg_t); - va_end(list); - - for (i=0; i < nret ;i++) - _prom->args.rets[i] = 0; - - enter_prom(&_prom->args, _prom->entry); - - return (nret > 0) ? _prom->args.rets[0] : 0; -} - - -static unsigned int __init prom_claim(unsigned long virt, unsigned long size, - unsigned long align) -{ - return (unsigned int)call_prom("claim", 3, 1, - (prom_arg_t)virt, (prom_arg_t)size, - (prom_arg_t)align); -} - -static void __init prom_print(const char *msg) -{ - const char *p, *q; - unsigned long offset = reloc_offset(); - struct prom_t *_prom = PTRRELOC(&prom); - - if (_prom->stdout == 0) - return; - - for (p = msg; *p != 0; p = q) { - for (q = p; *q != 0 && *q != '\n'; ++q) - ; - if (q > p) - call_prom("write", 3, 1, _prom->stdout, p, q - p); - if (*q == 0) - break; - ++q; - call_prom("write", 3, 1, _prom->stdout, ADDR("\r\n"), 2); - } -} - - -static void __init prom_print_hex(unsigned long val) -{ - unsigned long offset = reloc_offset(); - int i, nibbles = sizeof(val)*2; - char buf[sizeof(val)*2+1]; - struct prom_t *_prom = PTRRELOC(&prom); - - for (i = nibbles-1; i >= 0; i--) { - buf[i] = (val & 0xf) + '0'; - if (buf[i] > '9') - buf[i] += ('a'-'0'-10); - val >>= 4; - } - buf[nibbles] = '\0'; - call_prom("write", 3, 1, _prom->stdout, buf, nibbles); -} - - -static void __init prom_printf(const char *format, ...) -{ - unsigned long offset = reloc_offset(); - const char *p, *q, *s; - va_list args; - unsigned long v; - struct prom_t *_prom = PTRRELOC(&prom); - - va_start(args, format); - for (p = PTRRELOC(format); *p != 0; p = q) { - for (q = p; *q != 0 && *q != '\n' && *q != '%'; ++q) - ; - if (q > p) - call_prom("write", 3, 1, _prom->stdout, p, q - p); - if (*q == 0) - break; - if (*q == '\n') { - ++q; - call_prom("write", 3, 1, _prom->stdout, - ADDR("\r\n"), 2); - continue; - } - ++q; - if (*q == 0) - break; - switch (*q) { - case 's': - ++q; - s = va_arg(args, const char *); - prom_print(s); - break; - case 'x': - ++q; - v = va_arg(args, unsigned long); - prom_print_hex(v); - break; - } - } -} - - -static void __init __attribute__((noreturn)) prom_panic(const char *reason) -{ - unsigned long offset = reloc_offset(); - - prom_print(PTRRELOC(reason)); - /* ToDo: should put up an SRC here */ - call_prom("exit", 0, 0); - - for (;;) /* should never get here */ - ; -} - - -static int __init prom_next_node(phandle *nodep) -{ - phandle node; - - if ((node = *nodep) != 0 - && (*nodep = call_prom("child", 1, 1, node)) != 0) - return 1; - if ((*nodep = call_prom("peer", 1, 1, node)) != 0) - return 1; - for (;;) { - if ((node = call_prom("parent", 1, 1, node)) == 0) - return 0; - if ((*nodep = call_prom("peer", 1, 1, node)) != 0) - return 1; - } -} - -static int __init prom_getprop(phandle node, const char *pname, - void *value, size_t valuelen) -{ - unsigned long offset = reloc_offset(); - - return call_prom("getprop", 4, 1, node, ADDR(pname), - (u32)(unsigned long) value, (u32) valuelen); -} - -static int __init prom_getproplen(phandle node, const char *pname) -{ - unsigned long offset = reloc_offset(); - - return call_prom("getproplen", 2, 1, node, ADDR(pname)); -} - -static int __init prom_setprop(phandle node, const char *pname, - void *value, size_t valuelen) -{ - unsigned long offset = reloc_offset(); - - return call_prom("setprop", 4, 1, node, ADDR(pname), - (u32)(unsigned long) value, (u32) valuelen); -} - -/* We can't use the standard versions because of RELOC headaches. */ -#define isxdigit(c) (('0' <= (c) && (c) <= '9') \ - || ('a' <= (c) && (c) <= 'f') \ - || ('A' <= (c) && (c) <= 'F')) - -#define isdigit(c) ('0' <= (c) && (c) <= '9') -#define islower(c) ('a' <= (c) && (c) <= 'z') -#define toupper(c) (islower(c) ? ((c) - 'a' + 'A') : (c)) - -unsigned long prom_strtoul(const char *cp, const char **endp) -{ - unsigned long result = 0, base = 10, value; - - if (*cp == '0') { - base = 8; - cp++; - if (toupper(*cp) == 'X') { - cp++; - base = 16; - } - } - - while (isxdigit(*cp) && - (value = isdigit(*cp) ? *cp - '0' : toupper(*cp) - 'A' + 10) < base) { - result = result * base + value; - cp++; - } - - if (endp) - *endp = cp; - - return result; -} - -unsigned long prom_memparse(const char *ptr, const char **retptr) -{ - unsigned long ret = prom_strtoul(ptr, retptr); - int shift = 0; - - /* - * We can't use a switch here because GCC *may* generate a - * jump table which won't work, because we're not running at - * the address we're linked at. - */ - if ('G' == **retptr || 'g' == **retptr) - shift = 30; - - if ('M' == **retptr || 'm' == **retptr) - shift = 20; - - if ('K' == **retptr || 'k' == **retptr) - shift = 10; - - if (shift) { - ret <<= shift; - (*retptr)++; - } - - return ret; -} - -/* - * Early parsing of the command line passed to the kernel, used for - * "mem=x" and the options that affect the iommu - */ -static void __init early_cmdline_parse(void) -{ - unsigned long offset = reloc_offset(); - struct prom_t *_prom = PTRRELOC(&prom); - char *opt, *p; - int l = 0; - - RELOC(prom_cmd_line[0]) = 0; - p = RELOC(prom_cmd_line); - if ((long)_prom->chosen > 0) - l = prom_getprop(_prom->chosen, "bootargs", p, COMMAND_LINE_SIZE-1); -#ifdef CONFIG_CMDLINE - if (l == 0) /* dbl check */ - strlcpy(RELOC(prom_cmd_line), - RELOC(CONFIG_CMDLINE), sizeof(prom_cmd_line)); -#endif /* CONFIG_CMDLINE */ - prom_printf("command line: %s\n", RELOC(prom_cmd_line)); - - opt = strstr(RELOC(prom_cmd_line), RELOC("iommu=")); - if (opt) { - prom_printf("iommu opt is: %s\n", opt); - opt += 6; - while (*opt && *opt == ' ') - opt++; - if (!strncmp(opt, RELOC("off"), 3)) - RELOC(ppc64_iommu_off) = 1; - else if (!strncmp(opt, RELOC("force"), 5)) - RELOC(iommu_force_on) = 1; - } - - opt = strstr(RELOC(prom_cmd_line), RELOC("mem=")); - if (opt) { - opt += 4; - RELOC(prom_memory_limit) = prom_memparse(opt, (const char **)&opt); - /* Align to 16 MB == size of large page */ - RELOC(prom_memory_limit) = ALIGN(RELOC(prom_memory_limit), 0x1000000); - } -} - -/* - * To tell the firmware what our capabilities are, we have to pass - * it a fake 32-bit ELF header containing a couple of PT_NOTE sections - * that contain structures that contain the actual values. - */ -static struct fake_elf { - Elf32_Ehdr elfhdr; - Elf32_Phdr phdr[2]; - struct chrpnote { - u32 namesz; - u32 descsz; - u32 type; - char name[8]; /* "PowerPC" */ - struct chrpdesc { - u32 real_mode; - u32 real_base; - u32 real_size; - u32 virt_base; - u32 virt_size; - u32 load_base; - } chrpdesc; - } chrpnote; - struct rpanote { - u32 namesz; - u32 descsz; - u32 type; - char name[24]; /* "IBM,RPA-Client-Config" */ - struct rpadesc { - u32 lpar_affinity; - u32 min_rmo_size; - u32 min_rmo_percent; - u32 max_pft_size; - u32 splpar; - u32 min_load; - u32 new_mem_def; - u32 ignore_me; - } rpadesc; - } rpanote; -} fake_elf = { - .elfhdr = { - .e_ident = { 0x7f, 'E', 'L', 'F', - ELFCLASS32, ELFDATA2MSB, EV_CURRENT }, - .e_type = ET_EXEC, /* yeah right */ - .e_machine = EM_PPC, - .e_version = EV_CURRENT, - .e_phoff = offsetof(struct fake_elf, phdr), - .e_phentsize = sizeof(Elf32_Phdr), - .e_phnum = 2 - }, - .phdr = { - [0] = { - .p_type = PT_NOTE, - .p_offset = offsetof(struct fake_elf, chrpnote), - .p_filesz = sizeof(struct chrpnote) - }, [1] = { - .p_type = PT_NOTE, - .p_offset = offsetof(struct fake_elf, rpanote), - .p_filesz = sizeof(struct rpanote) - } - }, - .chrpnote = { - .namesz = sizeof("PowerPC"), - .descsz = sizeof(struct chrpdesc), - .type = 0x1275, - .name = "PowerPC", - .chrpdesc = { - .real_mode = ~0U, /* ~0 means "don't care" */ - .real_base = ~0U, - .real_size = ~0U, - .virt_base = ~0U, - .virt_size = ~0U, - .load_base = ~0U - }, - }, - .rpanote = { - .namesz = sizeof("IBM,RPA-Client-Config"), - .descsz = sizeof(struct rpadesc), - .type = 0x12759999, - .name = "IBM,RPA-Client-Config", - .rpadesc = { - .lpar_affinity = 0, - .min_rmo_size = 64, /* in megabytes */ - .min_rmo_percent = 0, - .max_pft_size = 48, /* 2^48 bytes max PFT size */ - .splpar = 1, - .min_load = ~0U, - .new_mem_def = 0 - } - } -}; - -static void __init prom_send_capabilities(void) -{ - unsigned long offset = reloc_offset(); - ihandle elfloader; - - elfloader = call_prom("open", 1, 1, ADDR("/packages/elf-loader")); - if (elfloader == 0) { - prom_printf("couldn't open /packages/elf-loader\n"); - return; - } - call_prom("call-method", 3, 1, ADDR("process-elf-header"), - elfloader, ADDR(&fake_elf)); - call_prom("close", 1, 0, elfloader); -} - -/* - * Memory allocation strategy... our layout is normally: - * - * at 14Mb or more we vmlinux, then a gap and initrd. In some rare cases, initrd - * might end up beeing before the kernel though. We assume this won't override - * the final kernel at 0, we have no provision to handle that in this version, - * but it should hopefully never happen. - * - * alloc_top is set to the top of RMO, eventually shrink down if the TCEs overlap - * alloc_bottom is set to the top of kernel/initrd - * - * from there, allocations are done that way : rtas is allocated topmost, and - * the device-tree is allocated from the bottom. We try to grow the device-tree - * allocation as we progress. If we can't, then we fail, we don't currently have - * a facility to restart elsewhere, but that shouldn't be necessary neither - * - * Note that calls to reserve_mem have to be done explicitely, memory allocated - * with either alloc_up or alloc_down isn't automatically reserved. - */ - - -/* - * Allocates memory in the RMO upward from the kernel/initrd - * - * When align is 0, this is a special case, it means to allocate in place - * at the current location of alloc_bottom or fail (that is basically - * extending the previous allocation). Used for the device-tree flattening - */ -static unsigned long __init alloc_up(unsigned long size, unsigned long align) -{ - unsigned long offset = reloc_offset(); - unsigned long base = _ALIGN_UP(RELOC(alloc_bottom), align); - unsigned long addr = 0; - - prom_debug("alloc_up(%x, %x)\n", size, align); - if (RELOC(ram_top) == 0) - prom_panic("alloc_up() called with mem not initialized\n"); - - if (align) - base = _ALIGN_UP(RELOC(alloc_bottom), align); - else - base = RELOC(alloc_bottom); - - for(; (base + size) <= RELOC(alloc_top); - base = _ALIGN_UP(base + 0x100000, align)) { - prom_debug(" trying: 0x%x\n\r", base); - addr = (unsigned long)prom_claim(base, size, 0); - if (addr != PROM_ERROR) - break; - addr = 0; - if (align == 0) - break; - } - if (addr == 0) - return 0; - RELOC(alloc_bottom) = addr; - - prom_debug(" -> %x\n", addr); - prom_debug(" alloc_bottom : %x\n", RELOC(alloc_bottom)); - prom_debug(" alloc_top : %x\n", RELOC(alloc_top)); - prom_debug(" alloc_top_hi : %x\n", RELOC(alloc_top_high)); - prom_debug(" rmo_top : %x\n", RELOC(rmo_top)); - prom_debug(" ram_top : %x\n", RELOC(ram_top)); - - return addr; -} - -/* - * Allocates memory downard, either from top of RMO, or if highmem - * is set, from the top of RAM. Note that this one doesn't handle - * failures. In does claim memory if highmem is not set. - */ -static unsigned long __init alloc_down(unsigned long size, unsigned long align, - int highmem) -{ - unsigned long offset = reloc_offset(); - unsigned long base, addr = 0; - - prom_debug("alloc_down(%x, %x, %s)\n", size, align, - highmem ? RELOC("(high)") : RELOC("(low)")); - if (RELOC(ram_top) == 0) - prom_panic("alloc_down() called with mem not initialized\n"); - - if (highmem) { - /* Carve out storage for the TCE table. */ - addr = _ALIGN_DOWN(RELOC(alloc_top_high) - size, align); - if (addr <= RELOC(alloc_bottom)) - return 0; - else { - /* Will we bump into the RMO ? If yes, check out that we - * didn't overlap existing allocations there, if we did, - * we are dead, we must be the first in town ! - */ - if (addr < RELOC(rmo_top)) { - /* Good, we are first */ - if (RELOC(alloc_top) == RELOC(rmo_top)) - RELOC(alloc_top) = RELOC(rmo_top) = addr; - else - return 0; - } - RELOC(alloc_top_high) = addr; - } - goto bail; - } - - base = _ALIGN_DOWN(RELOC(alloc_top) - size, align); - for(; base > RELOC(alloc_bottom); base = _ALIGN_DOWN(base - 0x100000, align)) { - prom_debug(" trying: 0x%x\n\r", base); - addr = (unsigned long)prom_claim(base, size, 0); - if (addr != PROM_ERROR) - break; - addr = 0; - } - if (addr == 0) - return 0; - RELOC(alloc_top) = addr; - - bail: - prom_debug(" -> %x\n", addr); - prom_debug(" alloc_bottom : %x\n", RELOC(alloc_bottom)); - prom_debug(" alloc_top : %x\n", RELOC(alloc_top)); - prom_debug(" alloc_top_hi : %x\n", RELOC(alloc_top_high)); - prom_debug(" rmo_top : %x\n", RELOC(rmo_top)); - prom_debug(" ram_top : %x\n", RELOC(ram_top)); - - return addr; -} - -/* - * Parse a "reg" cell - */ -static unsigned long __init prom_next_cell(int s, cell_t **cellp) -{ - cell_t *p = *cellp; - unsigned long r = 0; - - /* Ignore more than 2 cells */ - while (s > 2) { - p++; - s--; - } - while (s) { - r <<= 32; - r |= *(p++); - s--; - } - - *cellp = p; - return r; -} - -/* - * Very dumb function for adding to the memory reserve list, but - * we don't need anything smarter at this point - * - * XXX Eventually check for collisions. They should NEVER happen - * if problems seem to show up, it would be a good start to track - * them down. - */ -static void reserve_mem(unsigned long base, unsigned long size) -{ - unsigned long offset = reloc_offset(); - unsigned long top = base + size; - unsigned long cnt = RELOC(mem_reserve_cnt); - - if (size == 0) - return; - - /* We need to always keep one empty entry so that we - * have our terminator with "size" set to 0 since we are - * dumb and just copy this entire array to the boot params - */ - base = _ALIGN_DOWN(base, PAGE_SIZE); - top = _ALIGN_UP(top, PAGE_SIZE); - size = top - base; - - if (cnt >= (MEM_RESERVE_MAP_SIZE - 1)) - prom_panic("Memory reserve map exhausted !\n"); - RELOC(mem_reserve_map)[cnt].base = base; - RELOC(mem_reserve_map)[cnt].size = size; - RELOC(mem_reserve_cnt) = cnt + 1; -} - -/* - * Initialize memory allocation mecanism, parse "memory" nodes and - * obtain that way the top of memory and RMO to setup out local allocator - */ -static void __init prom_init_mem(void) -{ - phandle node; - char *path, type[64]; - unsigned int plen; - cell_t *p, *endp; - unsigned long offset = reloc_offset(); - struct prom_t *_prom = PTRRELOC(&prom); - - /* - * We iterate the memory nodes to find - * 1) top of RMO (first node) - * 2) top of memory - */ - prom_debug("root_addr_cells: %x\n", (long)_prom->root_addr_cells); - prom_debug("root_size_cells: %x\n", (long)_prom->root_size_cells); - - prom_debug("scanning memory:\n"); - path = RELOC(prom_scratch); - - for (node = 0; prom_next_node(&node); ) { - type[0] = 0; - prom_getprop(node, "device_type", type, sizeof(type)); - - if (strcmp(type, RELOC("memory"))) - continue; - - plen = prom_getprop(node, "reg", RELOC(regbuf), sizeof(regbuf)); - if (plen > sizeof(regbuf)) { - prom_printf("memory node too large for buffer !\n"); - plen = sizeof(regbuf); - } - p = RELOC(regbuf); - endp = p + (plen / sizeof(cell_t)); - -#ifdef DEBUG_PROM - memset(path, 0, PROM_SCRATCH_SIZE); - call_prom("package-to-path", 3, 1, node, path, PROM_SCRATCH_SIZE-1); - prom_debug(" node %s :\n", path); -#endif /* DEBUG_PROM */ - - while ((endp - p) >= (_prom->root_addr_cells + _prom->root_size_cells)) { - unsigned long base, size; - - base = prom_next_cell(_prom->root_addr_cells, &p); - size = prom_next_cell(_prom->root_size_cells, &p); - - if (size == 0) - continue; - prom_debug(" %x %x\n", base, size); - if (base == 0) - RELOC(rmo_top) = size; - if ((base + size) > RELOC(ram_top)) - RELOC(ram_top) = base + size; - } - } - - RELOC(alloc_bottom) = PAGE_ALIGN(RELOC(klimit) - offset + 0x4000); - - /* Check if we have an initrd after the kernel, if we do move our bottom - * point to after it - */ - if (RELOC(prom_initrd_start)) { - if (RELOC(prom_initrd_end) > RELOC(alloc_bottom)) - RELOC(alloc_bottom) = PAGE_ALIGN(RELOC(prom_initrd_end)); - } - - /* - * If prom_memory_limit is set we reduce the upper limits *except* for - * alloc_top_high. This must be the real top of RAM so we can put - * TCE's up there. - */ - - RELOC(alloc_top_high) = RELOC(ram_top); - - if (RELOC(prom_memory_limit)) { - if (RELOC(prom_memory_limit) <= RELOC(alloc_bottom)) { - prom_printf("Ignoring mem=%x <= alloc_bottom.\n", - RELOC(prom_memory_limit)); - RELOC(prom_memory_limit) = 0; - } else if (RELOC(prom_memory_limit) >= RELOC(ram_top)) { - prom_printf("Ignoring mem=%x >= ram_top.\n", - RELOC(prom_memory_limit)); - RELOC(prom_memory_limit) = 0; - } else { - RELOC(ram_top) = RELOC(prom_memory_limit); - RELOC(rmo_top) = min(RELOC(rmo_top), RELOC(prom_memory_limit)); - } - } - - /* - * Setup our top alloc point, that is top of RMO or top of - * segment 0 when running non-LPAR. - */ - if ( RELOC(of_platform) == PLATFORM_PSERIES_LPAR ) - RELOC(alloc_top) = RELOC(rmo_top); - else - /* Some RS64 machines have buggy firmware where claims up at 1GB - * fails. Cap at 768MB as a workaround. Still plenty of room. - */ - RELOC(alloc_top) = RELOC(rmo_top) = min(0x30000000ul, RELOC(ram_top)); - - prom_printf("memory layout at init:\n"); - prom_printf(" memory_limit : %x (16 MB aligned)\n", RELOC(prom_memory_limit)); - prom_printf(" alloc_bottom : %x\n", RELOC(alloc_bottom)); - prom_printf(" alloc_top : %x\n", RELOC(alloc_top)); - prom_printf(" alloc_top_hi : %x\n", RELOC(alloc_top_high)); - prom_printf(" rmo_top : %x\n", RELOC(rmo_top)); - prom_printf(" ram_top : %x\n", RELOC(ram_top)); -} - - -/* - * Allocate room for and instanciate RTAS - */ -static void __init prom_instantiate_rtas(void) -{ - unsigned long offset = reloc_offset(); - struct prom_t *_prom = PTRRELOC(&prom); - phandle rtas_node; - ihandle rtas_inst; - u32 base, entry = 0; - u32 size = 0; - - prom_debug("prom_instantiate_rtas: start...\n"); - - rtas_node = call_prom("finddevice", 1, 1, ADDR("/rtas")); - prom_debug("rtas_node: %x\n", rtas_node); - if (!PHANDLE_VALID(rtas_node)) - return; - - prom_getprop(rtas_node, "rtas-size", &size, sizeof(size)); - if (size == 0) - return; - - base = alloc_down(size, PAGE_SIZE, 0); - if (base == 0) { - prom_printf("RTAS allocation failed !\n"); - return; - } - - rtas_inst = call_prom("open", 1, 1, ADDR("/rtas")); - if (!IHANDLE_VALID(rtas_inst)) { - prom_printf("opening rtas package failed"); - return; - } - - prom_printf("instantiating rtas at 0x%x ...", base); - - if (call_prom("call-method", 3, 2, - ADDR("instantiate-rtas"), - rtas_inst, base) != PROM_ERROR) { - entry = (long)_prom->args.rets[1]; - } - if (entry == 0) { - prom_printf(" failed\n"); - return; - } - prom_printf(" done\n"); - - reserve_mem(base, size); - - prom_setprop(rtas_node, "linux,rtas-base", &base, sizeof(base)); - prom_setprop(rtas_node, "linux,rtas-entry", &entry, sizeof(entry)); - - prom_debug("rtas base = 0x%x\n", base); - prom_debug("rtas entry = 0x%x\n", entry); - prom_debug("rtas size = 0x%x\n", (long)size); - - prom_debug("prom_instantiate_rtas: end...\n"); -} - - -/* - * Allocate room for and initialize TCE tables - */ -static void __init prom_initialize_tce_table(void) -{ - phandle node; - ihandle phb_node; - unsigned long offset = reloc_offset(); - char compatible[64], type[64], model[64]; - char *path = RELOC(prom_scratch); - u64 base, align; - u32 minalign, minsize; - u64 tce_entry, *tce_entryp; - u64 local_alloc_top, local_alloc_bottom; - u64 i; - - if (RELOC(ppc64_iommu_off)) - return; - - prom_debug("starting prom_initialize_tce_table\n"); - - /* Cache current top of allocs so we reserve a single block */ - local_alloc_top = RELOC(alloc_top_high); - local_alloc_bottom = local_alloc_top; - - /* Search all nodes looking for PHBs. */ - for (node = 0; prom_next_node(&node); ) { - compatible[0] = 0; - type[0] = 0; - model[0] = 0; - prom_getprop(node, "compatible", - compatible, sizeof(compatible)); - prom_getprop(node, "device_type", type, sizeof(type)); - prom_getprop(node, "model", model, sizeof(model)); - - if ((type[0] == 0) || (strstr(type, RELOC("pci")) == NULL)) - continue; - - /* Keep the old logic in tack to avoid regression. */ - if (compatible[0] != 0) { - if ((strstr(compatible, RELOC("python")) == NULL) && - (strstr(compatible, RELOC("Speedwagon")) == NULL) && - (strstr(compatible, RELOC("Winnipeg")) == NULL)) - continue; - } else if (model[0] != 0) { - if ((strstr(model, RELOC("ython")) == NULL) && - (strstr(model, RELOC("peedwagon")) == NULL) && - (strstr(model, RELOC("innipeg")) == NULL)) - continue; - } - - if (prom_getprop(node, "tce-table-minalign", &minalign, - sizeof(minalign)) == PROM_ERROR) - minalign = 0; - if (prom_getprop(node, "tce-table-minsize", &minsize, - sizeof(minsize)) == PROM_ERROR) - minsize = 4UL << 20; - - /* - * Even though we read what OF wants, we just set the table - * size to 4 MB. This is enough to map 2GB of PCI DMA space. - * By doing this, we avoid the pitfalls of trying to DMA to - * MMIO space and the DMA alias hole. - * - * On POWER4, firmware sets the TCE region by assuming - * each TCE table is 8MB. Using this memory for anything - * else will impact performance, so we always allocate 8MB. - * Anton - */ - if (__is_processor(PV_POWER4) || __is_processor(PV_POWER4p)) - minsize = 8UL << 20; - else - minsize = 4UL << 20; - - /* Align to the greater of the align or size */ - align = max(minalign, minsize); - base = alloc_down(minsize, align, 1); - if (base == 0) - prom_panic("ERROR, cannot find space for TCE table.\n"); - if (base < local_alloc_bottom) - local_alloc_bottom = base; - - /* Save away the TCE table attributes for later use. */ - prom_setprop(node, "linux,tce-base", &base, sizeof(base)); - prom_setprop(node, "linux,tce-size", &minsize, sizeof(minsize)); - - /* It seems OF doesn't null-terminate the path :-( */ - memset(path, 0, sizeof(path)); - /* Call OF to setup the TCE hardware */ - if (call_prom("package-to-path", 3, 1, node, - path, PROM_SCRATCH_SIZE-1) == PROM_ERROR) { - prom_printf("package-to-path failed\n"); - } - - prom_debug("TCE table: %s\n", path); - prom_debug("\tnode = 0x%x\n", node); - prom_debug("\tbase = 0x%x\n", base); - prom_debug("\tsize = 0x%x\n", minsize); - - /* Initialize the table to have a one-to-one mapping - * over the allocated size. - */ - tce_entryp = (unsigned long *)base; - for (i = 0; i < (minsize >> 3) ;tce_entryp++, i++) { - tce_entry = (i << PAGE_SHIFT); - tce_entry |= 0x3; - *tce_entryp = tce_entry; - } - - prom_printf("opening PHB %s", path); - phb_node = call_prom("open", 1, 1, path); - if (phb_node == 0) - prom_printf("... failed\n"); - else - prom_printf("... done\n"); - - call_prom("call-method", 6, 0, ADDR("set-64-bit-addressing"), - phb_node, -1, minsize, - (u32) base, (u32) (base >> 32)); - call_prom("close", 1, 0, phb_node); - } - - reserve_mem(local_alloc_bottom, local_alloc_top - local_alloc_bottom); - - if (RELOC(prom_memory_limit)) { - /* - * We align the start to a 16MB boundary so we can map the TCE area - * using large pages if possible. The end should be the top of RAM - * so no need to align it. - */ - RELOC(prom_tce_alloc_start) = _ALIGN_DOWN(local_alloc_bottom, 0x1000000); - RELOC(prom_tce_alloc_end) = local_alloc_top; - } - - /* Flag the first invalid entry */ - prom_debug("ending prom_initialize_tce_table\n"); -} - -/* - * With CHRP SMP we need to use the OF to start the other - * processors so we can't wait until smp_boot_cpus (the OF is - * trashed by then) so we have to put the processors into - * a holding pattern controlled by the kernel (not OF) before - * we destroy the OF. - * - * This uses a chunk of low memory, puts some holding pattern - * code there and sends the other processors off to there until - * smp_boot_cpus tells them to do something. The holding pattern - * checks that address until its cpu # is there, when it is that - * cpu jumps to __secondary_start(). smp_boot_cpus() takes care - * of setting those values. - * - * We also use physical address 0x4 here to tell when a cpu - * is in its holding pattern code. - * - * Fixup comment... DRENG / PPPBBB - Peter - * - * -- Cort - */ -static void __init prom_hold_cpus(void) -{ - unsigned long i; - unsigned int reg; - phandle node; - unsigned long offset = reloc_offset(); - char type[64]; - int cpuid = 0; - unsigned int interrupt_server[MAX_CPU_THREADS]; - unsigned int cpu_threads, hw_cpu_num; - int propsize; - extern void __secondary_hold(void); - extern unsigned long __secondary_hold_spinloop; - extern unsigned long __secondary_hold_acknowledge; - unsigned long *spinloop - = (void *)virt_to_abs(&__secondary_hold_spinloop); - unsigned long *acknowledge - = (void *)virt_to_abs(&__secondary_hold_acknowledge); - unsigned long secondary_hold - = virt_to_abs(*PTRRELOC((unsigned long *)__secondary_hold)); - struct prom_t *_prom = PTRRELOC(&prom); - - prom_debug("prom_hold_cpus: start...\n"); - prom_debug(" 1) spinloop = 0x%x\n", (unsigned long)spinloop); - prom_debug(" 1) *spinloop = 0x%x\n", *spinloop); - prom_debug(" 1) acknowledge = 0x%x\n", - (unsigned long)acknowledge); - prom_debug(" 1) *acknowledge = 0x%x\n", *acknowledge); - prom_debug(" 1) secondary_hold = 0x%x\n", secondary_hold); - - /* Set the common spinloop variable, so all of the secondary cpus - * will block when they are awakened from their OF spinloop. - * This must occur for both SMP and non SMP kernels, since OF will - * be trashed when we move the kernel. - */ - *spinloop = 0; - -#ifdef CONFIG_HMT - for (i=0; i < NR_CPUS; i++) { - RELOC(hmt_thread_data)[i].pir = 0xdeadbeef; - } -#endif - /* look for cpus */ - for (node = 0; prom_next_node(&node); ) { - type[0] = 0; - prom_getprop(node, "device_type", type, sizeof(type)); - if (strcmp(type, RELOC("cpu")) != 0) - continue; - - /* Skip non-configured cpus. */ - if (prom_getprop(node, "status", type, sizeof(type)) > 0) - if (strcmp(type, RELOC("okay")) != 0) - continue; - - reg = -1; - prom_getprop(node, "reg", ®, sizeof(reg)); - - prom_debug("\ncpuid = 0x%x\n", cpuid); - prom_debug("cpu hw idx = 0x%x\n", reg); - - /* Init the acknowledge var which will be reset by - * the secondary cpu when it awakens from its OF - * spinloop. - */ - *acknowledge = (unsigned long)-1; - - propsize = prom_getprop(node, "ibm,ppc-interrupt-server#s", - &interrupt_server, - sizeof(interrupt_server)); - if (propsize < 0) { - /* no property. old hardware has no SMT */ - cpu_threads = 1; - interrupt_server[0] = reg; /* fake it with phys id */ - } else { - /* We have a threaded processor */ - cpu_threads = propsize / sizeof(u32); - if (cpu_threads > MAX_CPU_THREADS) { - prom_printf("SMT: too many threads!\n" - "SMT: found %x, max is %x\n", - cpu_threads, MAX_CPU_THREADS); - cpu_threads = 1; /* ToDo: panic? */ - } - } - - hw_cpu_num = interrupt_server[0]; - if (hw_cpu_num != _prom->cpu) { - /* Primary Thread of non-boot cpu */ - prom_printf("%x : starting cpu hw idx %x... ", cpuid, reg); - call_prom("start-cpu", 3, 0, node, - secondary_hold, reg); - - for ( i = 0 ; (i < 100000000) && - (*acknowledge == ((unsigned long)-1)); i++ ) - mb(); - - if (*acknowledge == reg) { - prom_printf("done\n"); - /* We have to get every CPU out of OF, - * even if we never start it. */ - if (cpuid >= NR_CPUS) - goto next; - } else { - prom_printf("failed: %x\n", *acknowledge); - } - } -#ifdef CONFIG_SMP - else - prom_printf("%x : boot cpu %x\n", cpuid, reg); -#endif -next: -#ifdef CONFIG_SMP - /* Init paca for secondary threads. They start later. */ - for (i=1; i < cpu_threads; i++) { - cpuid++; - if (cpuid >= NR_CPUS) - continue; - } -#endif /* CONFIG_SMP */ - cpuid++; - } -#ifdef CONFIG_HMT - /* Only enable HMT on processors that provide support. */ - if (__is_processor(PV_PULSAR) || - __is_processor(PV_ICESTAR) || - __is_processor(PV_SSTAR)) { - prom_printf(" starting secondary threads\n"); - - for (i = 0; i < NR_CPUS; i += 2) { - if (!cpu_online(i)) - continue; - - if (i == 0) { - unsigned long pir = mfspr(SPRN_PIR); - if (__is_processor(PV_PULSAR)) { - RELOC(hmt_thread_data)[i].pir = - pir & 0x1f; - } else { - RELOC(hmt_thread_data)[i].pir = - pir & 0x3ff; - } - } - } - } else { - prom_printf("Processor is not HMT capable\n"); - } -#endif - - if (cpuid > NR_CPUS) - prom_printf("WARNING: maximum CPUs (" __stringify(NR_CPUS) - ") exceeded: ignoring extras\n"); - - prom_debug("prom_hold_cpus: end...\n"); -} - - -static void __init prom_init_client_services(unsigned long pp) -{ - unsigned long offset = reloc_offset(); - struct prom_t *_prom = PTRRELOC(&prom); - - /* Get a handle to the prom entry point before anything else */ - _prom->entry = pp; - - /* Init default value for phys size */ - _prom->root_size_cells = 1; - _prom->root_addr_cells = 2; - - /* get a handle for the stdout device */ - _prom->chosen = call_prom("finddevice", 1, 1, ADDR("/chosen")); - if (!PHANDLE_VALID(_prom->chosen)) - prom_panic("cannot find chosen"); /* msg won't be printed :( */ - - /* get device tree root */ - _prom->root = call_prom("finddevice", 1, 1, ADDR("/")); - if (!PHANDLE_VALID(_prom->root)) - prom_panic("cannot find device tree root"); /* msg won't be printed :( */ -} - -static void __init prom_init_stdout(void) -{ - unsigned long offset = reloc_offset(); - struct prom_t *_prom = PTRRELOC(&prom); - char *path = RELOC(of_stdout_device); - char type[16]; - u32 val; - - if (prom_getprop(_prom->chosen, "stdout", &val, sizeof(val)) <= 0) - prom_panic("cannot find stdout"); - - _prom->stdout = val; - - /* Get the full OF pathname of the stdout device */ - memset(path, 0, 256); - call_prom("instance-to-path", 3, 1, _prom->stdout, path, 255); - val = call_prom("instance-to-package", 1, 1, _prom->stdout); - prom_setprop(_prom->chosen, "linux,stdout-package", &val, sizeof(val)); - prom_printf("OF stdout device is: %s\n", RELOC(of_stdout_device)); - prom_setprop(_prom->chosen, "linux,stdout-path", - RELOC(of_stdout_device), strlen(RELOC(of_stdout_device))+1); - - /* If it's a display, note it */ - memset(type, 0, sizeof(type)); - prom_getprop(val, "device_type", type, sizeof(type)); - if (strcmp(type, RELOC("display")) == 0) { - _prom->disp_node = val; - prom_setprop(val, "linux,boot-display", NULL, 0); - } -} - -static void __init prom_close_stdin(void) -{ - unsigned long offset = reloc_offset(); - struct prom_t *_prom = PTRRELOC(&prom); - ihandle val; - - if (prom_getprop(_prom->chosen, "stdin", &val, sizeof(val)) > 0) - call_prom("close", 1, 0, val); -} - -static int __init prom_find_machine_type(void) -{ - unsigned long offset = reloc_offset(); - struct prom_t *_prom = PTRRELOC(&prom); - char compat[256]; - int len, i = 0; - phandle rtas; - - len = prom_getprop(_prom->root, "compatible", - compat, sizeof(compat)-1); - if (len > 0) { - compat[len] = 0; - while (i < len) { - char *p = &compat[i]; - int sl = strlen(p); - if (sl == 0) - break; - if (strstr(p, RELOC("Power Macintosh")) || - strstr(p, RELOC("MacRISC4"))) - return PLATFORM_POWERMAC; - if (strstr(p, RELOC("Momentum,Maple"))) - return PLATFORM_MAPLE; - i += sl + 1; - } - } - /* Default to pSeries. We need to know if we are running LPAR */ - rtas = call_prom("finddevice", 1, 1, ADDR("/rtas")); - if (PHANDLE_VALID(rtas)) { - int x = prom_getproplen(rtas, "ibm,hypertas-functions"); - if (x != PROM_ERROR) { - prom_printf("Hypertas detected, assuming LPAR !\n"); - return PLATFORM_PSERIES_LPAR; - } - } - return PLATFORM_PSERIES; -} - -static int __init prom_set_color(ihandle ih, int i, int r, int g, int b) -{ - unsigned long offset = reloc_offset(); - - return call_prom("call-method", 6, 1, ADDR("color!"), ih, i, b, g, r); -} - -/* - * If we have a display that we don't know how to drive, - * we will want to try to execute OF's open method for it - * later. However, OF will probably fall over if we do that - * we've taken over the MMU. - * So we check whether we will need to open the display, - * and if so, open it now. - */ -static void __init prom_check_displays(void) -{ - unsigned long offset = reloc_offset(); - struct prom_t *_prom = PTRRELOC(&prom); - char type[16], *path; - phandle node; - ihandle ih; - int i; - - static unsigned char default_colors[] = { - 0x00, 0x00, 0x00, - 0x00, 0x00, 0xaa, - 0x00, 0xaa, 0x00, - 0x00, 0xaa, 0xaa, - 0xaa, 0x00, 0x00, - 0xaa, 0x00, 0xaa, - 0xaa, 0xaa, 0x00, - 0xaa, 0xaa, 0xaa, - 0x55, 0x55, 0x55, - 0x55, 0x55, 0xff, - 0x55, 0xff, 0x55, - 0x55, 0xff, 0xff, - 0xff, 0x55, 0x55, - 0xff, 0x55, 0xff, - 0xff, 0xff, 0x55, - 0xff, 0xff, 0xff - }; - const unsigned char *clut; - - prom_printf("Looking for displays\n"); - for (node = 0; prom_next_node(&node); ) { - memset(type, 0, sizeof(type)); - prom_getprop(node, "device_type", type, sizeof(type)); - if (strcmp(type, RELOC("display")) != 0) - continue; - - /* It seems OF doesn't null-terminate the path :-( */ - path = RELOC(prom_scratch); - memset(path, 0, PROM_SCRATCH_SIZE); - - /* - * leave some room at the end of the path for appending extra - * arguments - */ - if (call_prom("package-to-path", 3, 1, node, path, - PROM_SCRATCH_SIZE-10) == PROM_ERROR) - continue; - prom_printf("found display : %s, opening ... ", path); - - ih = call_prom("open", 1, 1, path); - if (ih == 0) { - prom_printf("failed\n"); - continue; - } - - /* Success */ - prom_printf("done\n"); - prom_setprop(node, "linux,opened", NULL, 0); - - /* - * stdout wasn't a display node, pick the first we can find - * for btext - */ - if (_prom->disp_node == 0) - _prom->disp_node = node; - - /* Setup a useable color table when the appropriate - * method is available. Should update this to set-colors */ - clut = RELOC(default_colors); - for (i = 0; i < 32; i++, clut += 3) - if (prom_set_color(ih, i, clut[0], clut[1], - clut[2]) != 0) - break; - -#ifdef CONFIG_LOGO_LINUX_CLUT224 - clut = PTRRELOC(RELOC(logo_linux_clut224.clut)); - for (i = 0; i < RELOC(logo_linux_clut224.clutsize); i++, clut += 3) - if (prom_set_color(ih, i + 32, clut[0], clut[1], - clut[2]) != 0) - break; -#endif /* CONFIG_LOGO_LINUX_CLUT224 */ - } -} - - -/* Return (relocated) pointer to this much memory: moves initrd if reqd. */ -static void __init *make_room(unsigned long *mem_start, unsigned long *mem_end, - unsigned long needed, unsigned long align) -{ - unsigned long offset = reloc_offset(); - void *ret; - - *mem_start = _ALIGN(*mem_start, align); - while ((*mem_start + needed) > *mem_end) { - unsigned long room, chunk; - - prom_debug("Chunk exhausted, claiming more at %x...\n", - RELOC(alloc_bottom)); - room = RELOC(alloc_top) - RELOC(alloc_bottom); - if (room > DEVTREE_CHUNK_SIZE) - room = DEVTREE_CHUNK_SIZE; - if (room < PAGE_SIZE) - prom_panic("No memory for flatten_device_tree (no room)"); - chunk = alloc_up(room, 0); - if (chunk == 0) - prom_panic("No memory for flatten_device_tree (claim failed)"); - *mem_end = RELOC(alloc_top); - } - - ret = (void *)*mem_start; - *mem_start += needed; - - return ret; -} - -#define dt_push_token(token, mem_start, mem_end) \ - do { *((u32 *)make_room(mem_start, mem_end, 4, 4)) = token; } while(0) - -static unsigned long __init dt_find_string(char *str) -{ - unsigned long offset = reloc_offset(); - char *s, *os; - - s = os = (char *)RELOC(dt_string_start); - s += 4; - while (s < (char *)RELOC(dt_string_end)) { - if (strcmp(s, str) == 0) - return s - os; - s += strlen(s) + 1; - } - return 0; -} - -/* - * The Open Firmware 1275 specification states properties must be 31 bytes or - * less, however not all firmwares obey this. Make it 64 bytes to be safe. - */ -#define MAX_PROPERTY_NAME 64 - -static void __init scan_dt_build_strings(phandle node, - unsigned long *mem_start, - unsigned long *mem_end) -{ - unsigned long offset = reloc_offset(); - char *prev_name, *namep, *sstart; - unsigned long soff; - phandle child; - - sstart = (char *)RELOC(dt_string_start); - - /* get and store all property names */ - prev_name = RELOC(""); - for (;;) { - /* 64 is max len of name including nul. */ - namep = make_room(mem_start, mem_end, MAX_PROPERTY_NAME, 1); - if (call_prom("nextprop", 3, 1, node, prev_name, namep) != 1) { - /* No more nodes: unwind alloc */ - *mem_start = (unsigned long)namep; - break; - } - - /* skip "name" */ - if (strcmp(namep, RELOC("name")) == 0) { - *mem_start = (unsigned long)namep; - prev_name = RELOC("name"); - continue; - } - /* get/create string entry */ - soff = dt_find_string(namep); - if (soff != 0) { - *mem_start = (unsigned long)namep; - namep = sstart + soff; - } else { - /* Trim off some if we can */ - *mem_start = (unsigned long)namep + strlen(namep) + 1; - RELOC(dt_string_end) = *mem_start; - } - prev_name = namep; - } - - /* do all our children */ - child = call_prom("child", 1, 1, node); - while (child != 0) { - scan_dt_build_strings(child, mem_start, mem_end); - child = call_prom("peer", 1, 1, child); - } -} - -static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start, - unsigned long *mem_end) -{ - phandle child; - char *namep, *prev_name, *sstart, *p, *ep, *lp, *path; - unsigned long soff; - unsigned char *valp; - unsigned long offset = reloc_offset(); - static char pname[MAX_PROPERTY_NAME]; - int l; - - dt_push_token(OF_DT_BEGIN_NODE, mem_start, mem_end); - - /* get the node's full name */ - namep = (char *)*mem_start; - l = call_prom("package-to-path", 3, 1, node, - namep, *mem_end - *mem_start); - if (l >= 0) { - /* Didn't fit? Get more room. */ - if ((l+1) > (*mem_end - *mem_start)) { - namep = make_room(mem_start, mem_end, l+1, 1); - call_prom("package-to-path", 3, 1, node, namep, l); - } - namep[l] = '\0'; - - /* Fixup an Apple bug where they have bogus \0 chars in the - * middle of the path in some properties - */ - for (p = namep, ep = namep + l; p < ep; p++) - if (*p == '\0') { - memmove(p, p+1, ep - p); - ep--; l--; p--; - } - - /* now try to extract the unit name in that mess */ - for (p = namep, lp = NULL; *p; p++) - if (*p == '/') - lp = p + 1; - if (lp != NULL) - memmove(namep, lp, strlen(lp) + 1); - *mem_start = _ALIGN(((unsigned long) namep) + - strlen(namep) + 1, 4); - } - - /* get it again for debugging */ - path = RELOC(prom_scratch); - memset(path, 0, PROM_SCRATCH_SIZE); - call_prom("package-to-path", 3, 1, node, path, PROM_SCRATCH_SIZE-1); - - /* get and store all properties */ - prev_name = RELOC(""); - sstart = (char *)RELOC(dt_string_start); - for (;;) { - if (call_prom("nextprop", 3, 1, node, prev_name, - RELOC(pname)) != 1) - break; - - /* skip "name" */ - if (strcmp(RELOC(pname), RELOC("name")) == 0) { - prev_name = RELOC("name"); - continue; - } - - /* find string offset */ - soff = dt_find_string(RELOC(pname)); - if (soff == 0) { - prom_printf("WARNING: Can't find string index for" - " <%s>, node %s\n", RELOC(pname), path); - break; - } - prev_name = sstart + soff; - - /* get length */ - l = call_prom("getproplen", 2, 1, node, RELOC(pname)); - - /* sanity checks */ - if (l == PROM_ERROR) - continue; - if (l > MAX_PROPERTY_LENGTH) { - prom_printf("WARNING: ignoring large property "); - /* It seems OF doesn't null-terminate the path :-( */ - prom_printf("[%s] ", path); - prom_printf("%s length 0x%x\n", RELOC(pname), l); - continue; - } - - /* push property head */ - dt_push_token(OF_DT_PROP, mem_start, mem_end); - dt_push_token(l, mem_start, mem_end); - dt_push_token(soff, mem_start, mem_end); - - /* push property content */ - valp = make_room(mem_start, mem_end, l, 4); - call_prom("getprop", 4, 1, node, RELOC(pname), valp, l); - *mem_start = _ALIGN(*mem_start, 4); - } - - /* Add a "linux,phandle" property. */ - soff = dt_find_string(RELOC("linux,phandle")); - if (soff == 0) - prom_printf("WARNING: Can't find string index for" - " <linux-phandle> node %s\n", path); - else { - dt_push_token(OF_DT_PROP, mem_start, mem_end); - dt_push_token(4, mem_start, mem_end); - dt_push_token(soff, mem_start, mem_end); - valp = make_room(mem_start, mem_end, 4, 4); - *(u32 *)valp = node; - } - - /* do all our children */ - child = call_prom("child", 1, 1, node); - while (child != 0) { - scan_dt_build_struct(child, mem_start, mem_end); - child = call_prom("peer", 1, 1, child); - } - - dt_push_token(OF_DT_END_NODE, mem_start, mem_end); -} - -static void __init flatten_device_tree(void) -{ - phandle root; - unsigned long offset = reloc_offset(); - unsigned long mem_start, mem_end, room; - struct boot_param_header *hdr; - struct prom_t *_prom = PTRRELOC(&prom); - char *namep; - u64 *rsvmap; - - /* - * Check how much room we have between alloc top & bottom (+/- a - * few pages), crop to 4Mb, as this is our "chuck" size - */ - room = RELOC(alloc_top) - RELOC(alloc_bottom) - 0x4000; - if (room > DEVTREE_CHUNK_SIZE) - room = DEVTREE_CHUNK_SIZE; - prom_debug("starting device tree allocs at %x\n", RELOC(alloc_bottom)); - - /* Now try to claim that */ - mem_start = (unsigned long)alloc_up(room, PAGE_SIZE); - if (mem_start == 0) - prom_panic("Can't allocate initial device-tree chunk\n"); - mem_end = RELOC(alloc_top); - - /* Get root of tree */ - root = call_prom("peer", 1, 1, (phandle)0); - if (root == (phandle)0) - prom_panic ("couldn't get device tree root\n"); - - /* Build header and make room for mem rsv map */ - mem_start = _ALIGN(mem_start, 4); - hdr = make_room(&mem_start, &mem_end, - sizeof(struct boot_param_header), 4); - RELOC(dt_header_start) = (unsigned long)hdr; - rsvmap = make_room(&mem_start, &mem_end, sizeof(mem_reserve_map), 8); - - /* Start of strings */ - mem_start = PAGE_ALIGN(mem_start); - RELOC(dt_string_start) = mem_start; - mem_start += 4; /* hole */ - - /* Add "linux,phandle" in there, we'll need it */ - namep = make_room(&mem_start, &mem_end, 16, 1); - strcpy(namep, RELOC("linux,phandle")); - mem_start = (unsigned long)namep + strlen(namep) + 1; - - /* Build string array */ - prom_printf("Building dt strings...\n"); - scan_dt_build_strings(root, &mem_start, &mem_end); - RELOC(dt_string_end) = mem_start; - - /* Build structure */ - mem_start = PAGE_ALIGN(mem_start); - RELOC(dt_struct_start) = mem_start; - prom_printf("Building dt structure...\n"); - scan_dt_build_struct(root, &mem_start, &mem_end); - dt_push_token(OF_DT_END, &mem_start, &mem_end); - RELOC(dt_struct_end) = PAGE_ALIGN(mem_start); - - /* Finish header */ - hdr->boot_cpuid_phys = _prom->cpu; - hdr->magic = OF_DT_HEADER; - hdr->totalsize = RELOC(dt_struct_end) - RELOC(dt_header_start); - hdr->off_dt_struct = RELOC(dt_struct_start) - RELOC(dt_header_start); - hdr->off_dt_strings = RELOC(dt_string_start) - RELOC(dt_header_start); - hdr->dt_strings_size = RELOC(dt_string_end) - RELOC(dt_string_start); - hdr->off_mem_rsvmap = ((unsigned long)rsvmap) - RELOC(dt_header_start); - hdr->version = OF_DT_VERSION; - /* Version 16 is not backward compatible */ - hdr->last_comp_version = 0x10; - - /* Reserve the whole thing and copy the reserve map in, we - * also bump mem_reserve_cnt to cause further reservations to - * fail since it's too late. - */ - reserve_mem(RELOC(dt_header_start), hdr->totalsize); - memcpy(rsvmap, RELOC(mem_reserve_map), sizeof(mem_reserve_map)); - -#ifdef DEBUG_PROM - { - int i; - prom_printf("reserved memory map:\n"); - for (i = 0; i < RELOC(mem_reserve_cnt); i++) - prom_printf(" %x - %x\n", RELOC(mem_reserve_map)[i].base, - RELOC(mem_reserve_map)[i].size); - } -#endif - RELOC(mem_reserve_cnt) = MEM_RESERVE_MAP_SIZE; - - prom_printf("Device tree strings 0x%x -> 0x%x\n", - RELOC(dt_string_start), RELOC(dt_string_end)); - prom_printf("Device tree struct 0x%x -> 0x%x\n", - RELOC(dt_struct_start), RELOC(dt_struct_end)); - -} - - -static void __init fixup_device_tree(void) -{ - unsigned long offset = reloc_offset(); - phandle u3, i2c, mpic; - u32 u3_rev; - u32 interrupts[2]; - u32 parent; - - /* Some G5s have a missing interrupt definition, fix it up here */ - u3 = call_prom("finddevice", 1, 1, ADDR("/u3@0,f8000000")); - if (!PHANDLE_VALID(u3)) - return; - i2c = call_prom("finddevice", 1, 1, ADDR("/u3@0,f8000000/i2c@f8001000")); - if (!PHANDLE_VALID(i2c)) - return; - mpic = call_prom("finddevice", 1, 1, ADDR("/u3@0,f8000000/mpic@f8040000")); - if (!PHANDLE_VALID(mpic)) - return; - - /* check if proper rev of u3 */ - if (prom_getprop(u3, "device-rev", &u3_rev, sizeof(u3_rev)) - == PROM_ERROR) - return; - if (u3_rev < 0x35 || u3_rev > 0x39) - return; - /* does it need fixup ? */ - if (prom_getproplen(i2c, "interrupts") > 0) - return; - - prom_printf("fixing up bogus interrupts for u3 i2c...\n"); - - /* interrupt on this revision of u3 is number 0 and level */ - interrupts[0] = 0; - interrupts[1] = 1; - prom_setprop(i2c, "interrupts", &interrupts, sizeof(interrupts)); - parent = (u32)mpic; - prom_setprop(i2c, "interrupt-parent", &parent, sizeof(parent)); -} - - -static void __init prom_find_boot_cpu(void) -{ - unsigned long offset = reloc_offset(); - struct prom_t *_prom = PTRRELOC(&prom); - u32 getprop_rval; - ihandle prom_cpu; - phandle cpu_pkg; - - if (prom_getprop(_prom->chosen, "cpu", &prom_cpu, sizeof(prom_cpu)) <= 0) - prom_panic("cannot find boot cpu"); - - cpu_pkg = call_prom("instance-to-package", 1, 1, prom_cpu); - - prom_getprop(cpu_pkg, "reg", &getprop_rval, sizeof(getprop_rval)); - _prom->cpu = getprop_rval; - - prom_debug("Booting CPU hw index = 0x%x\n", _prom->cpu); -} - -static void __init prom_check_initrd(unsigned long r3, unsigned long r4) -{ -#ifdef CONFIG_BLK_DEV_INITRD - unsigned long offset = reloc_offset(); - struct prom_t *_prom = PTRRELOC(&prom); - - if ( r3 && r4 && r4 != 0xdeadbeef) { - u64 val; - - RELOC(prom_initrd_start) = (r3 >= KERNELBASE) ? __pa(r3) : r3; - RELOC(prom_initrd_end) = RELOC(prom_initrd_start) + r4; - - val = (u64)RELOC(prom_initrd_start); - prom_setprop(_prom->chosen, "linux,initrd-start", &val, sizeof(val)); - val = (u64)RELOC(prom_initrd_end); - prom_setprop(_prom->chosen, "linux,initrd-end", &val, sizeof(val)); - - reserve_mem(RELOC(prom_initrd_start), - RELOC(prom_initrd_end) - RELOC(prom_initrd_start)); - - prom_debug("initrd_start=0x%x\n", RELOC(prom_initrd_start)); - prom_debug("initrd_end=0x%x\n", RELOC(prom_initrd_end)); - } -#endif /* CONFIG_BLK_DEV_INITRD */ -} - -/* - * We enter here early on, when the Open Firmware prom is still - * handling exceptions and the MMU hash table for us. - */ - -unsigned long __init prom_init(unsigned long r3, unsigned long r4, unsigned long pp, - unsigned long r6, unsigned long r7) -{ - unsigned long offset = reloc_offset(); - struct prom_t *_prom = PTRRELOC(&prom); - unsigned long phys = KERNELBASE - offset; - u32 getprop_rval; - - /* - * First zero the BSS - */ - memset(PTRRELOC(&__bss_start), 0, __bss_stop - __bss_start); - - /* - * Init interface to Open Firmware, get some node references, - * like /chosen - */ - prom_init_client_services(pp); - - /* - * Init prom stdout device - */ - prom_init_stdout(); - prom_debug("klimit=0x%x\n", RELOC(klimit)); - prom_debug("offset=0x%x\n", offset); - - /* - * Check for an initrd - */ - prom_check_initrd(r3, r4); - - /* - * Get default machine type. At this point, we do not differenciate - * between pSeries SMP and pSeries LPAR - */ - RELOC(of_platform) = prom_find_machine_type(); - getprop_rval = RELOC(of_platform); - prom_setprop(_prom->chosen, "linux,platform", - &getprop_rval, sizeof(getprop_rval)); - - /* - * On pSeries, inform the firmware about our capabilities - */ - if (RELOC(of_platform) == PLATFORM_PSERIES || - RELOC(of_platform) == PLATFORM_PSERIES_LPAR) - prom_send_capabilities(); - - /* - * On pSeries and Cell, copy the CPU hold code - */ - if (RELOC(of_platform) & (PLATFORM_PSERIES | PLATFORM_CELL)) - copy_and_flush(0, KERNELBASE - offset, 0x100, 0); - - /* - * Get memory cells format - */ - getprop_rval = 1; - prom_getprop(_prom->root, "#size-cells", - &getprop_rval, sizeof(getprop_rval)); - _prom->root_size_cells = getprop_rval; - getprop_rval = 2; - prom_getprop(_prom->root, "#address-cells", - &getprop_rval, sizeof(getprop_rval)); - _prom->root_addr_cells = getprop_rval; - - /* - * Do early parsing of command line - */ - early_cmdline_parse(); - - /* - * Initialize memory management within prom_init - */ - prom_init_mem(); - - /* - * Determine which cpu is actually running right _now_ - */ - prom_find_boot_cpu(); - - /* - * Initialize display devices - */ - prom_check_displays(); - - /* - * Initialize IOMMU (TCE tables) on pSeries. Do that before anything else - * that uses the allocator, we need to make sure we get the top of memory - * available for us here... - */ - if (RELOC(of_platform) == PLATFORM_PSERIES) - prom_initialize_tce_table(); - - /* - * On non-powermacs, try to instantiate RTAS and puts all CPUs - * in spin-loops. PowerMacs don't have a working RTAS and use - * a different way to spin CPUs - */ - if (RELOC(of_platform) != PLATFORM_POWERMAC) { - prom_instantiate_rtas(); - prom_hold_cpus(); - } - - /* - * Fill in some infos for use by the kernel later on - */ - if (RELOC(ppc64_iommu_off)) - prom_setprop(_prom->chosen, "linux,iommu-off", NULL, 0); - - if (RELOC(iommu_force_on)) - prom_setprop(_prom->chosen, "linux,iommu-force-on", NULL, 0); - - if (RELOC(prom_memory_limit)) - prom_setprop(_prom->chosen, "linux,memory-limit", - PTRRELOC(&prom_memory_limit), sizeof(RELOC(prom_memory_limit))); - - if (RELOC(prom_tce_alloc_start)) { - prom_setprop(_prom->chosen, "linux,tce-alloc-start", - PTRRELOC(&prom_tce_alloc_start), sizeof(RELOC(prom_tce_alloc_start))); - prom_setprop(_prom->chosen, "linux,tce-alloc-end", - PTRRELOC(&prom_tce_alloc_end), sizeof(RELOC(prom_tce_alloc_end))); - } - - /* - * Fixup any known bugs in the device-tree - */ - fixup_device_tree(); - - /* - * Now finally create the flattened device-tree - */ - prom_printf("copying OF device tree ...\n"); - flatten_device_tree(); - - /* in case stdin is USB and still active on IBM machines... */ - prom_close_stdin(); - - /* - * Call OF "quiesce" method to shut down pending DMA's from - * devices etc... - */ - prom_printf("Calling quiesce ...\n"); - call_prom("quiesce", 0, 0); - - /* - * And finally, call the kernel passing it the flattened device - * tree and NULL as r5, thus triggering the new entry point which - * is common to us and kexec - */ - prom_printf("returning from prom_init\n"); - prom_debug("->dt_header_start=0x%x\n", RELOC(dt_header_start)); - prom_debug("->phys=0x%x\n", phys); - - __start(RELOC(dt_header_start), phys, 0); - - return 0; -} - diff --git a/arch/ppc64/kernel/rtc.c b/arch/ppc64/kernel/rtc.c deleted file mode 100644 index 79e7ed2858dd..000000000000 --- a/arch/ppc64/kernel/rtc.c +++ /dev/null @@ -1,358 +0,0 @@ -/* - * Real Time Clock interface for PPC64. - * - * Based on rtc.c by Paul Gortmaker - * - * This driver allows use of the real time clock - * from user space. It exports the /dev/rtc - * interface supporting various ioctl() and also the - * /proc/driver/rtc pseudo-file for status information. - * - * Interface does not support RTC interrupts nor an alarm. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - * 1.0 Mike Corrigan: IBM iSeries rtc support - * 1.1 Dave Engebretsen: IBM pSeries rtc support - */ - -#define RTC_VERSION "1.1" - -#include <linux/config.h> -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/types.h> -#include <linux/miscdevice.h> -#include <linux/ioport.h> -#include <linux/fcntl.h> -#include <linux/mc146818rtc.h> -#include <linux/init.h> -#include <linux/poll.h> -#include <linux/proc_fs.h> -#include <linux/spinlock.h> -#include <linux/bcd.h> -#include <linux/interrupt.h> -#include <linux/delay.h> - -#include <asm/io.h> -#include <asm/uaccess.h> -#include <asm/system.h> -#include <asm/time.h> -#include <asm/rtas.h> - -#include <asm/machdep.h> - -/* - * We sponge a minor off of the misc major. No need slurping - * up another valuable major dev number for this. If you add - * an ioctl, make sure you don't conflict with SPARC's RTC - * ioctls. - */ - -static ssize_t rtc_read(struct file *file, char __user *buf, - size_t count, loff_t *ppos); - -static int rtc_ioctl(struct inode *inode, struct file *file, - unsigned int cmd, unsigned long arg); - -static int rtc_read_proc(char *page, char **start, off_t off, - int count, int *eof, void *data); - -/* - * If this driver ever becomes modularised, it will be really nice - * to make the epoch retain its value across module reload... - */ - -static unsigned long epoch = 1900; /* year corresponding to 0x00 */ - -static const unsigned char days_in_mo[] = -{0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}; - -/* - * Now all the various file operations that we export. - */ - -static ssize_t rtc_read(struct file *file, char __user *buf, - size_t count, loff_t *ppos) -{ - return -EIO; -} - -static int rtc_ioctl(struct inode *inode, struct file *file, unsigned int cmd, - unsigned long arg) -{ - struct rtc_time wtime; - - switch (cmd) { - case RTC_RD_TIME: /* Read the time/date from RTC */ - { - memset(&wtime, 0, sizeof(struct rtc_time)); - ppc_md.get_rtc_time(&wtime); - break; - } - case RTC_SET_TIME: /* Set the RTC */ - { - struct rtc_time rtc_tm; - unsigned char mon, day, hrs, min, sec, leap_yr; - unsigned int yrs; - - if (!capable(CAP_SYS_TIME)) - return -EACCES; - - if (copy_from_user(&rtc_tm, (struct rtc_time __user *)arg, - sizeof(struct rtc_time))) - return -EFAULT; - - yrs = rtc_tm.tm_year; - mon = rtc_tm.tm_mon + 1; /* tm_mon starts at zero */ - day = rtc_tm.tm_mday; - hrs = rtc_tm.tm_hour; - min = rtc_tm.tm_min; - sec = rtc_tm.tm_sec; - - if (yrs < 70) - return -EINVAL; - - leap_yr = ((!(yrs % 4) && (yrs % 100)) || !(yrs % 400)); - - if ((mon > 12) || (day == 0)) - return -EINVAL; - - if (day > (days_in_mo[mon] + ((mon == 2) && leap_yr))) - return -EINVAL; - - if ((hrs >= 24) || (min >= 60) || (sec >= 60)) - return -EINVAL; - - if ( yrs > 169 ) - return -EINVAL; - - ppc_md.set_rtc_time(&rtc_tm); - - return 0; - } - case RTC_EPOCH_READ: /* Read the epoch. */ - { - return put_user (epoch, (unsigned long __user *)arg); - } - case RTC_EPOCH_SET: /* Set the epoch. */ - { - /* - * There were no RTC clocks before 1900. - */ - if (arg < 1900) - return -EINVAL; - - if (!capable(CAP_SYS_TIME)) - return -EACCES; - - epoch = arg; - return 0; - } - default: - return -EINVAL; - } - return copy_to_user((void __user *)arg, &wtime, sizeof wtime) ? -EFAULT : 0; -} - -static int rtc_open(struct inode *inode, struct file *file) -{ - nonseekable_open(inode, file); - return 0; -} - -static int rtc_release(struct inode *inode, struct file *file) -{ - return 0; -} - -/* - * The various file operations we support. - */ -static struct file_operations rtc_fops = { - .owner = THIS_MODULE, - .llseek = no_llseek, - .read = rtc_read, - .ioctl = rtc_ioctl, - .open = rtc_open, - .release = rtc_release, -}; - -static struct miscdevice rtc_dev = { - .minor = RTC_MINOR, - .name = "rtc", - .fops = &rtc_fops -}; - -static int __init rtc_init(void) -{ - int retval; - - retval = misc_register(&rtc_dev); - if(retval < 0) - return retval; - -#ifdef CONFIG_PROC_FS - if (create_proc_read_entry("driver/rtc", 0, NULL, rtc_read_proc, NULL) - == NULL) { - misc_deregister(&rtc_dev); - return -ENOMEM; - } -#endif - - printk(KERN_INFO "i/pSeries Real Time Clock Driver v" RTC_VERSION "\n"); - - return 0; -} - -static void __exit rtc_exit (void) -{ - remove_proc_entry ("driver/rtc", NULL); - misc_deregister(&rtc_dev); -} - -module_init(rtc_init); -module_exit(rtc_exit); - -/* - * Info exported via "/proc/driver/rtc". - */ - -static int rtc_proc_output (char *buf) -{ - - char *p; - struct rtc_time tm; - - p = buf; - - ppc_md.get_rtc_time(&tm); - - /* - * There is no way to tell if the luser has the RTC set for local - * time or for Universal Standard Time (GMT). Probably local though. - */ - p += sprintf(p, - "rtc_time\t: %02d:%02d:%02d\n" - "rtc_date\t: %04d-%02d-%02d\n" - "rtc_epoch\t: %04lu\n", - tm.tm_hour, tm.tm_min, tm.tm_sec, - tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, epoch); - - p += sprintf(p, - "DST_enable\t: no\n" - "BCD\t\t: yes\n" - "24hr\t\t: yes\n" ); - - return p - buf; -} - -static int rtc_read_proc(char *page, char **start, off_t off, - int count, int *eof, void *data) -{ - int len = rtc_proc_output (page); - if (len <= off+count) *eof = 1; - *start = page + off; - len -= off; - if (len>count) len = count; - if (len<0) len = 0; - return len; -} - -#ifdef CONFIG_PPC_RTAS -#define MAX_RTC_WAIT 5000 /* 5 sec */ -#define RTAS_CLOCK_BUSY (-2) -unsigned long rtas_get_boot_time(void) -{ - int ret[8]; - int error, wait_time; - unsigned long max_wait_tb; - - max_wait_tb = __get_tb() + tb_ticks_per_usec * 1000 * MAX_RTC_WAIT; - do { - error = rtas_call(rtas_token("get-time-of-day"), 0, 8, ret); - if (error == RTAS_CLOCK_BUSY || rtas_is_extended_busy(error)) { - wait_time = rtas_extended_busy_delay_time(error); - /* This is boot time so we spin. */ - udelay(wait_time*1000); - error = RTAS_CLOCK_BUSY; - } - } while (error == RTAS_CLOCK_BUSY && (__get_tb() < max_wait_tb)); - - if (error != 0 && printk_ratelimit()) { - printk(KERN_WARNING "error: reading the clock failed (%d)\n", - error); - return 0; - } - - return mktime(ret[0], ret[1], ret[2], ret[3], ret[4], ret[5]); -} - -/* NOTE: get_rtc_time will get an error if executed in interrupt context - * and if a delay is needed to read the clock. In this case we just - * silently return without updating rtc_tm. - */ -void rtas_get_rtc_time(struct rtc_time *rtc_tm) -{ - int ret[8]; - int error, wait_time; - unsigned long max_wait_tb; - - max_wait_tb = __get_tb() + tb_ticks_per_usec * 1000 * MAX_RTC_WAIT; - do { - error = rtas_call(rtas_token("get-time-of-day"), 0, 8, ret); - if (error == RTAS_CLOCK_BUSY || rtas_is_extended_busy(error)) { - if (in_interrupt() && printk_ratelimit()) { - printk(KERN_WARNING "error: reading clock would delay interrupt\n"); - return; /* delay not allowed */ - } - wait_time = rtas_extended_busy_delay_time(error); - msleep_interruptible(wait_time); - error = RTAS_CLOCK_BUSY; - } - } while (error == RTAS_CLOCK_BUSY && (__get_tb() < max_wait_tb)); - - if (error != 0 && printk_ratelimit()) { - printk(KERN_WARNING "error: reading the clock failed (%d)\n", - error); - return; - } - - rtc_tm->tm_sec = ret[5]; - rtc_tm->tm_min = ret[4]; - rtc_tm->tm_hour = ret[3]; - rtc_tm->tm_mday = ret[2]; - rtc_tm->tm_mon = ret[1] - 1; - rtc_tm->tm_year = ret[0] - 1900; -} - -int rtas_set_rtc_time(struct rtc_time *tm) -{ - int error, wait_time; - unsigned long max_wait_tb; - - max_wait_tb = __get_tb() + tb_ticks_per_usec * 1000 * MAX_RTC_WAIT; - do { - error = rtas_call(rtas_token("set-time-of-day"), 7, 1, NULL, - tm->tm_year + 1900, tm->tm_mon + 1, - tm->tm_mday, tm->tm_hour, tm->tm_min, - tm->tm_sec, 0); - if (error == RTAS_CLOCK_BUSY || rtas_is_extended_busy(error)) { - if (in_interrupt()) - return 1; /* probably decrementer */ - wait_time = rtas_extended_busy_delay_time(error); - msleep_interruptible(wait_time); - error = RTAS_CLOCK_BUSY; - } - } while (error == RTAS_CLOCK_BUSY && (__get_tb() < max_wait_tb)); - - if (error != 0 && printk_ratelimit()) - printk(KERN_WARNING "error: setting the clock failed (%d)\n", - error); - - return 0; -} -#endif diff --git a/arch/ppc64/kernel/semaphore.c b/arch/ppc64/kernel/semaphore.c deleted file mode 100644 index a1c1db573e9c..000000000000 --- a/arch/ppc64/kernel/semaphore.c +++ /dev/null @@ -1,136 +0,0 @@ -/* - * - * - * PowerPC-specific semaphore code. - * - * Copyright (C) 1999 Cort Dougan <cort@cs.nmt.edu> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - * April 2001 - Reworked by Paul Mackerras <paulus@samba.org> - * to eliminate the SMP races in the old version between the updates - * of `count' and `waking'. Now we use negative `count' values to - * indicate that some process(es) are waiting for the semaphore. - */ - -#include <linux/sched.h> -#include <linux/init.h> -#include <linux/module.h> - -#include <asm/atomic.h> -#include <asm/semaphore.h> -#include <asm/errno.h> - -/* - * Atomically update sem->count. - * This does the equivalent of the following: - * - * old_count = sem->count; - * tmp = MAX(old_count, 0) + incr; - * sem->count = tmp; - * return old_count; - */ -static inline int __sem_update_count(struct semaphore *sem, int incr) -{ - int old_count, tmp; - - __asm__ __volatile__("\n" -"1: lwarx %0,0,%3\n" -" srawi %1,%0,31\n" -" andc %1,%0,%1\n" -" add %1,%1,%4\n" -" stwcx. %1,0,%3\n" -" bne 1b" - : "=&r" (old_count), "=&r" (tmp), "=m" (sem->count) - : "r" (&sem->count), "r" (incr), "m" (sem->count) - : "cc"); - - return old_count; -} - -void __up(struct semaphore *sem) -{ - /* - * Note that we incremented count in up() before we came here, - * but that was ineffective since the result was <= 0, and - * any negative value of count is equivalent to 0. - * This ends up setting count to 1, unless count is now > 0 - * (i.e. because some other cpu has called up() in the meantime), - * in which case we just increment count. - */ - __sem_update_count(sem, 1); - wake_up(&sem->wait); -} -EXPORT_SYMBOL(__up); - -/* - * Note that when we come in to __down or __down_interruptible, - * we have already decremented count, but that decrement was - * ineffective since the result was < 0, and any negative value - * of count is equivalent to 0. - * Thus it is only when we decrement count from some value > 0 - * that we have actually got the semaphore. - */ -void __sched __down(struct semaphore *sem) -{ - struct task_struct *tsk = current; - DECLARE_WAITQUEUE(wait, tsk); - - __set_task_state(tsk, TASK_UNINTERRUPTIBLE); - add_wait_queue_exclusive(&sem->wait, &wait); - - /* - * Try to get the semaphore. If the count is > 0, then we've - * got the semaphore; we decrement count and exit the loop. - * If the count is 0 or negative, we set it to -1, indicating - * that we are asleep, and then sleep. - */ - while (__sem_update_count(sem, -1) <= 0) { - schedule(); - set_task_state(tsk, TASK_UNINTERRUPTIBLE); - } - remove_wait_queue(&sem->wait, &wait); - __set_task_state(tsk, TASK_RUNNING); - - /* - * If there are any more sleepers, wake one of them up so - * that it can either get the semaphore, or set count to -1 - * indicating that there are still processes sleeping. - */ - wake_up(&sem->wait); -} -EXPORT_SYMBOL(__down); - -int __sched __down_interruptible(struct semaphore * sem) -{ - int retval = 0; - struct task_struct *tsk = current; - DECLARE_WAITQUEUE(wait, tsk); - - __set_task_state(tsk, TASK_INTERRUPTIBLE); - add_wait_queue_exclusive(&sem->wait, &wait); - - while (__sem_update_count(sem, -1) <= 0) { - if (signal_pending(current)) { - /* - * A signal is pending - give up trying. - * Set sem->count to 0 if it is negative, - * since we are no longer sleeping. - */ - __sem_update_count(sem, 0); - retval = -EINTR; - break; - } - schedule(); - set_task_state(tsk, TASK_INTERRUPTIBLE); - } - remove_wait_queue(&sem->wait, &wait); - __set_task_state(tsk, TASK_RUNNING); - - wake_up(&sem->wait); - return retval; -} -EXPORT_SYMBOL(__down_interruptible); diff --git a/arch/ppc64/kernel/vdso.c b/arch/ppc64/kernel/vdso.c deleted file mode 100644 index 1bbacac44988..000000000000 --- a/arch/ppc64/kernel/vdso.c +++ /dev/null @@ -1,625 +0,0 @@ -/* - * linux/arch/ppc64/kernel/vdso.c - * - * Copyright (C) 2004 Benjamin Herrenschmidt, IBM Corp. - * <benh@kernel.crashing.org> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <linux/config.h> -#include <linux/module.h> -#include <linux/errno.h> -#include <linux/sched.h> -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/smp.h> -#include <linux/smp_lock.h> -#include <linux/stddef.h> -#include <linux/unistd.h> -#include <linux/slab.h> -#include <linux/user.h> -#include <linux/elf.h> -#include <linux/security.h> -#include <linux/bootmem.h> - -#include <asm/pgtable.h> -#include <asm/system.h> -#include <asm/processor.h> -#include <asm/mmu.h> -#include <asm/mmu_context.h> -#include <asm/machdep.h> -#include <asm/cputable.h> -#include <asm/sections.h> -#include <asm/systemcfg.h> -#include <asm/vdso.h> - -#undef DEBUG - -#ifdef DEBUG -#define DBG(fmt...) printk(fmt) -#else -#define DBG(fmt...) -#endif - - -/* - * The vDSOs themselves are here - */ -extern char vdso64_start, vdso64_end; -extern char vdso32_start, vdso32_end; - -static void *vdso64_kbase = &vdso64_start; -static void *vdso32_kbase = &vdso32_start; - -unsigned int vdso64_pages; -unsigned int vdso32_pages; - -/* Signal trampolines user addresses */ - -unsigned long vdso64_rt_sigtramp; -unsigned long vdso32_sigtramp; -unsigned long vdso32_rt_sigtramp; - -/* Format of the patch table */ -struct vdso_patch_def -{ - u32 pvr_mask, pvr_value; - const char *gen_name; - const char *fix_name; -}; - -/* Table of functions to patch based on the CPU type/revision - * - * TODO: Improve by adding whole lists for each entry - */ -static struct vdso_patch_def vdso_patches[] = { - { - 0xffff0000, 0x003a0000, /* POWER5 */ - "__kernel_sync_dicache", "__kernel_sync_dicache_p5" - }, - { - 0xffff0000, 0x003b0000, /* POWER5 */ - "__kernel_sync_dicache", "__kernel_sync_dicache_p5" - }, -}; - -/* - * Some infos carried around for each of them during parsing at - * boot time. - */ -struct lib32_elfinfo -{ - Elf32_Ehdr *hdr; /* ptr to ELF */ - Elf32_Sym *dynsym; /* ptr to .dynsym section */ - unsigned long dynsymsize; /* size of .dynsym section */ - char *dynstr; /* ptr to .dynstr section */ - unsigned long text; /* offset of .text section in .so */ -}; - -struct lib64_elfinfo -{ - Elf64_Ehdr *hdr; - Elf64_Sym *dynsym; - unsigned long dynsymsize; - char *dynstr; - unsigned long text; -}; - - -#ifdef __DEBUG -static void dump_one_vdso_page(struct page *pg, struct page *upg) -{ - printk("kpg: %p (c:%d,f:%08lx)", __va(page_to_pfn(pg) << PAGE_SHIFT), - page_count(pg), - pg->flags); - if (upg/* && pg != upg*/) { - printk(" upg: %p (c:%d,f:%08lx)", __va(page_to_pfn(upg) << PAGE_SHIFT), - page_count(upg), - upg->flags); - } - printk("\n"); -} - -static void dump_vdso_pages(struct vm_area_struct * vma) -{ - int i; - - if (!vma || test_thread_flag(TIF_32BIT)) { - printk("vDSO32 @ %016lx:\n", (unsigned long)vdso32_kbase); - for (i=0; i<vdso32_pages; i++) { - struct page *pg = virt_to_page(vdso32_kbase + i*PAGE_SIZE); - struct page *upg = (vma && vma->vm_mm) ? - follow_page(vma->vm_mm, vma->vm_start + i*PAGE_SIZE, 0) - : NULL; - dump_one_vdso_page(pg, upg); - } - } - if (!vma || !test_thread_flag(TIF_32BIT)) { - printk("vDSO64 @ %016lx:\n", (unsigned long)vdso64_kbase); - for (i=0; i<vdso64_pages; i++) { - struct page *pg = virt_to_page(vdso64_kbase + i*PAGE_SIZE); - struct page *upg = (vma && vma->vm_mm) ? - follow_page(vma->vm_mm, vma->vm_start + i*PAGE_SIZE, 0) - : NULL; - dump_one_vdso_page(pg, upg); - } - } -} -#endif /* DEBUG */ - -/* - * Keep a dummy vma_close for now, it will prevent VMA merging. - */ -static void vdso_vma_close(struct vm_area_struct * vma) -{ -} - -/* - * Our nopage() function, maps in the actual vDSO kernel pages, they will - * be mapped read-only by do_no_page(), and eventually COW'ed, either - * right away for an initial write access, or by do_wp_page(). - */ -static struct page * vdso_vma_nopage(struct vm_area_struct * vma, - unsigned long address, int *type) -{ - unsigned long offset = address - vma->vm_start; - struct page *pg; - void *vbase = test_thread_flag(TIF_32BIT) ? vdso32_kbase : vdso64_kbase; - - DBG("vdso_vma_nopage(current: %s, address: %016lx, off: %lx)\n", - current->comm, address, offset); - - if (address < vma->vm_start || address > vma->vm_end) - return NOPAGE_SIGBUS; - - /* - * Last page is systemcfg. - */ - if ((vma->vm_end - address) <= PAGE_SIZE) - pg = virt_to_page(_systemcfg); - else - pg = virt_to_page(vbase + offset); - - get_page(pg); - DBG(" ->page count: %d\n", page_count(pg)); - - return pg; -} - -static struct vm_operations_struct vdso_vmops = { - .close = vdso_vma_close, - .nopage = vdso_vma_nopage, -}; - -/* - * This is called from binfmt_elf, we create the special vma for the - * vDSO and insert it into the mm struct tree - */ -int arch_setup_additional_pages(struct linux_binprm *bprm, int executable_stack) -{ - struct mm_struct *mm = current->mm; - struct vm_area_struct *vma; - unsigned long vdso_pages; - unsigned long vdso_base; - - if (test_thread_flag(TIF_32BIT)) { - vdso_pages = vdso32_pages; - vdso_base = VDSO32_MBASE; - } else { - vdso_pages = vdso64_pages; - vdso_base = VDSO64_MBASE; - } - - current->thread.vdso_base = 0; - - /* vDSO has a problem and was disabled, just don't "enable" it for the - * process - */ - if (vdso_pages == 0) - return 0; - - vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); - if (vma == NULL) - return -ENOMEM; - - memset(vma, 0, sizeof(*vma)); - - /* - * pick a base address for the vDSO in process space. We try to put it - * at vdso_base which is the "natural" base for it, but we might fail - * and end up putting it elsewhere. - */ - vdso_base = get_unmapped_area(NULL, vdso_base, - vdso_pages << PAGE_SHIFT, 0, 0); - if (vdso_base & ~PAGE_MASK) { - kmem_cache_free(vm_area_cachep, vma); - return (int)vdso_base; - } - - current->thread.vdso_base = vdso_base; - - vma->vm_mm = mm; - vma->vm_start = current->thread.vdso_base; - - /* - * the VMA size is one page more than the vDSO since systemcfg - * is mapped in the last one - */ - vma->vm_end = vma->vm_start + ((vdso_pages + 1) << PAGE_SHIFT); - - /* - * our vma flags don't have VM_WRITE so by default, the process isn't allowed - * to write those pages. - * gdb can break that with ptrace interface, and thus trigger COW on those - * pages but it's then your responsibility to never do that on the "data" page - * of the vDSO or you'll stop getting kernel updates and your nice userland - * gettimeofday will be totally dead. It's fine to use that for setting - * breakpoints in the vDSO code pages though - */ - vma->vm_flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC | VM_RESERVED; - vma->vm_flags |= mm->def_flags; - vma->vm_page_prot = protection_map[vma->vm_flags & 0x7]; - vma->vm_ops = &vdso_vmops; - - down_write(&mm->mmap_sem); - if (insert_vm_struct(mm, vma)) { - up_write(&mm->mmap_sem); - kmem_cache_free(vm_area_cachep, vma); - return -ENOMEM; - } - mm->total_vm += (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; - up_write(&mm->mmap_sem); - - return 0; -} - -static void * __init find_section32(Elf32_Ehdr *ehdr, const char *secname, - unsigned long *size) -{ - Elf32_Shdr *sechdrs; - unsigned int i; - char *secnames; - - /* Grab section headers and strings so we can tell who is who */ - sechdrs = (void *)ehdr + ehdr->e_shoff; - secnames = (void *)ehdr + sechdrs[ehdr->e_shstrndx].sh_offset; - - /* Find the section they want */ - for (i = 1; i < ehdr->e_shnum; i++) { - if (strcmp(secnames+sechdrs[i].sh_name, secname) == 0) { - if (size) - *size = sechdrs[i].sh_size; - return (void *)ehdr + sechdrs[i].sh_offset; - } - } - *size = 0; - return NULL; -} - -static void * __init find_section64(Elf64_Ehdr *ehdr, const char *secname, - unsigned long *size) -{ - Elf64_Shdr *sechdrs; - unsigned int i; - char *secnames; - - /* Grab section headers and strings so we can tell who is who */ - sechdrs = (void *)ehdr + ehdr->e_shoff; - secnames = (void *)ehdr + sechdrs[ehdr->e_shstrndx].sh_offset; - - /* Find the section they want */ - for (i = 1; i < ehdr->e_shnum; i++) { - if (strcmp(secnames+sechdrs[i].sh_name, secname) == 0) { - if (size) - *size = sechdrs[i].sh_size; - return (void *)ehdr + sechdrs[i].sh_offset; - } - } - if (size) - *size = 0; - return NULL; -} - -static Elf32_Sym * __init find_symbol32(struct lib32_elfinfo *lib, const char *symname) -{ - unsigned int i; - char name[32], *c; - - for (i = 0; i < (lib->dynsymsize / sizeof(Elf32_Sym)); i++) { - if (lib->dynsym[i].st_name == 0) - continue; - strlcpy(name, lib->dynstr + lib->dynsym[i].st_name, 32); - c = strchr(name, '@'); - if (c) - *c = 0; - if (strcmp(symname, name) == 0) - return &lib->dynsym[i]; - } - return NULL; -} - -static Elf64_Sym * __init find_symbol64(struct lib64_elfinfo *lib, const char *symname) -{ - unsigned int i; - char name[32], *c; - - for (i = 0; i < (lib->dynsymsize / sizeof(Elf64_Sym)); i++) { - if (lib->dynsym[i].st_name == 0) - continue; - strlcpy(name, lib->dynstr + lib->dynsym[i].st_name, 32); - c = strchr(name, '@'); - if (c) - *c = 0; - if (strcmp(symname, name) == 0) - return &lib->dynsym[i]; - } - return NULL; -} - -/* Note that we assume the section is .text and the symbol is relative to - * the library base - */ -static unsigned long __init find_function32(struct lib32_elfinfo *lib, const char *symname) -{ - Elf32_Sym *sym = find_symbol32(lib, symname); - - if (sym == NULL) { - printk(KERN_WARNING "vDSO32: function %s not found !\n", symname); - return 0; - } - return sym->st_value - VDSO32_LBASE; -} - -/* Note that we assume the section is .text and the symbol is relative to - * the library base - */ -static unsigned long __init find_function64(struct lib64_elfinfo *lib, const char *symname) -{ - Elf64_Sym *sym = find_symbol64(lib, symname); - - if (sym == NULL) { - printk(KERN_WARNING "vDSO64: function %s not found !\n", symname); - return 0; - } -#ifdef VDS64_HAS_DESCRIPTORS - return *((u64 *)(vdso64_kbase + sym->st_value - VDSO64_LBASE)) - VDSO64_LBASE; -#else - return sym->st_value - VDSO64_LBASE; -#endif -} - - -static __init int vdso_do_find_sections(struct lib32_elfinfo *v32, - struct lib64_elfinfo *v64) -{ - void *sect; - - /* - * Locate symbol tables & text section - */ - - v32->dynsym = find_section32(v32->hdr, ".dynsym", &v32->dynsymsize); - v32->dynstr = find_section32(v32->hdr, ".dynstr", NULL); - if (v32->dynsym == NULL || v32->dynstr == NULL) { - printk(KERN_ERR "vDSO32: a required symbol section was not found\n"); - return -1; - } - sect = find_section32(v32->hdr, ".text", NULL); - if (sect == NULL) { - printk(KERN_ERR "vDSO32: the .text section was not found\n"); - return -1; - } - v32->text = sect - vdso32_kbase; - - v64->dynsym = find_section64(v64->hdr, ".dynsym", &v64->dynsymsize); - v64->dynstr = find_section64(v64->hdr, ".dynstr", NULL); - if (v64->dynsym == NULL || v64->dynstr == NULL) { - printk(KERN_ERR "vDSO64: a required symbol section was not found\n"); - return -1; - } - sect = find_section64(v64->hdr, ".text", NULL); - if (sect == NULL) { - printk(KERN_ERR "vDSO64: the .text section was not found\n"); - return -1; - } - v64->text = sect - vdso64_kbase; - - return 0; -} - -static __init void vdso_setup_trampolines(struct lib32_elfinfo *v32, - struct lib64_elfinfo *v64) -{ - /* - * Find signal trampolines - */ - - vdso64_rt_sigtramp = find_function64(v64, "__kernel_sigtramp_rt64"); - vdso32_sigtramp = find_function32(v32, "__kernel_sigtramp32"); - vdso32_rt_sigtramp = find_function32(v32, "__kernel_sigtramp_rt32"); -} - -static __init int vdso_fixup_datapage(struct lib32_elfinfo *v32, - struct lib64_elfinfo *v64) -{ - Elf32_Sym *sym32; - Elf64_Sym *sym64; - - sym32 = find_symbol32(v32, "__kernel_datapage_offset"); - if (sym32 == NULL) { - printk(KERN_ERR "vDSO32: Can't find symbol __kernel_datapage_offset !\n"); - return -1; - } - *((int *)(vdso32_kbase + (sym32->st_value - VDSO32_LBASE))) = - (vdso32_pages << PAGE_SHIFT) - (sym32->st_value - VDSO32_LBASE); - - sym64 = find_symbol64(v64, "__kernel_datapage_offset"); - if (sym64 == NULL) { - printk(KERN_ERR "vDSO64: Can't find symbol __kernel_datapage_offset !\n"); - return -1; - } - *((int *)(vdso64_kbase + sym64->st_value - VDSO64_LBASE)) = - (vdso64_pages << PAGE_SHIFT) - (sym64->st_value - VDSO64_LBASE); - - return 0; -} - -static int vdso_do_func_patch32(struct lib32_elfinfo *v32, - struct lib64_elfinfo *v64, - const char *orig, const char *fix) -{ - Elf32_Sym *sym32_gen, *sym32_fix; - - sym32_gen = find_symbol32(v32, orig); - if (sym32_gen == NULL) { - printk(KERN_ERR "vDSO32: Can't find symbol %s !\n", orig); - return -1; - } - sym32_fix = find_symbol32(v32, fix); - if (sym32_fix == NULL) { - printk(KERN_ERR "vDSO32: Can't find symbol %s !\n", fix); - return -1; - } - sym32_gen->st_value = sym32_fix->st_value; - sym32_gen->st_size = sym32_fix->st_size; - sym32_gen->st_info = sym32_fix->st_info; - sym32_gen->st_other = sym32_fix->st_other; - sym32_gen->st_shndx = sym32_fix->st_shndx; - - return 0; -} - -static int vdso_do_func_patch64(struct lib32_elfinfo *v32, - struct lib64_elfinfo *v64, - const char *orig, const char *fix) -{ - Elf64_Sym *sym64_gen, *sym64_fix; - - sym64_gen = find_symbol64(v64, orig); - if (sym64_gen == NULL) { - printk(KERN_ERR "vDSO64: Can't find symbol %s !\n", orig); - return -1; - } - sym64_fix = find_symbol64(v64, fix); - if (sym64_fix == NULL) { - printk(KERN_ERR "vDSO64: Can't find symbol %s !\n", fix); - return -1; - } - sym64_gen->st_value = sym64_fix->st_value; - sym64_gen->st_size = sym64_fix->st_size; - sym64_gen->st_info = sym64_fix->st_info; - sym64_gen->st_other = sym64_fix->st_other; - sym64_gen->st_shndx = sym64_fix->st_shndx; - - return 0; -} - -static __init int vdso_fixup_alt_funcs(struct lib32_elfinfo *v32, - struct lib64_elfinfo *v64) -{ - u32 pvr; - int i; - - pvr = mfspr(SPRN_PVR); - for (i = 0; i < ARRAY_SIZE(vdso_patches); i++) { - struct vdso_patch_def *patch = &vdso_patches[i]; - int match = (pvr & patch->pvr_mask) == patch->pvr_value; - - DBG("patch %d (mask: %x, pvr: %x) : %s\n", - i, patch->pvr_mask, patch->pvr_value, match ? "match" : "skip"); - - if (!match) - continue; - - DBG("replacing %s with %s...\n", patch->gen_name, patch->fix_name); - - /* - * Patch the 32 bits and 64 bits symbols. Note that we do not patch - * the "." symbol on 64 bits. It would be easy to do, but doesn't - * seem to be necessary, patching the OPD symbol is enough. - */ - vdso_do_func_patch32(v32, v64, patch->gen_name, patch->fix_name); - vdso_do_func_patch64(v32, v64, patch->gen_name, patch->fix_name); - } - - return 0; -} - - -static __init int vdso_setup(void) -{ - struct lib32_elfinfo v32; - struct lib64_elfinfo v64; - - v32.hdr = vdso32_kbase; - v64.hdr = vdso64_kbase; - - if (vdso_do_find_sections(&v32, &v64)) - return -1; - - if (vdso_fixup_datapage(&v32, &v64)) - return -1; - - if (vdso_fixup_alt_funcs(&v32, &v64)) - return -1; - - vdso_setup_trampolines(&v32, &v64); - - return 0; -} - -void __init vdso_init(void) -{ - int i; - - vdso64_pages = (&vdso64_end - &vdso64_start) >> PAGE_SHIFT; - vdso32_pages = (&vdso32_end - &vdso32_start) >> PAGE_SHIFT; - - DBG("vdso64_kbase: %p, 0x%x pages, vdso32_kbase: %p, 0x%x pages\n", - vdso64_kbase, vdso64_pages, vdso32_kbase, vdso32_pages); - - /* - * Initialize the vDSO images in memory, that is do necessary - * fixups of vDSO symbols, locate trampolines, etc... - */ - if (vdso_setup()) { - printk(KERN_ERR "vDSO setup failure, not enabled !\n"); - /* XXX should free pages here ? */ - vdso64_pages = vdso32_pages = 0; - return; - } - - /* Make sure pages are in the correct state */ - for (i = 0; i < vdso64_pages; i++) { - struct page *pg = virt_to_page(vdso64_kbase + i*PAGE_SIZE); - ClearPageReserved(pg); - get_page(pg); - } - for (i = 0; i < vdso32_pages; i++) { - struct page *pg = virt_to_page(vdso32_kbase + i*PAGE_SIZE); - ClearPageReserved(pg); - get_page(pg); - } - - get_page(virt_to_page(_systemcfg)); -} - -int in_gate_area_no_task(unsigned long addr) -{ - return 0; -} - -int in_gate_area(struct task_struct *task, unsigned long addr) -{ - return 0; -} - -struct vm_area_struct *get_gate_vma(struct task_struct *tsk) -{ - return NULL; -} - diff --git a/arch/ppc64/kernel/vmlinux.lds.S b/arch/ppc64/kernel/vmlinux.lds.S deleted file mode 100644 index 022f220e772f..000000000000 --- a/arch/ppc64/kernel/vmlinux.lds.S +++ /dev/null @@ -1,151 +0,0 @@ -#include <asm/page.h> -#include <asm-generic/vmlinux.lds.h> - -OUTPUT_ARCH(powerpc:common64) -jiffies = jiffies_64; -SECTIONS -{ - /* Sections to be discarded. */ - /DISCARD/ : { - *(.exitcall.exit) - } - - - /* Read-only sections, merged into text segment: */ - .text : { - *(.text .text.*) - SCHED_TEXT - LOCK_TEXT - KPROBES_TEXT - *(.fixup) - . = ALIGN(PAGE_SIZE); - _etext = .; - } - - __ex_table : { - __start___ex_table = .; - *(__ex_table) - __stop___ex_table = .; - } - - __bug_table : { - __start___bug_table = .; - *(__bug_table) - __stop___bug_table = .; - } - - __ftr_fixup : { - __start___ftr_fixup = .; - *(__ftr_fixup) - __stop___ftr_fixup = .; - } - - RODATA - - - /* will be freed after init */ - . = ALIGN(PAGE_SIZE); - __init_begin = .; - - .init.text : { - _sinittext = .; - *(.init.text) - _einittext = .; - } - - .init.data : { - *(.init.data) - } - - . = ALIGN(16); - .init.setup : { - __setup_start = .; - *(.init.setup) - __setup_end = .; - } - - .initcall.init : { - __initcall_start = .; - *(.initcall1.init) - *(.initcall2.init) - *(.initcall3.init) - *(.initcall4.init) - *(.initcall5.init) - *(.initcall6.init) - *(.initcall7.init) - __initcall_end = .; - } - - .con_initcall.init : { - __con_initcall_start = .; - *(.con_initcall.init) - __con_initcall_end = .; - } - - SECURITY_INIT - - . = ALIGN(PAGE_SIZE); - .init.ramfs : { - __initramfs_start = .; - *(.init.ramfs) - __initramfs_end = .; - } - - .data.percpu : { - __per_cpu_start = .; - *(.data.percpu) - __per_cpu_end = .; - } - - . = ALIGN(PAGE_SIZE); - . = ALIGN(16384); - __init_end = .; - /* freed after init ends here */ - - - /* Read/write sections */ - . = ALIGN(PAGE_SIZE); - . = ALIGN(16384); - _sdata = .; - /* The initial task and kernel stack */ - .data.init_task : { - *(.data.init_task) - } - - . = ALIGN(PAGE_SIZE); - .data.page_aligned : { - *(.data.page_aligned) - } - - .data.cacheline_aligned : { - *(.data.cacheline_aligned) - } - - .data : { - *(.data .data.rel* .toc1) - *(.branch_lt) - } - - .opd : { - *(.opd) - } - - .got : { - __toc_start = .; - *(.got) - *(.toc) - . = ALIGN(PAGE_SIZE); - _edata = .; - } - - - . = ALIGN(PAGE_SIZE); - .bss : { - __bss_start = .; - *(.bss) - __bss_stop = .; - } - - . = ALIGN(PAGE_SIZE); - _end = . ; -} diff --git a/arch/ppc64/xmon/privinst.h b/arch/ppc64/xmon/privinst.h deleted file mode 100644 index 02eb40dac0b3..000000000000 --- a/arch/ppc64/xmon/privinst.h +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright (C) 1996 Paul Mackerras. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#define GETREG(reg) \ - static inline unsigned long get_ ## reg (void) \ - { unsigned long ret; asm volatile ("mf" #reg " %0" : "=r" (ret) :); return ret; } - -#define SETREG(reg) \ - static inline void set_ ## reg (unsigned long val) \ - { asm volatile ("mt" #reg " %0" : : "r" (val)); } - -GETREG(msr) -SETREG(msrd) -GETREG(cr) - -#define GSETSPR(n, name) \ - static inline long get_ ## name (void) \ - { long ret; asm volatile ("mfspr %0," #n : "=r" (ret) : ); return ret; } \ - static inline void set_ ## name (long val) \ - { asm volatile ("mtspr " #n ",%0" : : "r" (val)); } - -GSETSPR(0, mq) -GSETSPR(1, xer) -GSETSPR(4, rtcu) -GSETSPR(5, rtcl) -GSETSPR(8, lr) -GSETSPR(9, ctr) -GSETSPR(18, dsisr) -GSETSPR(19, dar) -GSETSPR(22, dec) -GSETSPR(25, sdr1) -GSETSPR(26, srr0) -GSETSPR(27, srr1) -GSETSPR(272, sprg0) -GSETSPR(273, sprg1) -GSETSPR(274, sprg2) -GSETSPR(275, sprg3) -GSETSPR(282, ear) -GSETSPR(287, pvr) -GSETSPR(1008, hid0) -GSETSPR(1009, hid1) -GSETSPR(1010, iabr) -GSETSPR(1023, pir) - -static inline void store_inst(void *p) -{ - asm volatile ("dcbst 0,%0; sync; icbi 0,%0; isync" : : "r" (p)); -} - -static inline void cflush(void *p) -{ - asm volatile ("dcbf 0,%0; icbi 0,%0" : : "r" (p)); -} - -static inline void cinval(void *p) -{ - asm volatile ("dcbi 0,%0; icbi 0,%0" : : "r" (p)); -} diff --git a/arch/v850/Kconfig b/arch/v850/Kconfig index 89c053b6c2c4..310865903234 100644 --- a/arch/v850/Kconfig +++ b/arch/v850/Kconfig @@ -23,6 +23,14 @@ config GENERIC_CALIBRATE_DELAY bool default y +config GENERIC_HARDIRQS + bool + default y + +config GENERIC_IRQ_PROBE + bool + default y + # Turn off some random 386 crap that can affect device config config ISA bool diff --git a/arch/v850/kernel/irq.c b/arch/v850/kernel/irq.c index 9e85969ba976..7a151c26f82e 100644 --- a/arch/v850/kernel/irq.c +++ b/arch/v850/kernel/irq.c @@ -1,8 +1,8 @@ /* * arch/v850/kernel/irq.c -- High-level interrupt handling * - * Copyright (C) 2001,02,03,04 NEC Electronics Corporation - * Copyright (C) 2001,02,03,04 Miles Bader <miles@gnu.org> + * Copyright (C) 2001,02,03,04,05 NEC Electronics Corporation + * Copyright (C) 2001,02,03,04,05 Miles Bader <miles@gnu.org> * Copyright (C) 1994-2000 Ralf Baechle * Copyright (C) 1992 Linus Torvalds * @@ -27,55 +27,15 @@ #include <asm/system.h> /* - * Controller mappings for all interrupt sources: + * 'what should we do if we get a hw irq event on an illegal vector'. + * each architecture has to answer this themselves, it doesn't deserve + * a generic callback i think. */ -irq_desc_t irq_desc[NR_IRQS] __cacheline_aligned = { - [0 ... NR_IRQS-1] = { - .handler = &no_irq_type, - .lock = SPIN_LOCK_UNLOCKED - } -}; - -/* - * Special irq handlers. - */ - -irqreturn_t no_action(int cpl, void *dev_id, struct pt_regs *regs) -{ - return IRQ_NONE; -} - -/* - * Generic no controller code - */ - -static void enable_none(unsigned int irq) { } -static unsigned int startup_none(unsigned int irq) { return 0; } -static void disable_none(unsigned int irq) { } -static void ack_none(unsigned int irq) +void ack_bad_irq(unsigned int irq) { - /* - * 'what should we do if we get a hw irq event on an illegal vector'. - * each architecture has to answer this themselves, it doesn't deserve - * a generic callback i think. - */ printk("received IRQ %d with unknown interrupt type\n", irq); } -/* startup is the same as "enable", shutdown is same as "disable" */ -#define shutdown_none disable_none -#define end_none enable_none - -struct hw_interrupt_type no_irq_type = { - .typename = "none", - .startup = startup_none, - .shutdown = shutdown_none, - .enable = enable_none, - .disable = disable_none, - .ack = ack_none, - .end = end_none -}; - volatile unsigned long irq_err_count, spurious_count; /* @@ -84,643 +44,68 @@ volatile unsigned long irq_err_count, spurious_count; int show_interrupts(struct seq_file *p, void *v) { - int i = *(loff_t *) v; - struct irqaction * action; - unsigned long flags; + int irq = *(loff_t *) v; - if (i == 0) { + if (irq == 0) { + int cpu; seq_puts(p, " "); - for (i=0; i < 1 /*smp_num_cpus*/; i++) - seq_printf(p, "CPU%d ", i); + for (cpu=0; cpu < 1 /*smp_num_cpus*/; cpu++) + seq_printf(p, "CPU%d ", cpu); seq_putc(p, '\n'); } - if (i < NR_IRQS) { - int j, count, num; - const char *type_name = irq_desc[i].handler->typename; - spin_lock_irqsave(&irq_desc[j].lock, flags); - action = irq_desc[i].action; - if (!action) - goto skip; + if (irq < NR_IRQS) { + unsigned long flags; + struct irqaction *action; - count = 0; - num = -1; - for (j = 0; j < NR_IRQS; j++) - if (irq_desc[j].handler->typename == type_name) { - if (i == j) - num = count; - count++; - } + spin_lock_irqsave(&irq_desc[irq].lock, flags); - seq_printf(p, "%3d: ",i); - seq_printf(p, "%10u ", kstat_irqs(i)); - if (count > 1) { - int prec = (num >= 100 ? 3 : num >= 10 ? 2 : 1); - seq_printf(p, " %*s%d", 14 - prec, type_name, num); - } else - seq_printf(p, " %14s", type_name); + action = irq_desc[irq].action; + if (action) { + int j; + int count = 0; + int num = -1; + const char *type_name = irq_desc[irq].handler->typename; + + for (j = 0; j < NR_IRQS; j++) + if (irq_desc[j].handler->typename == type_name){ + if (irq == j) + num = count; + count++; + } + + seq_printf(p, "%3d: ",irq); + seq_printf(p, "%10u ", kstat_irqs(irq)); + if (count > 1) { + int prec = (num >= 100 ? 3 : num >= 10 ? 2 : 1); + seq_printf(p, " %*s%d", 14 - prec, + type_name, num); + } else + seq_printf(p, " %14s", type_name); - seq_printf(p, " %s", action->name); - for (action=action->next; action; action = action->next) - seq_printf(p, ", %s", action->name); - seq_putc(p, '\n'); -skip: - spin_unlock_irqrestore(&irq_desc[j].lock, flags); - } else if (i == NR_IRQS) - seq_printf(p, "ERR: %10lu\n", irq_err_count); - return 0; -} - -/* - * This should really return information about whether - * we should do bottom half handling etc. Right now we - * end up _always_ checking the bottom half, which is a - * waste of time and is not what some drivers would - * prefer. - */ -int handle_IRQ_event(unsigned int irq, struct pt_regs * regs, struct irqaction * action) -{ - int status = 1; /* Force the "do bottom halves" bit */ - int ret; - - if (!(action->flags & SA_INTERRUPT)) - local_irq_enable(); - - do { - ret = action->handler(irq, action->dev_id, regs); - if (ret == IRQ_HANDLED) - status |= action->flags; - action = action->next; - } while (action); - if (status & SA_SAMPLE_RANDOM) - add_interrupt_randomness(irq); - local_irq_disable(); - - return status; -} - -/* - * Generic enable/disable code: this just calls - * down into the PIC-specific version for the actual - * hardware disable after having gotten the irq - * controller lock. - */ - -/** - * disable_irq_nosync - disable an irq without waiting - * @irq: Interrupt to disable - * - * Disable the selected interrupt line. Disables of an interrupt - * stack. Unlike disable_irq(), this function does not ensure existing - * instances of the IRQ handler have completed before returning. - * - * This function may be called from IRQ context. - */ - -void inline disable_irq_nosync(unsigned int irq) -{ - irq_desc_t *desc = irq_desc + irq; - unsigned long flags; - - spin_lock_irqsave(&desc->lock, flags); - if (!desc->depth++) { - desc->status |= IRQ_DISABLED; - desc->handler->disable(irq); - } - spin_unlock_irqrestore(&desc->lock, flags); -} - -/** - * disable_irq - disable an irq and wait for completion - * @irq: Interrupt to disable - * - * Disable the selected interrupt line. Disables of an interrupt - * stack. That is for two disables you need two enables. This - * function waits for any pending IRQ handlers for this interrupt - * to complete before returning. If you use this function while - * holding a resource the IRQ handler may need you will deadlock. - * - * This function may be called - with care - from IRQ context. - */ - -void disable_irq(unsigned int irq) -{ - disable_irq_nosync(irq); - synchronize_irq(irq); -} + seq_printf(p, " %s", action->name); + for (action=action->next; action; action = action->next) + seq_printf(p, ", %s", action->name); + seq_putc(p, '\n'); + } -/** - * enable_irq - enable interrupt handling on an irq - * @irq: Interrupt to enable - * - * Re-enables the processing of interrupts on this IRQ line - * providing no disable_irq calls are now in effect. - * - * This function may be called from IRQ context. - */ - -void enable_irq(unsigned int irq) -{ - irq_desc_t *desc = irq_desc + irq; - unsigned long flags; + spin_unlock_irqrestore(&irq_desc[irq].lock, flags); + } else if (irq == NR_IRQS) + seq_printf(p, "ERR: %10lu\n", irq_err_count); - spin_lock_irqsave(&desc->lock, flags); - switch (desc->depth) { - case 1: { - unsigned int status = desc->status & ~IRQ_DISABLED; - desc->status = status; - if ((status & (IRQ_PENDING | IRQ_REPLAY)) == IRQ_PENDING) { - desc->status = status | IRQ_REPLAY; - hw_resend_irq(desc->handler,irq); - } - desc->handler->enable(irq); - /* fall-through */ - } - default: - desc->depth--; - break; - case 0: - printk("enable_irq(%u) unbalanced from %p\n", irq, - __builtin_return_address(0)); - } - spin_unlock_irqrestore(&desc->lock, flags); + return 0; } /* Handle interrupt IRQ. REGS are the registers at the time of ther interrupt. */ unsigned int handle_irq (int irq, struct pt_regs *regs) { - /* - * We ack quickly, we don't want the irq controller - * thinking we're snobs just because some other CPU has - * disabled global interrupts (we have already done the - * INT_ACK cycles, it's too late to try to pretend to the - * controller that we aren't taking the interrupt). - * - * 0 return value means that this irq is already being - * handled by some other CPU. (or is disabled) - */ - int cpu = smp_processor_id(); - irq_desc_t *desc = irq_desc + irq; - struct irqaction * action; - unsigned int status; - irq_enter(); - kstat_cpu(cpu).irqs[irq]++; - spin_lock(&desc->lock); - desc->handler->ack(irq); - /* - REPLAY is when Linux resends an IRQ that was dropped earlier - WAITING is used by probe to mark irqs that are being tested - */ - status = desc->status & ~(IRQ_REPLAY | IRQ_WAITING); - status |= IRQ_PENDING; /* we _want_ to handle it */ - - /* - * If the IRQ is disabled for whatever reason, we cannot - * use the action we have. - */ - action = NULL; - if (likely(!(status & (IRQ_DISABLED | IRQ_INPROGRESS)))) { - action = desc->action; - status &= ~IRQ_PENDING; /* we commit to handling */ - status |= IRQ_INPROGRESS; /* we are handling it */ - } - desc->status = status; - - /* - * If there is no IRQ handler or it was disabled, exit early. - Since we set PENDING, if another processor is handling - a different instance of this same irq, the other processor - will take care of it. - */ - if (unlikely(!action)) - goto out; - - /* - * Edge triggered interrupts need to remember - * pending events. - * This applies to any hw interrupts that allow a second - * instance of the same irq to arrive while we are in handle_irq - * or in the handler. But the code here only handles the _second_ - * instance of the irq, not the third or fourth. So it is mostly - * useful for irq hardware that does not mask cleanly in an - * SMP environment. - */ - for (;;) { - spin_unlock(&desc->lock); - handle_IRQ_event(irq, regs, action); - spin_lock(&desc->lock); - - if (likely(!(desc->status & IRQ_PENDING))) - break; - desc->status &= ~IRQ_PENDING; - } - desc->status &= ~IRQ_INPROGRESS; - -out: - /* - * The ->end() handler has to deal with interrupts which got - * disabled while the handler was running. - */ - desc->handler->end(irq); - spin_unlock(&desc->lock); - + __do_IRQ(irq, regs); irq_exit(); - return 1; } -/** - * request_irq - allocate an interrupt line - * @irq: Interrupt line to allocate - * @handler: Function to be called when the IRQ occurs - * @irqflags: Interrupt type flags - * @devname: An ascii name for the claiming device - * @dev_id: A cookie passed back to the handler function - * - * This call allocates interrupt resources and enables the - * interrupt line and IRQ handling. From the point this - * call is made your handler function may be invoked. Since - * your handler function must clear any interrupt the board - * raises, you must take care both to initialise your hardware - * and to set up the interrupt handler in the right order. - * - * Dev_id must be globally unique. Normally the address of the - * device data structure is used as the cookie. Since the handler - * receives this value it makes sense to use it. - * - * If your interrupt is shared you must pass a non NULL dev_id - * as this is required when freeing the interrupt. - * - * Flags: - * - * SA_SHIRQ Interrupt is shared - * - * SA_INTERRUPT Disable local interrupts while processing - * - * SA_SAMPLE_RANDOM The interrupt can be used for entropy - * - */ - -int request_irq(unsigned int irq, - irqreturn_t (*handler)(int, void *, struct pt_regs *), - unsigned long irqflags, - const char * devname, - void *dev_id) -{ - int retval; - struct irqaction * action; - -#if 1 - /* - * Sanity-check: shared interrupts should REALLY pass in - * a real dev-ID, otherwise we'll have trouble later trying - * to figure out which interrupt is which (messes up the - * interrupt freeing logic etc). - */ - if (irqflags & SA_SHIRQ) { - if (!dev_id) - printk("Bad boy: %s (at 0x%x) called us without a dev_id!\n", devname, (&irq)[-1]); - } -#endif - - if (irq >= NR_IRQS) - return -EINVAL; - if (!handler) - return -EINVAL; - - action = (struct irqaction *) - kmalloc(sizeof(struct irqaction), GFP_KERNEL); - if (!action) - return -ENOMEM; - - action->handler = handler; - action->flags = irqflags; - cpus_clear(action->mask); - action->name = devname; - action->next = NULL; - action->dev_id = dev_id; - - retval = setup_irq(irq, action); - if (retval) - kfree(action); - return retval; -} - -EXPORT_SYMBOL(request_irq); - -/** - * free_irq - free an interrupt - * @irq: Interrupt line to free - * @dev_id: Device identity to free - * - * Remove an interrupt handler. The handler is removed and if the - * interrupt line is no longer in use by any driver it is disabled. - * On a shared IRQ the caller must ensure the interrupt is disabled - * on the card it drives before calling this function. The function - * does not return until any executing interrupts for this IRQ - * have completed. - * - * This function may be called from interrupt context. - * - * Bugs: Attempting to free an irq in a handler for the same irq hangs - * the machine. - */ - -void free_irq(unsigned int irq, void *dev_id) -{ - irq_desc_t *desc; - struct irqaction **p; - unsigned long flags; - - if (irq >= NR_IRQS) - return; - - desc = irq_desc + irq; - spin_lock_irqsave(&desc->lock,flags); - p = &desc->action; - for (;;) { - struct irqaction * action = *p; - if (action) { - struct irqaction **pp = p; - p = &action->next; - if (action->dev_id != dev_id) - continue; - - /* Found it - now remove it from the list of entries */ - *pp = action->next; - if (!desc->action) { - desc->status |= IRQ_DISABLED; - desc->handler->shutdown(irq); - } - spin_unlock_irqrestore(&desc->lock,flags); - - synchronize_irq(irq); - kfree(action); - return; - } - printk("Trying to free free IRQ%d\n",irq); - spin_unlock_irqrestore(&desc->lock,flags); - return; - } -} - -EXPORT_SYMBOL(free_irq); - -/* - * IRQ autodetection code.. - * - * This depends on the fact that any interrupt that - * comes in on to an unassigned handler will get stuck - * with "IRQ_WAITING" cleared and the interrupt - * disabled. - */ - -static DECLARE_MUTEX(probe_sem); - -/** - * probe_irq_on - begin an interrupt autodetect - * - * Commence probing for an interrupt. The interrupts are scanned - * and a mask of potential interrupt lines is returned. - * - */ - -unsigned long probe_irq_on(void) -{ - unsigned int i; - irq_desc_t *desc; - unsigned long val; - unsigned long delay; - - down(&probe_sem); - /* - * something may have generated an irq long ago and we want to - * flush such a longstanding irq before considering it as spurious. - */ - for (i = NR_IRQS-1; i > 0; i--) { - desc = irq_desc + i; - - spin_lock_irq(&desc->lock); - if (!irq_desc[i].action) - irq_desc[i].handler->startup(i); - spin_unlock_irq(&desc->lock); - } - - /* Wait for longstanding interrupts to trigger. */ - for (delay = jiffies + HZ/50; time_after(delay, jiffies); ) - /* about 20ms delay */ barrier(); - - /* - * enable any unassigned irqs - * (we must startup again here because if a longstanding irq - * happened in the previous stage, it may have masked itself) - */ - for (i = NR_IRQS-1; i > 0; i--) { - desc = irq_desc + i; - - spin_lock_irq(&desc->lock); - if (!desc->action) { - desc->status |= IRQ_AUTODETECT | IRQ_WAITING; - if (desc->handler->startup(i)) - desc->status |= IRQ_PENDING; - } - spin_unlock_irq(&desc->lock); - } - - /* - * Wait for spurious interrupts to trigger - */ - for (delay = jiffies + HZ/10; time_after(delay, jiffies); ) - /* about 100ms delay */ barrier(); - - /* - * Now filter out any obviously spurious interrupts - */ - val = 0; - for (i = 0; i < NR_IRQS; i++) { - irq_desc_t *desc = irq_desc + i; - unsigned int status; - - spin_lock_irq(&desc->lock); - status = desc->status; - - if (status & IRQ_AUTODETECT) { - /* It triggered already - consider it spurious. */ - if (!(status & IRQ_WAITING)) { - desc->status = status & ~IRQ_AUTODETECT; - desc->handler->shutdown(i); - } else - if (i < 32) - val |= 1 << i; - } - spin_unlock_irq(&desc->lock); - } - - return val; -} - -EXPORT_SYMBOL(probe_irq_on); - -/* - * Return a mask of triggered interrupts (this - * can handle only legacy ISA interrupts). - */ - -/** - * probe_irq_mask - scan a bitmap of interrupt lines - * @val: mask of interrupts to consider - * - * Scan the ISA bus interrupt lines and return a bitmap of - * active interrupts. The interrupt probe logic state is then - * returned to its previous value. - * - * Note: we need to scan all the irq's even though we will - * only return ISA irq numbers - just so that we reset them - * all to a known state. - */ -unsigned int probe_irq_mask(unsigned long val) -{ - int i; - unsigned int mask; - - mask = 0; - for (i = 0; i < NR_IRQS; i++) { - irq_desc_t *desc = irq_desc + i; - unsigned int status; - - spin_lock_irq(&desc->lock); - status = desc->status; - - if (status & IRQ_AUTODETECT) { - if (i < 16 && !(status & IRQ_WAITING)) - mask |= 1 << i; - - desc->status = status & ~IRQ_AUTODETECT; - desc->handler->shutdown(i); - } - spin_unlock_irq(&desc->lock); - } - up(&probe_sem); - - return mask & val; -} - -/* - * Return the one interrupt that triggered (this can - * handle any interrupt source). - */ - -/** - * probe_irq_off - end an interrupt autodetect - * @val: mask of potential interrupts (unused) - * - * Scans the unused interrupt lines and returns the line which - * appears to have triggered the interrupt. If no interrupt was - * found then zero is returned. If more than one interrupt is - * found then minus the first candidate is returned to indicate - * their is doubt. - * - * The interrupt probe logic state is returned to its previous - * value. - * - * BUGS: When used in a module (which arguably shouldnt happen) - * nothing prevents two IRQ probe callers from overlapping. The - * results of this are non-optimal. - */ - -int probe_irq_off(unsigned long val) -{ - int i, irq_found, nr_irqs; - - nr_irqs = 0; - irq_found = 0; - for (i = 0; i < NR_IRQS; i++) { - irq_desc_t *desc = irq_desc + i; - unsigned int status; - - spin_lock_irq(&desc->lock); - status = desc->status; - - if (status & IRQ_AUTODETECT) { - if (!(status & IRQ_WAITING)) { - if (!nr_irqs) - irq_found = i; - nr_irqs++; - } - desc->status = status & ~IRQ_AUTODETECT; - desc->handler->shutdown(i); - } - spin_unlock_irq(&desc->lock); - } - up(&probe_sem); - - if (nr_irqs > 1) - irq_found = -irq_found; - return irq_found; -} - -EXPORT_SYMBOL(probe_irq_off); - -/* this was setup_x86_irq but it seems pretty generic */ -int setup_irq(unsigned int irq, struct irqaction * new) -{ - int shared = 0; - unsigned long flags; - struct irqaction *old, **p; - irq_desc_t *desc = irq_desc + irq; - - /* - * Some drivers like serial.c use request_irq() heavily, - * so we have to be careful not to interfere with a - * running system. - */ - if (new->flags & SA_SAMPLE_RANDOM) { - /* - * This function might sleep, we want to call it first, - * outside of the atomic block. - * Yes, this might clear the entropy pool if the wrong - * driver is attempted to be loaded, without actually - * installing a new handler, but is this really a problem, - * only the sysadmin is able to do this. - */ - rand_initialize_irq(irq); - } - - /* - * The following block of code has to be executed atomically - */ - spin_lock_irqsave(&desc->lock,flags); - p = &desc->action; - if ((old = *p) != NULL) { - /* Can't share interrupts unless both agree to */ - if (!(old->flags & new->flags & SA_SHIRQ)) { - spin_unlock_irqrestore(&desc->lock,flags); - return -EBUSY; - } - - /* add new interrupt at end of irq queue */ - do { - p = &old->next; - old = *p; - } while (old); - shared = 1; - } - - *p = new; - - if (!shared) { - desc->depth = 0; - desc->status &= ~(IRQ_DISABLED | IRQ_AUTODETECT | IRQ_WAITING | IRQ_INPROGRESS); - desc->handler->startup(irq); - } - spin_unlock_irqrestore(&desc->lock,flags); - - /* register_irq_proc(irq); */ - return 0; -} - /* Initialize irq handling for IRQs. BASE_IRQ, BASE_IRQ+INTERVAL, ..., BASE_IRQ+NUM*INTERVAL to IRQ_TYPE. An IRQ_TYPE of 0 means to use a generic interrupt type. */ @@ -736,9 +121,3 @@ init_irq_handlers (int base_irq, int num, int interval, base_irq += interval; } } - -#if defined(CONFIG_PROC_FS) && defined(CONFIG_SYSCTL) -void init_irq_proc(void) -{ -} -#endif /* CONFIG_PROC_FS && CONFIG_SYSCTL */ diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig index 4cce2f6f170c..6ece645e4dbe 100644 --- a/arch/x86_64/Kconfig +++ b/arch/x86_64/Kconfig @@ -226,22 +226,42 @@ config SCHED_SMT source "kernel/Kconfig.preempt" -config K8_NUMA - bool "K8 NUMA support" - select NUMA +config NUMA + bool "Non Uniform Memory Access (NUMA) Support" depends on SMP help - Enable NUMA (Non Unified Memory Architecture) support for - AMD Opteron Multiprocessor systems. The kernel will try to allocate - memory used by a CPU on the local memory controller of the CPU - and add some more NUMA awareness to the kernel. - This code is recommended on all multiprocessor Opteron systems - and normally doesn't hurt on others. + Enable NUMA (Non Uniform Memory Access) support. The kernel + will try to allocate memory used by a CPU on the local memory + controller of the CPU and add some more NUMA awareness to the kernel. + This code is recommended on all multiprocessor Opteron systems. + If the system is EM64T, you should say N unless your system is EM64T + NUMA. + +config K8_NUMA + bool "Old style AMD Opteron NUMA detection" + depends on NUMA + default y + help + Enable K8 NUMA node topology detection. You should say Y here if + you have a multi processor AMD K8 system. This uses an old + method to read the NUMA configurtion directly from the builtin + Northbridge of Opteron. It is recommended to use X86_64_ACPI_NUMA + instead, which also takes priority if both are compiled in. + +# Dummy CONFIG option to select ACPI_NUMA from drivers/acpi/Kconfig. + +config X86_64_ACPI_NUMA + bool "ACPI NUMA detection" + depends on NUMA + select ACPI + select ACPI_NUMA + default y + help + Enable ACPI SRAT based node topology detection. config NUMA_EMU - bool "NUMA emulation support" - select NUMA - depends on SMP + bool "NUMA emulation" + depends on NUMA help Enable NUMA emulation. A flat machine will be split into virtual nodes when booted with "numa=fake=N", where N is the @@ -252,9 +272,6 @@ config ARCH_DISCONTIGMEM_ENABLE depends on NUMA default y -config NUMA - bool - default n config ARCH_DISCONTIGMEM_ENABLE def_bool y @@ -374,6 +391,14 @@ config X86_MCE_INTEL Additional support for intel specific MCE features such as the thermal monitor. +config X86_MCE_AMD + bool "AMD MCE features" + depends on X86_MCE && X86_LOCAL_APIC + default y + help + Additional support for AMD specific MCE features such as + the DRAM Error Threshold. + config PHYSICAL_START hex "Physical address where the kernel is loaded" if EMBEDDED default "0x100000" @@ -502,7 +527,7 @@ config IA32_EMULATION left. config IA32_AOUT - bool "IA32 a.out support" + tristate "IA32 a.out support" depends on IA32_EMULATION help Support old a.out binaries in the 32bit emulation. diff --git a/arch/x86_64/Kconfig.debug b/arch/x86_64/Kconfig.debug index d584ecc27ea1..e2c6e64a85ec 100644 --- a/arch/x86_64/Kconfig.debug +++ b/arch/x86_64/Kconfig.debug @@ -2,15 +2,6 @@ menu "Kernel hacking" source "lib/Kconfig.debug" -# !SMP for now because the context switch early causes GPF in segment reloading -# and the GS base checking does the wrong thing then, causing a hang. -config CHECKING - bool "Additional run-time checks" - depends on DEBUG_KERNEL && !SMP - help - Enables some internal consistency checks for kernel debugging. - You should normally say N. - config INIT_DEBUG bool "Debug __init statements" depends on DEBUG_KERNEL diff --git a/arch/x86_64/defconfig b/arch/x86_64/defconfig index f8db7e500fbf..5d56542fb68f 100644 --- a/arch/x86_64/defconfig +++ b/arch/x86_64/defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.13-git11 -# Mon Sep 12 16:16:16 2005 +# Linux kernel version: 2.6.14-git7 +# Sat Nov 5 15:55:50 2005 # CONFIG_X86_64=y CONFIG_64BIT=y @@ -35,7 +35,7 @@ CONFIG_POSIX_MQUEUE=y # CONFIG_BSD_PROCESS_ACCT is not set CONFIG_SYSCTL=y # CONFIG_AUDIT is not set -# CONFIG_HOTPLUG is not set +CONFIG_HOTPLUG=y CONFIG_KOBJECT_UEVENT=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y @@ -93,10 +93,11 @@ CONFIG_PREEMPT_NONE=y # CONFIG_PREEMPT_VOLUNTARY is not set # CONFIG_PREEMPT is not set CONFIG_PREEMPT_BKL=y +CONFIG_NUMA=y CONFIG_K8_NUMA=y +CONFIG_X86_64_ACPI_NUMA=y # CONFIG_NUMA_EMU is not set CONFIG_ARCH_DISCONTIGMEM_ENABLE=y -CONFIG_NUMA=y CONFIG_ARCH_DISCONTIGMEM_DEFAULT=y CONFIG_ARCH_SPARSEMEM_ENABLE=y CONFIG_SELECT_MEMORY_MODEL=y @@ -107,9 +108,10 @@ CONFIG_DISCONTIGMEM=y CONFIG_FLAT_NODE_MEM_MAP=y CONFIG_NEED_MULTIPLE_NODES=y # CONFIG_SPARSEMEM_STATIC is not set +CONFIG_SPLIT_PTLOCK_CPUS=4 CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID=y -CONFIG_HAVE_DEC_LOCK=y CONFIG_NR_CPUS=32 +CONFIG_HOTPLUG_CPU=y CONFIG_HPET_TIMER=y CONFIG_X86_PM_TIMER=y CONFIG_HPET_EMULATE_RTC=y @@ -117,6 +119,7 @@ CONFIG_GART_IOMMU=y CONFIG_SWIOTLB=y CONFIG_X86_MCE=y CONFIG_X86_MCE_INTEL=y +CONFIG_X86_MCE_AMD=y CONFIG_PHYSICAL_START=0x100000 # CONFIG_KEXEC is not set CONFIG_SECCOMP=y @@ -136,11 +139,15 @@ CONFIG_PM=y # CONFIG_PM_DEBUG is not set CONFIG_SOFTWARE_SUSPEND=y CONFIG_PM_STD_PARTITION="" +CONFIG_SUSPEND_SMP=y # # ACPI (Advanced Configuration and Power Interface) Support # CONFIG_ACPI=y +CONFIG_ACPI_SLEEP=y +CONFIG_ACPI_SLEEP_PROC_FS=y +CONFIG_ACPI_SLEEP_PROC_SLEEP=y CONFIG_ACPI_AC=y CONFIG_ACPI_BATTERY=y CONFIG_ACPI_BUTTON=y @@ -148,6 +155,7 @@ CONFIG_ACPI_BUTTON=y CONFIG_ACPI_HOTKEY=m CONFIG_ACPI_FAN=y CONFIG_ACPI_PROCESSOR=y +CONFIG_ACPI_HOTPLUG_CPU=y CONFIG_ACPI_THERMAL=y CONFIG_ACPI_NUMA=y # CONFIG_ACPI_ASUS is not set @@ -158,7 +166,7 @@ CONFIG_ACPI_BLACKLIST_YEAR=2001 CONFIG_ACPI_EC=y CONFIG_ACPI_POWER=y CONFIG_ACPI_SYSTEM=y -# CONFIG_ACPI_CONTAINER is not set +CONFIG_ACPI_CONTAINER=y # # CPU Frequency scaling @@ -293,7 +301,6 @@ CONFIG_IPV6=y # Network testing # # CONFIG_NET_PKTGEN is not set -# CONFIG_NETFILTER_NETLINK is not set # CONFIG_HAMRADIO is not set # CONFIG_IRDA is not set # CONFIG_BT is not set @@ -312,6 +319,11 @@ CONFIG_PREVENT_FIRMWARE_BUILD=y # CONFIG_DEBUG_DRIVER is not set # +# Connector - unified userspace <-> kernelspace linker +# +# CONFIG_CONNECTOR is not set + +# # Memory Technology Devices (MTD) # # CONFIG_MTD is not set @@ -354,6 +366,11 @@ CONFIG_IOSCHED_NOOP=y # CONFIG_IOSCHED_AS is not set CONFIG_IOSCHED_DEADLINE=y CONFIG_IOSCHED_CFQ=y +# CONFIG_DEFAULT_AS is not set +CONFIG_DEFAULT_DEADLINE=y +# CONFIG_DEFAULT_CFQ is not set +# CONFIG_DEFAULT_NOOP is not set +CONFIG_DEFAULT_IOSCHED="cfq" # CONFIG_ATA_OVER_ETH is not set # @@ -450,6 +467,7 @@ CONFIG_BLK_DEV_SD=y CONFIG_SCSI_SPI_ATTRS=y # CONFIG_SCSI_FC_ATTRS is not set # CONFIG_SCSI_ISCSI_ATTRS is not set +# CONFIG_SCSI_SAS_ATTRS is not set # # SCSI low-level drivers @@ -469,20 +487,24 @@ CONFIG_AIC79XX_DEBUG_MASK=0 # CONFIG_AIC79XX_REG_PRETTY_PRINT is not set # CONFIG_MEGARAID_NEWGEN is not set # CONFIG_MEGARAID_LEGACY is not set +# CONFIG_MEGARAID_SAS is not set CONFIG_SCSI_SATA=y # CONFIG_SCSI_SATA_AHCI is not set # CONFIG_SCSI_SATA_SVW is not set CONFIG_SCSI_ATA_PIIX=y # CONFIG_SCSI_SATA_MV is not set -# CONFIG_SCSI_SATA_NV is not set -# CONFIG_SCSI_SATA_PROMISE is not set +CONFIG_SCSI_SATA_NV=y +# CONFIG_SCSI_PDC_ADMA is not set # CONFIG_SCSI_SATA_QSTOR is not set +# CONFIG_SCSI_SATA_PROMISE is not set # CONFIG_SCSI_SATA_SX4 is not set # CONFIG_SCSI_SATA_SIL is not set +# CONFIG_SCSI_SATA_SIL24 is not set # CONFIG_SCSI_SATA_SIS is not set # CONFIG_SCSI_SATA_ULI is not set CONFIG_SCSI_SATA_VIA=y # CONFIG_SCSI_SATA_VITESSE is not set +CONFIG_SCSI_SATA_INTEL_COMBINED=y # CONFIG_SCSI_BUSLOGIC is not set # CONFIG_SCSI_DMX3191D is not set # CONFIG_SCSI_EATA is not set @@ -525,6 +547,7 @@ CONFIG_BLK_DEV_DM=y CONFIG_FUSION=y CONFIG_FUSION_SPI=y # CONFIG_FUSION_FC is not set +# CONFIG_FUSION_SAS is not set CONFIG_FUSION_MAX_SGE=128 # CONFIG_FUSION_CTL is not set @@ -564,6 +587,7 @@ CONFIG_NET_ETHERNET=y CONFIG_MII=y # CONFIG_HAPPYMEAL is not set # CONFIG_SUNGEM is not set +# CONFIG_CASSINI is not set CONFIG_NET_VENDOR_3COM=y CONFIG_VORTEX=y # CONFIG_TYPHOON is not set @@ -740,7 +764,43 @@ CONFIG_LEGACY_PTY_COUNT=256 # # Watchdog Cards # -# CONFIG_WATCHDOG is not set +CONFIG_WATCHDOG=y +# CONFIG_WATCHDOG_NOWAYOUT is not set + +# +# Watchdog Device Drivers +# +CONFIG_SOFT_WATCHDOG=y +# CONFIG_ACQUIRE_WDT is not set +# CONFIG_ADVANTECH_WDT is not set +# CONFIG_ALIM1535_WDT is not set +# CONFIG_ALIM7101_WDT is not set +# CONFIG_SC520_WDT is not set +# CONFIG_EUROTECH_WDT is not set +# CONFIG_IB700_WDT is not set +# CONFIG_IBMASR is not set +# CONFIG_WAFER_WDT is not set +# CONFIG_I6300ESB_WDT is not set +# CONFIG_I8XX_TCO is not set +# CONFIG_SC1200_WDT is not set +# CONFIG_60XX_WDT is not set +# CONFIG_SBC8360_WDT is not set +# CONFIG_CPU5_WDT is not set +# CONFIG_W83627HF_WDT is not set +# CONFIG_W83877F_WDT is not set +# CONFIG_W83977F_WDT is not set +# CONFIG_MACHZ_WDT is not set + +# +# PCI-based Watchdog Cards +# +# CONFIG_PCIPCWATCHDOG is not set +# CONFIG_WDTPCI is not set + +# +# USB-based Watchdog Cards +# +# CONFIG_USBPCWATCHDOG is not set CONFIG_HW_RANDOM=y # CONFIG_NVRAM is not set CONFIG_RTC=y @@ -767,6 +827,7 @@ CONFIG_MAX_RAW_DEVS=256 # TPM devices # # CONFIG_TCG_TPM is not set +# CONFIG_TELCLOCK is not set # # I2C support @@ -783,6 +844,7 @@ CONFIG_MAX_RAW_DEVS=256 # CONFIG_HWMON=y # CONFIG_HWMON_VID is not set +# CONFIG_SENSORS_HDAPS is not set # CONFIG_HWMON_DEBUG_CHIP is not set # @@ -886,12 +948,15 @@ CONFIG_USB_UHCI_HCD=y # USB Device Class drivers # # CONFIG_OBSOLETE_OSS_USB_DRIVER is not set -# CONFIG_USB_BLUETOOTH_TTY is not set # CONFIG_USB_ACM is not set CONFIG_USB_PRINTER=y # -# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' may also be needed; see USB_STORAGE Help for more information +# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' +# + +# +# may also be needed; see USB_STORAGE Help for more information # CONFIG_USB_STORAGE=y # CONFIG_USB_STORAGE_DEBUG is not set @@ -924,6 +989,7 @@ CONFIG_USB_HIDINPUT=y # CONFIG_USB_XPAD is not set # CONFIG_USB_ATI_REMOTE is not set # CONFIG_USB_KEYSPAN_REMOTE is not set +# CONFIG_USB_APPLETOUCH is not set # # USB Imaging devices @@ -1005,7 +1071,7 @@ CONFIG_USB_MON=y # # CONFIG_EDD is not set # CONFIG_DELL_RBU is not set -CONFIG_DCDBAS=m +# CONFIG_DCDBAS is not set # # File systems @@ -1037,7 +1103,7 @@ CONFIG_INOTIFY=y # CONFIG_QUOTA is not set CONFIG_DNOTIFY=y CONFIG_AUTOFS_FS=y -# CONFIG_AUTOFS4_FS is not set +CONFIG_AUTOFS4_FS=y # CONFIG_FUSE_FS is not set # @@ -1068,7 +1134,7 @@ CONFIG_TMPFS=y CONFIG_HUGETLBFS=y CONFIG_HUGETLB_PAGE=y CONFIG_RAMFS=y -# CONFIG_RELAYFS_FS is not set +CONFIG_RELAYFS_FS=y # # Miscellaneous filesystems @@ -1186,7 +1252,9 @@ CONFIG_DETECT_SOFTLOCKUP=y # CONFIG_DEBUG_KOBJECT is not set # CONFIG_DEBUG_INFO is not set CONFIG_DEBUG_FS=y +# CONFIG_DEBUG_VM is not set # CONFIG_FRAME_POINTER is not set +# CONFIG_RCU_TORTURE_TEST is not set CONFIG_INIT_DEBUG=y # CONFIG_IOMMU_DEBUG is not set CONFIG_KPROBES=y diff --git a/arch/x86_64/ia32/ia32_aout.c b/arch/x86_64/ia32/ia32_aout.c index 93c60f4aa47a..3bf58af98936 100644 --- a/arch/x86_64/ia32/ia32_aout.c +++ b/arch/x86_64/ia32/ia32_aout.c @@ -36,9 +36,6 @@ #undef WARN_OLD #undef CORE_DUMP /* probably broken */ -extern int ia32_setup_arg_pages(struct linux_binprm *bprm, - unsigned long stack_top, int exec_stack); - static int load_aout_binary(struct linux_binprm *, struct pt_regs * regs); static int load_aout_library(struct file*); diff --git a/arch/x86_64/ia32/ia32_binfmt.c b/arch/x86_64/ia32/ia32_binfmt.c index d9161e395978..830feb272eca 100644 --- a/arch/x86_64/ia32/ia32_binfmt.c +++ b/arch/x86_64/ia32/ia32_binfmt.c @@ -335,7 +335,8 @@ static void elf32_init(struct pt_regs *regs) me->thread.es = __USER_DS; } -int setup_arg_pages(struct linux_binprm *bprm, unsigned long stack_top, int executable_stack) +int ia32_setup_arg_pages(struct linux_binprm *bprm, unsigned long stack_top, + int executable_stack) { unsigned long stack_base; struct vm_area_struct *mpnt; @@ -389,6 +390,7 @@ int setup_arg_pages(struct linux_binprm *bprm, unsigned long stack_top, int exec return 0; } +EXPORT_SYMBOL(ia32_setup_arg_pages); static unsigned long elf32_map (struct file *filep, unsigned long addr, struct elf_phdr *eppnt, int prot, int type) diff --git a/arch/x86_64/kernel/Makefile b/arch/x86_64/kernel/Makefile index 14328cab5d3a..fe4cbd1c4b2f 100644 --- a/arch/x86_64/kernel/Makefile +++ b/arch/x86_64/kernel/Makefile @@ -11,6 +11,7 @@ obj-y := process.o signal.o entry.o traps.o irq.o \ obj-$(CONFIG_X86_MCE) += mce.o obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o +obj-$(CONFIG_X86_MCE_AMD) += mce_amd.o obj-$(CONFIG_MTRR) += ../../i386/kernel/cpu/mtrr/ obj-$(CONFIG_ACPI) += acpi/ obj-$(CONFIG_X86_MSR) += msr.o diff --git a/arch/x86_64/kernel/aperture.c b/arch/x86_64/kernel/aperture.c index 962ad4823b6a..c7f4fdd20f05 100644 --- a/arch/x86_64/kernel/aperture.c +++ b/arch/x86_64/kernel/aperture.c @@ -196,7 +196,7 @@ static __u32 __init search_agp_bridge(u32 *order, int *valid_agp) void __init iommu_hole_init(void) { int fix, num; - u32 aper_size, aper_alloc = 0, aper_order, last_aper_order = 0; + u32 aper_size, aper_alloc = 0, aper_order = 0, last_aper_order = 0; u64 aper_base, last_aper_base = 0; int valid_agp = 0; diff --git a/arch/x86_64/kernel/apic.c b/arch/x86_64/kernel/apic.c index b6e7715d877f..18691ce4c759 100644 --- a/arch/x86_64/kernel/apic.c +++ b/arch/x86_64/kernel/apic.c @@ -833,6 +833,16 @@ int setup_profiling_timer(unsigned int multiplier) return 0; } +#ifdef CONFIG_X86_MCE_AMD +void setup_threshold_lvt(unsigned long lvt_off) +{ + unsigned int v = 0; + unsigned long reg = (lvt_off << 4) + 0x500; + v |= THRESHOLD_APIC_VECTOR; + apic_write(reg, v); +} +#endif /* CONFIG_X86_MCE_AMD */ + #undef APIC_DIVISOR /* diff --git a/arch/x86_64/kernel/e820.c b/arch/x86_64/kernel/e820.c index ab3f87aaff70..17579a1a174b 100644 --- a/arch/x86_64/kernel/e820.c +++ b/arch/x86_64/kernel/e820.c @@ -23,8 +23,7 @@ #include <asm/e820.h> #include <asm/proto.h> #include <asm/bootsetup.h> - -extern char _end[]; +#include <asm/sections.h> /* * PFN of last memory page. diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S index 7937971d1853..9ff42041bb6b 100644 --- a/arch/x86_64/kernel/entry.S +++ b/arch/x86_64/kernel/entry.S @@ -612,6 +612,9 @@ retint_kernel: ENTRY(thermal_interrupt) apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt +ENTRY(threshold_interrupt) + apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt + #ifdef CONFIG_SMP ENTRY(reschedule_interrupt) apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt diff --git a/arch/x86_64/kernel/head.S b/arch/x86_64/kernel/head.S index b92e5f45ed46..15290968e49d 100644 --- a/arch/x86_64/kernel/head.S +++ b/arch/x86_64/kernel/head.S @@ -12,6 +12,7 @@ #include <linux/linkage.h> #include <linux/threads.h> +#include <linux/init.h> #include <asm/desc.h> #include <asm/segment.h> #include <asm/page.h> @@ -70,7 +71,7 @@ startup_32: movl %eax, %cr4 /* Setup early boot stage 4 level pagetables */ - movl $(init_level4_pgt - __START_KERNEL_map), %eax + movl $(boot_level4_pgt - __START_KERNEL_map), %eax movl %eax, %cr3 /* Setup EFER (Extended Feature Enable Register) */ @@ -113,7 +114,7 @@ startup_64: movq %rax, %cr4 /* Setup early boot stage 4 level pagetables. */ - movq $(init_level4_pgt - __START_KERNEL_map), %rax + movq $(boot_level4_pgt - __START_KERNEL_map), %rax movq %rax, %cr3 /* Check if nx is implemented */ @@ -240,20 +241,10 @@ ljumpvector: ENTRY(stext) ENTRY(_stext) - /* - * This default setting generates an ident mapping at address 0x100000 - * and a mapping for the kernel that precisely maps virtual address - * 0xffffffff80000000 to physical address 0x000000. (always using - * 2Mbyte large pages provided by PAE mode) - */ .org 0x1000 ENTRY(init_level4_pgt) - .quad 0x0000000000002007 + __PHYSICAL_START /* -> level3_ident_pgt */ - .fill 255,8,0 - .quad 0x000000000000a007 + __PHYSICAL_START - .fill 254,8,0 - /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */ - .quad 0x0000000000003007 + __PHYSICAL_START /* -> level3_kernel_pgt */ + /* This gets initialized in x86_64_start_kernel */ + .fill 512,8,0 .org 0x2000 ENTRY(level3_ident_pgt) @@ -350,6 +341,24 @@ ENTRY(wakeup_level4_pgt) .quad 0x0000000000003007 + __PHYSICAL_START /* -> level3_kernel_pgt */ #endif +#ifndef CONFIG_HOTPLUG_CPU + __INITDATA +#endif + /* + * This default setting generates an ident mapping at address 0x100000 + * and a mapping for the kernel that precisely maps virtual address + * 0xffffffff80000000 to physical address 0x000000. (always using + * 2Mbyte large pages provided by PAE mode) + */ + .align PAGE_SIZE +ENTRY(boot_level4_pgt) + .quad 0x0000000000002007 + __PHYSICAL_START /* -> level3_ident_pgt */ + .fill 255,8,0 + .quad 0x000000000000a007 + __PHYSICAL_START + .fill 254,8,0 + /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */ + .quad 0x0000000000003007 + __PHYSICAL_START /* -> level3_kernel_pgt */ + .data .align 16 diff --git a/arch/x86_64/kernel/head64.c b/arch/x86_64/kernel/head64.c index cf6ab147a2a5..b675c5add01e 100644 --- a/arch/x86_64/kernel/head64.c +++ b/arch/x86_64/kernel/head64.c @@ -19,14 +19,15 @@ #include <asm/bootsetup.h> #include <asm/setup.h> #include <asm/desc.h> +#include <asm/pgtable.h> +#include <asm/sections.h> /* Don't add a printk in there. printk relies on the PDA which is not initialized yet. */ static void __init clear_bss(void) { - extern char __bss_start[], __bss_end[]; memset(__bss_start, 0, - (unsigned long) __bss_end - (unsigned long) __bss_start); + (unsigned long) __bss_stop - (unsigned long) __bss_start); } #define NEW_CL_POINTER 0x228 /* Relative to real mode data */ @@ -75,8 +76,6 @@ static void __init setup_boot_cpu_data(void) boot_cpu_data.x86_mask = eax & 0xf; } -extern char _end[]; - void __init x86_64_start_kernel(char * real_mode_data) { char *s; @@ -86,6 +85,13 @@ void __init x86_64_start_kernel(char * real_mode_data) set_intr_gate(i, early_idt_handler); asm volatile("lidt %0" :: "m" (idt_descr)); clear_bss(); + + /* + * switch to init_level4_pgt from boot_level4_pgt + */ + memcpy(init_level4_pgt, boot_level4_pgt, PTRS_PER_PGD*sizeof(pgd_t)); + asm volatile("movq %0,%%cr3" :: "r" (__pa_symbol(&init_level4_pgt))); + pda_init(0); copy_bootdata(real_mode_data); #ifdef CONFIG_SMP diff --git a/arch/x86_64/kernel/i8259.c b/arch/x86_64/kernel/i8259.c index a9368d4c4aba..6e5101ad3d1a 100644 --- a/arch/x86_64/kernel/i8259.c +++ b/arch/x86_64/kernel/i8259.c @@ -492,6 +492,7 @@ void invalidate_interrupt5(void); void invalidate_interrupt6(void); void invalidate_interrupt7(void); void thermal_interrupt(void); +void threshold_interrupt(void); void i8254_timer_resume(void); static void setup_timer_hardware(void) @@ -580,6 +581,7 @@ void __init init_IRQ(void) set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); #endif set_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); + set_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt); #ifdef CONFIG_X86_LOCAL_APIC /* self generated IPI for local APIC timer */ diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c index c8eee20cd519..97154ab058b4 100644 --- a/arch/x86_64/kernel/io_apic.c +++ b/arch/x86_64/kernel/io_apic.c @@ -57,7 +57,7 @@ int nr_ioapic_registers[MAX_IO_APICS]; * Rough estimation of how many shared IRQs there are, can * be changed anytime. */ -#define MAX_PLUS_SHARED_IRQS NR_IRQS +#define MAX_PLUS_SHARED_IRQS NR_IRQ_VECTORS #define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS) /* @@ -85,6 +85,7 @@ int vector_irq[NR_VECTORS] __read_mostly = { [0 ... NR_VECTORS - 1] = -1}; int pin; \ struct irq_pin_list *entry = irq_2_pin + irq; \ \ + BUG_ON(irq >= NR_IRQS); \ for (;;) { \ unsigned int reg; \ pin = entry->pin; \ @@ -127,6 +128,8 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) } #endif +static u8 gsi_2_irq[NR_IRQ_VECTORS] = { [0 ... NR_IRQ_VECTORS-1] = 0xFF }; + /* * The common case is 1:1 IRQ<->pin mappings. Sometimes there are * shared ISA-space IRQs, so we have to support them. We are super @@ -137,6 +140,7 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin) static int first_free_entry = NR_IRQS; struct irq_pin_list *entry = irq_2_pin + irq; + BUG_ON(irq >= NR_IRQS); while (entry->next) entry = irq_2_pin + entry->next; @@ -144,7 +148,7 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin) entry->next = first_free_entry; entry = irq_2_pin + entry->next; if (++first_free_entry >= PIN_MAP_SIZE) - panic("io_apic.c: whoops"); + panic("io_apic.c: ran out of irq_2_pin entries!"); } entry->apic = apic; entry->pin = pin; @@ -420,6 +424,7 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) best_guess = irq; } } + BUG_ON(best_guess >= NR_IRQS); return best_guess; } @@ -610,6 +615,64 @@ static inline int irq_trigger(int idx) return MPBIOS_trigger(idx); } +static int next_irq = 16; + +/* + * gsi_irq_sharing -- Name overload! "irq" can be either a legacy IRQ + * in the range 0-15, a linux IRQ in the range 0-223, or a GSI number + * from ACPI, which can reach 800 in large boxen. + * + * Compact the sparse GSI space into a sequential IRQ series and reuse + * vectors if possible. + */ +int gsi_irq_sharing(int gsi) +{ + int i, tries, vector; + + BUG_ON(gsi >= NR_IRQ_VECTORS); + + if (platform_legacy_irq(gsi)) + return gsi; + + if (gsi_2_irq[gsi] != 0xFF) + return (int)gsi_2_irq[gsi]; + + tries = NR_IRQS; + try_again: + vector = assign_irq_vector(gsi); + + /* + * Sharing vectors means sharing IRQs, so scan irq_vectors for previous + * use of vector and if found, return that IRQ. However, we never want + * to share legacy IRQs, which usually have a different trigger mode + * than PCI. + */ + for (i = 0; i < NR_IRQS; i++) + if (IO_APIC_VECTOR(i) == vector) + break; + if (platform_legacy_irq(i)) { + if (--tries >= 0) { + IO_APIC_VECTOR(i) = 0; + goto try_again; + } + panic("gsi_irq_sharing: didn't find an IRQ using vector 0x%02X for GSI %d", vector, gsi); + } + if (i < NR_IRQS) { + gsi_2_irq[gsi] = i; + printk(KERN_INFO "GSI %d sharing vector 0x%02X and IRQ %d\n", + gsi, vector, i); + return i; + } + + i = next_irq++; + BUG_ON(i >= NR_IRQS); + gsi_2_irq[gsi] = i; + IO_APIC_VECTOR(i) = vector; + printk(KERN_INFO "GSI %d assigned vector 0x%02X and IRQ %d\n", + gsi, vector, i); + return i; +} + static int pin_2_irq(int idx, int apic, int pin) { int irq, i; @@ -639,6 +702,7 @@ static int pin_2_irq(int idx, int apic, int pin) while (i < apic) irq += nr_ioapic_registers[i++]; irq += pin; + irq = gsi_irq_sharing(irq); break; } default: @@ -648,6 +712,7 @@ static int pin_2_irq(int idx, int apic, int pin) break; } } + BUG_ON(irq >= NR_IRQS); /* * PCI IRQ command line redirection. Yes, limits are hardcoded. @@ -663,6 +728,7 @@ static int pin_2_irq(int idx, int apic, int pin) } } } + BUG_ON(irq >= NR_IRQS); return irq; } @@ -690,8 +756,8 @@ int assign_irq_vector(int irq) { static int current_vector = FIRST_DEVICE_VECTOR, offset = 0; - BUG_ON(irq >= NR_IRQ_VECTORS); - if (IO_APIC_VECTOR(irq) > 0) + BUG_ON(irq != AUTO_ASSIGN && (unsigned)irq >= NR_IRQ_VECTORS); + if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0) return IO_APIC_VECTOR(irq); next: current_vector += 8; @@ -699,9 +765,8 @@ next: goto next; if (current_vector >= FIRST_SYSTEM_VECTOR) { - offset++; - if (!(offset%8)) - return -ENOSPC; + /* If we run out of vectors on large boxen, must share them. */ + offset = (offset + 1) % 8; current_vector = FIRST_DEVICE_VECTOR + offset; } @@ -1917,6 +1982,7 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int a entry.polarity = active_high_low; entry.mask = 1; /* Disabled (masked) */ + irq = gsi_irq_sharing(irq); /* * IRQs < 16 are already in the irq_2_pin[] map */ diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c index 69541db5ff2c..183dc6105429 100644 --- a/arch/x86_64/kernel/mce.c +++ b/arch/x86_64/kernel/mce.c @@ -37,7 +37,7 @@ static unsigned long bank[NR_BANKS] = { [0 ... NR_BANKS-1] = ~0UL }; static unsigned long console_logged; static int notify_user; static int rip_msr; -static int mce_bootlog; +static int mce_bootlog = 1; /* * Lockless MCE logging infrastructure. @@ -347,7 +347,11 @@ static void __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c) /* disable GART TBL walk error reporting, which trips off incorrectly with the IOMMU & 3ware & Cerberus. */ clear_bit(10, &bank[4]); + /* Lots of broken BIOS around that don't clear them + by default and leave crap in there. Don't log. */ + mce_bootlog = 0; } + } static void __cpuinit mce_cpu_features(struct cpuinfo_x86 *c) @@ -356,6 +360,9 @@ static void __cpuinit mce_cpu_features(struct cpuinfo_x86 *c) case X86_VENDOR_INTEL: mce_intel_feature_init(c); break; + case X86_VENDOR_AMD: + mce_amd_feature_init(c); + break; default: break; } @@ -495,16 +502,16 @@ static int __init mcheck_disable(char *str) /* mce=off disables machine check. Note you can reenable it later using sysfs. mce=TOLERANCELEVEL (number, see above) - mce=bootlog Log MCEs from before booting. Disabled by default to work - around buggy BIOS that leave bogus MCEs. */ + mce=bootlog Log MCEs from before booting. Disabled by default on AMD. + mce=nobootlog Don't log MCEs from before booting. */ static int __init mcheck_enable(char *str) { if (*str == '=') str++; if (!strcmp(str, "off")) mce_dont_init = 1; - else if (!strcmp(str, "bootlog")) - mce_bootlog = 1; + else if (!strcmp(str, "bootlog") || !strcmp(str,"nobootlog")) + mce_bootlog = str[0] == 'b'; else if (isdigit(str[0])) get_option(&str, &tolerant); else diff --git a/arch/x86_64/kernel/mce_amd.c b/arch/x86_64/kernel/mce_amd.c new file mode 100644 index 000000000000..1f76175ace02 --- /dev/null +++ b/arch/x86_64/kernel/mce_amd.c @@ -0,0 +1,538 @@ +/* + * (c) 2005 Advanced Micro Devices, Inc. + * Your use of this code is subject to the terms and conditions of the + * GNU general public license version 2. See "COPYING" or + * http://www.gnu.org/licenses/gpl.html + * + * Written by Jacob Shin - AMD, Inc. + * + * Support : jacob.shin@amd.com + * + * MC4_MISC0 DRAM ECC Error Threshold available under AMD K8 Rev F. + * MC4_MISC0 exists per physical processor. + * + */ + +#include <linux/cpu.h> +#include <linux/errno.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/kobject.h> +#include <linux/notifier.h> +#include <linux/sched.h> +#include <linux/smp.h> +#include <linux/sysdev.h> +#include <linux/sysfs.h> +#include <asm/apic.h> +#include <asm/mce.h> +#include <asm/msr.h> +#include <asm/percpu.h> + +#define PFX "mce_threshold: " +#define VERSION "version 1.00.9" +#define NR_BANKS 5 +#define THRESHOLD_MAX 0xFFF +#define INT_TYPE_APIC 0x00020000 +#define MASK_VALID_HI 0x80000000 +#define MASK_LVTOFF_HI 0x00F00000 +#define MASK_COUNT_EN_HI 0x00080000 +#define MASK_INT_TYPE_HI 0x00060000 +#define MASK_OVERFLOW_HI 0x00010000 +#define MASK_ERR_COUNT_HI 0x00000FFF +#define MASK_OVERFLOW 0x0001000000000000L + +struct threshold_bank { + unsigned int cpu; + u8 bank; + u8 interrupt_enable; + u16 threshold_limit; + struct kobject kobj; +}; + +static struct threshold_bank threshold_defaults = { + .interrupt_enable = 0, + .threshold_limit = THRESHOLD_MAX, +}; + +#ifdef CONFIG_SMP +static unsigned char shared_bank[NR_BANKS] = { + 0, 0, 0, 0, 1 +}; +#endif + +static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */ + +/* + * CPU Initialization + */ + +/* must be called with correct cpu affinity */ +static void threshold_restart_bank(struct threshold_bank *b, + int reset, u16 old_limit) +{ + u32 mci_misc_hi, mci_misc_lo; + + rdmsr(MSR_IA32_MC0_MISC + b->bank * 4, mci_misc_lo, mci_misc_hi); + + if (b->threshold_limit < (mci_misc_hi & THRESHOLD_MAX)) + reset = 1; /* limit cannot be lower than err count */ + + if (reset) { /* reset err count and overflow bit */ + mci_misc_hi = + (mci_misc_hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) | + (THRESHOLD_MAX - b->threshold_limit); + } else if (old_limit) { /* change limit w/o reset */ + int new_count = (mci_misc_hi & THRESHOLD_MAX) + + (old_limit - b->threshold_limit); + mci_misc_hi = (mci_misc_hi & ~MASK_ERR_COUNT_HI) | + (new_count & THRESHOLD_MAX); + } + + b->interrupt_enable ? + (mci_misc_hi = (mci_misc_hi & ~MASK_INT_TYPE_HI) | INT_TYPE_APIC) : + (mci_misc_hi &= ~MASK_INT_TYPE_HI); + + mci_misc_hi |= MASK_COUNT_EN_HI; + wrmsr(MSR_IA32_MC0_MISC + b->bank * 4, mci_misc_lo, mci_misc_hi); +} + +void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c) +{ + int bank; + u32 mci_misc_lo, mci_misc_hi; + unsigned int cpu = smp_processor_id(); + + for (bank = 0; bank < NR_BANKS; ++bank) { + rdmsr(MSR_IA32_MC0_MISC + bank * 4, mci_misc_lo, mci_misc_hi); + + /* !valid, !counter present, bios locked */ + if (!(mci_misc_hi & MASK_VALID_HI) || + !(mci_misc_hi & MASK_VALID_HI >> 1) || + (mci_misc_hi & MASK_VALID_HI >> 2)) + continue; + + per_cpu(bank_map, cpu) |= (1 << bank); + +#ifdef CONFIG_SMP + if (shared_bank[bank] && cpu_core_id[cpu]) + continue; +#endif + + setup_threshold_lvt((mci_misc_hi & MASK_LVTOFF_HI) >> 20); + threshold_defaults.cpu = cpu; + threshold_defaults.bank = bank; + threshold_restart_bank(&threshold_defaults, 0, 0); + } +} + +/* + * APIC Interrupt Handler + */ + +/* + * threshold interrupt handler will service THRESHOLD_APIC_VECTOR. + * the interrupt goes off when error_count reaches threshold_limit. + * the handler will simply log mcelog w/ software defined bank number. + */ +asmlinkage void mce_threshold_interrupt(void) +{ + int bank; + struct mce m; + + ack_APIC_irq(); + irq_enter(); + + memset(&m, 0, sizeof(m)); + rdtscll(m.tsc); + m.cpu = smp_processor_id(); + + /* assume first bank caused it */ + for (bank = 0; bank < NR_BANKS; ++bank) { + m.bank = MCE_THRESHOLD_BASE + bank; + rdmsrl(MSR_IA32_MC0_MISC + bank * 4, m.misc); + + if (m.misc & MASK_OVERFLOW) { + mce_log(&m); + goto out; + } + } + out: + irq_exit(); +} + +/* + * Sysfs Interface + */ + +static struct sysdev_class threshold_sysclass = { + set_kset_name("threshold"), +}; + +static DEFINE_PER_CPU(struct sys_device, device_threshold); + +struct threshold_attr { + struct attribute attr; + ssize_t(*show) (struct threshold_bank *, char *); + ssize_t(*store) (struct threshold_bank *, const char *, size_t count); +}; + +static DEFINE_PER_CPU(struct threshold_bank *, threshold_banks[NR_BANKS]); + +static cpumask_t affinity_set(unsigned int cpu) +{ + cpumask_t oldmask = current->cpus_allowed; + cpumask_t newmask = CPU_MASK_NONE; + cpu_set(cpu, newmask); + set_cpus_allowed(current, newmask); + return oldmask; +} + +static void affinity_restore(cpumask_t oldmask) +{ + set_cpus_allowed(current, oldmask); +} + +#define SHOW_FIELDS(name) \ + static ssize_t show_ ## name(struct threshold_bank * b, char *buf) \ + { \ + return sprintf(buf, "%lx\n", (unsigned long) b->name); \ + } +SHOW_FIELDS(interrupt_enable) +SHOW_FIELDS(threshold_limit) + +static ssize_t store_interrupt_enable(struct threshold_bank *b, + const char *buf, size_t count) +{ + char *end; + cpumask_t oldmask; + unsigned long new = simple_strtoul(buf, &end, 0); + if (end == buf) + return -EINVAL; + b->interrupt_enable = !!new; + + oldmask = affinity_set(b->cpu); + threshold_restart_bank(b, 0, 0); + affinity_restore(oldmask); + + return end - buf; +} + +static ssize_t store_threshold_limit(struct threshold_bank *b, + const char *buf, size_t count) +{ + char *end; + cpumask_t oldmask; + u16 old; + unsigned long new = simple_strtoul(buf, &end, 0); + if (end == buf) + return -EINVAL; + if (new > THRESHOLD_MAX) + new = THRESHOLD_MAX; + if (new < 1) + new = 1; + old = b->threshold_limit; + b->threshold_limit = new; + + oldmask = affinity_set(b->cpu); + threshold_restart_bank(b, 0, old); + affinity_restore(oldmask); + + return end - buf; +} + +static ssize_t show_error_count(struct threshold_bank *b, char *buf) +{ + u32 high, low; + cpumask_t oldmask; + oldmask = affinity_set(b->cpu); + rdmsr(MSR_IA32_MC0_MISC + b->bank * 4, low, high); /* ignore low 32 */ + affinity_restore(oldmask); + return sprintf(buf, "%x\n", + (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit)); +} + +static ssize_t store_error_count(struct threshold_bank *b, + const char *buf, size_t count) +{ + cpumask_t oldmask; + oldmask = affinity_set(b->cpu); + threshold_restart_bank(b, 1, 0); + affinity_restore(oldmask); + return 1; +} + +#define THRESHOLD_ATTR(_name,_mode,_show,_store) { \ + .attr = {.name = __stringify(_name), .mode = _mode }, \ + .show = _show, \ + .store = _store, \ +}; + +#define ATTR_FIELDS(name) \ + static struct threshold_attr name = \ + THRESHOLD_ATTR(name, 0644, show_## name, store_## name) + +ATTR_FIELDS(interrupt_enable); +ATTR_FIELDS(threshold_limit); +ATTR_FIELDS(error_count); + +static struct attribute *default_attrs[] = { + &interrupt_enable.attr, + &threshold_limit.attr, + &error_count.attr, + NULL +}; + +#define to_bank(k) container_of(k,struct threshold_bank,kobj) +#define to_attr(a) container_of(a,struct threshold_attr,attr) + +static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf) +{ + struct threshold_bank *b = to_bank(kobj); + struct threshold_attr *a = to_attr(attr); + ssize_t ret; + ret = a->show ? a->show(b, buf) : -EIO; + return ret; +} + +static ssize_t store(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t count) +{ + struct threshold_bank *b = to_bank(kobj); + struct threshold_attr *a = to_attr(attr); + ssize_t ret; + ret = a->store ? a->store(b, buf, count) : -EIO; + return ret; +} + +static struct sysfs_ops threshold_ops = { + .show = show, + .store = store, +}; + +static struct kobj_type threshold_ktype = { + .sysfs_ops = &threshold_ops, + .default_attrs = default_attrs, +}; + +/* symlinks sibling shared banks to first core. first core owns dir/files. */ +static __cpuinit int threshold_create_bank(unsigned int cpu, int bank) +{ + int err = 0; + struct threshold_bank *b = 0; + +#ifdef CONFIG_SMP + if (cpu_core_id[cpu] && shared_bank[bank]) { /* symlink */ + char name[16]; + unsigned lcpu = first_cpu(cpu_core_map[cpu]); + if (cpu_core_id[lcpu]) + goto out; /* first core not up yet */ + + b = per_cpu(threshold_banks, lcpu)[bank]; + if (!b) + goto out; + sprintf(name, "bank%i", bank); + err = sysfs_create_link(&per_cpu(device_threshold, cpu).kobj, + &b->kobj, name); + if (err) + goto out; + per_cpu(threshold_banks, cpu)[bank] = b; + goto out; + } +#endif + + b = kmalloc(sizeof(struct threshold_bank), GFP_KERNEL); + if (!b) { + err = -ENOMEM; + goto out; + } + memset(b, 0, sizeof(struct threshold_bank)); + + b->cpu = cpu; + b->bank = bank; + b->interrupt_enable = 0; + b->threshold_limit = THRESHOLD_MAX; + kobject_set_name(&b->kobj, "bank%i", bank); + b->kobj.parent = &per_cpu(device_threshold, cpu).kobj; + b->kobj.ktype = &threshold_ktype; + + err = kobject_register(&b->kobj); + if (err) { + kfree(b); + goto out; + } + per_cpu(threshold_banks, cpu)[bank] = b; + out: + return err; +} + +/* create dir/files for all valid threshold banks */ +static __cpuinit int threshold_create_device(unsigned int cpu) +{ + int bank; + int err = 0; + + per_cpu(device_threshold, cpu).id = cpu; + per_cpu(device_threshold, cpu).cls = &threshold_sysclass; + err = sysdev_register(&per_cpu(device_threshold, cpu)); + if (err) + goto out; + + for (bank = 0; bank < NR_BANKS; ++bank) { + if (!(per_cpu(bank_map, cpu) & 1 << bank)) + continue; + err = threshold_create_bank(cpu, bank); + if (err) + goto out; + } + out: + return err; +} + +#ifdef CONFIG_HOTPLUG_CPU +/* + * let's be hotplug friendly. + * in case of multiple core processors, the first core always takes ownership + * of shared sysfs dir/files, and rest of the cores will be symlinked to it. + */ + +/* cpu hotplug call removes all symlinks before first core dies */ +static __cpuinit void threshold_remove_bank(unsigned int cpu, int bank) +{ + struct threshold_bank *b; + char name[16]; + + b = per_cpu(threshold_banks, cpu)[bank]; + if (!b) + return; + if (shared_bank[bank] && atomic_read(&b->kobj.kref.refcount) > 2) { + sprintf(name, "bank%i", bank); + sysfs_remove_link(&per_cpu(device_threshold, cpu).kobj, name); + per_cpu(threshold_banks, cpu)[bank] = 0; + } else { + kobject_unregister(&b->kobj); + kfree(per_cpu(threshold_banks, cpu)[bank]); + } +} + +static __cpuinit void threshold_remove_device(unsigned int cpu) +{ + int bank; + + for (bank = 0; bank < NR_BANKS; ++bank) { + if (!(per_cpu(bank_map, cpu) & 1 << bank)) + continue; + threshold_remove_bank(cpu, bank); + } + sysdev_unregister(&per_cpu(device_threshold, cpu)); +} + +/* link all existing siblings when first core comes up */ +static __cpuinit int threshold_create_symlinks(unsigned int cpu) +{ + int bank, err = 0; + unsigned int lcpu = 0; + + if (cpu_core_id[cpu]) + return 0; + for_each_cpu_mask(lcpu, cpu_core_map[cpu]) { + if (lcpu == cpu) + continue; + for (bank = 0; bank < NR_BANKS; ++bank) { + if (!(per_cpu(bank_map, cpu) & 1 << bank)) + continue; + if (!shared_bank[bank]) + continue; + err = threshold_create_bank(lcpu, bank); + } + } + return err; +} + +/* remove all symlinks before first core dies. */ +static __cpuinit void threshold_remove_symlinks(unsigned int cpu) +{ + int bank; + unsigned int lcpu = 0; + if (cpu_core_id[cpu]) + return; + for_each_cpu_mask(lcpu, cpu_core_map[cpu]) { + if (lcpu == cpu) + continue; + for (bank = 0; bank < NR_BANKS; ++bank) { + if (!(per_cpu(bank_map, cpu) & 1 << bank)) + continue; + if (!shared_bank[bank]) + continue; + threshold_remove_bank(lcpu, bank); + } + } +} +#else /* !CONFIG_HOTPLUG_CPU */ +static __cpuinit void threshold_create_symlinks(unsigned int cpu) +{ +} +static __cpuinit void threshold_remove_symlinks(unsigned int cpu) +{ +} +static void threshold_remove_device(unsigned int cpu) +{ +} +#endif + +/* get notified when a cpu comes on/off */ +static __cpuinit int threshold_cpu_callback(struct notifier_block *nfb, + unsigned long action, void *hcpu) +{ + /* cpu was unsigned int to begin with */ + unsigned int cpu = (unsigned long)hcpu; + + if (cpu >= NR_CPUS) + goto out; + + switch (action) { + case CPU_ONLINE: + threshold_create_device(cpu); + threshold_create_symlinks(cpu); + break; + case CPU_DOWN_PREPARE: + threshold_remove_symlinks(cpu); + break; + case CPU_DOWN_FAILED: + threshold_create_symlinks(cpu); + break; + case CPU_DEAD: + threshold_remove_device(cpu); + break; + default: + break; + } + out: + return NOTIFY_OK; +} + +static struct notifier_block threshold_cpu_notifier = { + .notifier_call = threshold_cpu_callback, +}; + +static __init int threshold_init_device(void) +{ + int err; + int lcpu = 0; + + err = sysdev_class_register(&threshold_sysclass); + if (err) + goto out; + + /* to hit CPUs online before the notifier is up */ + for_each_online_cpu(lcpu) { + err = threshold_create_device(lcpu); + if (err) + goto out; + } + register_cpu_notifier(&threshold_cpu_notifier); + + out: + return err; +} + +device_initcall(threshold_init_device); diff --git a/arch/x86_64/kernel/mpparse.c b/arch/x86_64/kernel/mpparse.c index f16d38d09daf..1105250bf02c 100644 --- a/arch/x86_64/kernel/mpparse.c +++ b/arch/x86_64/kernel/mpparse.c @@ -42,7 +42,7 @@ int acpi_found_madt; * Various Linux-internal data structures created from the * MP-table. */ -int apic_version [MAX_APICS]; +unsigned char apic_version [MAX_APICS]; unsigned char mp_bus_id_to_type [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 }; int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 }; @@ -65,7 +65,9 @@ unsigned long mp_lapic_addr = 0; /* Processor that is doing the boot up */ unsigned int boot_cpu_id = -1U; /* Internal processor count */ -static unsigned int num_processors = 0; +unsigned int num_processors __initdata = 0; + +unsigned disabled_cpus __initdata; /* Bitmask of physically existing CPUs */ physid_mask_t phys_cpu_present_map = PHYSID_MASK_NONE; @@ -106,11 +108,14 @@ static int __init mpf_checksum(unsigned char *mp, int len) static void __init MP_processor_info (struct mpc_config_processor *m) { - int ver, cpu; + int cpu; + unsigned char ver; static int found_bsp=0; - if (!(m->mpc_cpuflag & CPU_ENABLED)) + if (!(m->mpc_cpuflag & CPU_ENABLED)) { + disabled_cpus++; return; + } printk(KERN_INFO "Processor #%d %d:%d APIC version %d\n", m->mpc_apicid, @@ -129,12 +134,14 @@ static void __init MP_processor_info (struct mpc_config_processor *m) } cpu = num_processors++; - - if (m->mpc_apicid > MAX_APICS) { + +#if MAX_APICS < 255 + if ((int)m->mpc_apicid > MAX_APICS) { printk(KERN_ERR "Processor #%d INVALID. (Max ID: %d).\n", m->mpc_apicid, MAX_APICS); return; } +#endif ver = m->mpc_apicver; physid_set(m->mpc_apicid, phys_cpu_present_map); @@ -218,7 +225,7 @@ static void __init MP_intsrc_info (struct mpc_config_intsrc *m) m->mpc_irqtype, m->mpc_irqflag & 3, (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus, m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq); - if (++mp_irq_entries == MAX_IRQ_SOURCES) + if (++mp_irq_entries >= MAX_IRQ_SOURCES) panic("Max # of irq sources exceeded!!\n"); } @@ -549,7 +556,7 @@ void __init get_smp_config (void) * Read the physical hardware table. Anything here will * override the defaults. */ - if (!smp_read_mpc((void *)(unsigned long)mpf->mpf_physptr)) { + if (!smp_read_mpc(phys_to_virt(mpf->mpf_physptr))) { smp_found_config = 0; printk(KERN_ERR "BIOS bug, MP table errors detected!...\n"); printk(KERN_ERR "... disabling SMP support. (tell your hw vendor)\n"); diff --git a/arch/x86_64/kernel/pci-gart.c b/arch/x86_64/kernel/pci-gart.c index 88be97c96987..2e28e855ec3c 100644 --- a/arch/x86_64/kernel/pci-gart.c +++ b/arch/x86_64/kernel/pci-gart.c @@ -220,6 +220,12 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, uses the normal dma_mask for alloc_coherent. */ dma_mask &= *dev->dma_mask; + /* Why <=? Even when the mask is smaller than 4GB it is often larger + than 16MB and in this case we have a chance of finding fitting memory + in the next higher zone first. If not retry with true GFP_DMA. -AK */ + if (dma_mask <= 0xffffffff) + gfp |= GFP_DMA32; + again: memory = dma_alloc_pages(dev, gfp, get_order(size)); if (memory == NULL) @@ -245,7 +251,7 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, } if (!(gfp & GFP_DMA)) { - gfp |= GFP_DMA; + gfp = (gfp & ~GFP_DMA32) | GFP_DMA; goto again; } return NULL; diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c index 59be85d9a4bc..5afd63e8cef7 100644 --- a/arch/x86_64/kernel/process.c +++ b/arch/x86_64/kernel/process.c @@ -144,7 +144,8 @@ void cpu_idle_wait(void) do { ssleep(1); for_each_online_cpu(cpu) { - if (cpu_isset(cpu, map) && !per_cpu(cpu_idle_state, cpu)) + if (cpu_isset(cpu, map) && + !per_cpu(cpu_idle_state, cpu)) cpu_clear(cpu, map); } cpus_and(map, map, cpu_online_map); @@ -275,7 +276,8 @@ void __show_regs(struct pt_regs * regs) system_utsname.version); printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip); printk_address(regs->rip); - printk("\nRSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->rsp, regs->eflags); + printk("\nRSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->rsp, + regs->eflags); printk("RAX: %016lx RBX: %016lx RCX: %016lx\n", regs->rax, regs->rbx, regs->rcx); printk("RDX: %016lx RSI: %016lx RDI: %016lx\n", @@ -427,15 +429,14 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp, struct pt_regs * childregs; struct task_struct *me = current; - childregs = ((struct pt_regs *) (THREAD_SIZE + (unsigned long) p->thread_info)) - 1; - + childregs = ((struct pt_regs *) + (THREAD_SIZE + (unsigned long) p->thread_info)) - 1; *childregs = *regs; childregs->rax = 0; childregs->rsp = rsp; - if (rsp == ~0UL) { + if (rsp == ~0UL) childregs->rsp = (unsigned long)childregs; - } p->thread.rsp = (unsigned long) childregs; p->thread.rsp0 = (unsigned long) (childregs+1); @@ -457,7 +458,8 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp, p->thread.io_bitmap_max = 0; return -ENOMEM; } - memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr, IO_BITMAP_BYTES); + memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr, + IO_BITMAP_BYTES); } /* @@ -494,7 +496,8 @@ out: * - fold all the options into a flag word and test it with a single test. * - could test fs/gs bitsliced */ -struct task_struct *__switch_to(struct task_struct *prev_p, struct task_struct *next_p) +struct task_struct * +__switch_to(struct task_struct *prev_p, struct task_struct *next_p) { struct thread_struct *prev = &prev_p->thread, *next = &next_p->thread; @@ -565,7 +568,8 @@ struct task_struct *__switch_to(struct task_struct *prev_p, struct task_struct * prev->userrsp = read_pda(oldrsp); write_pda(oldrsp, next->userrsp); write_pda(pcurrent, next_p); - write_pda(kernelstack, (unsigned long)next_p->thread_info + THREAD_SIZE - PDA_STACKOFFSET); + write_pda(kernelstack, + (unsigned long)next_p->thread_info + THREAD_SIZE - PDA_STACKOFFSET); /* * Now maybe reload the debug registers @@ -646,7 +650,9 @@ asmlinkage long sys_fork(struct pt_regs *regs) return do_fork(SIGCHLD, regs->rsp, regs, 0, NULL, NULL); } -asmlinkage long sys_clone(unsigned long clone_flags, unsigned long newsp, void __user *parent_tid, void __user *child_tid, struct pt_regs *regs) +asmlinkage long +sys_clone(unsigned long clone_flags, unsigned long newsp, + void __user *parent_tid, void __user *child_tid, struct pt_regs *regs) { if (!newsp) newsp = regs->rsp; @@ -682,7 +688,8 @@ unsigned long get_wchan(struct task_struct *p) return 0; fp = *(u64 *)(p->thread.rsp); do { - if (fp < (unsigned long)stack || fp > (unsigned long)stack+THREAD_SIZE) + if (fp < (unsigned long)stack || + fp > (unsigned long)stack+THREAD_SIZE) return 0; rip = *(u64 *)(fp+8); if (!in_sched_functions(rip)) @@ -717,8 +724,8 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) task->thread.gsindex = 0; task->thread.gs = addr; if (doit) { - load_gs_index(0); - ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr); + load_gs_index(0); + ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr); } } put_cpu(); @@ -735,7 +742,7 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) set_32bit_tls(task, FS_TLS, addr); if (doit) { load_TLS(&task->thread, cpu); - asm volatile("movl %0,%%fs" :: "r" (FS_TLS_SEL)); + asm volatile("movl %0,%%fs" :: "r"(FS_TLS_SEL)); } task->thread.fsindex = FS_TLS_SEL; task->thread.fs = 0; @@ -745,8 +752,8 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) if (doit) { /* set the selector to 0 to not confuse __switch_to */ - asm volatile("movl %0,%%fs" :: "r" (0)); - ret = checking_wrmsrl(MSR_FS_BASE, addr); + asm volatile("movl %0,%%fs" :: "r" (0)); + ret = checking_wrmsrl(MSR_FS_BASE, addr); } } put_cpu(); @@ -755,9 +762,9 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) unsigned long base; if (task->thread.fsindex == FS_TLS_SEL) base = read_32bit_tls(task, FS_TLS); - else if (doit) { + else if (doit) rdmsrl(MSR_FS_BASE, base); - } else + else base = task->thread.fs; ret = put_user(base, (unsigned long __user *)addr); break; @@ -766,9 +773,9 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) unsigned long base; if (task->thread.gsindex == GS_TLS_SEL) base = read_32bit_tls(task, GS_TLS); - else if (doit) { + else if (doit) rdmsrl(MSR_KERNEL_GS_BASE, base); - } else + else base = task->thread.gs; ret = put_user(base, (unsigned long __user *)addr); break; diff --git a/arch/x86_64/kernel/reboot.c b/arch/x86_64/kernel/reboot.c index 47f95687905f..75235ed2b31b 100644 --- a/arch/x86_64/kernel/reboot.c +++ b/arch/x86_64/kernel/reboot.c @@ -77,6 +77,7 @@ static inline void kb_wait(void) void machine_shutdown(void) { + unsigned long flags; /* Stop the cpus and apics */ #ifdef CONFIG_SMP int reboot_cpu_id; @@ -98,7 +99,7 @@ void machine_shutdown(void) smp_send_stop(); #endif - local_irq_disable(); + local_irq_save(flags); #ifndef CONFIG_SMP disable_local_APIC(); @@ -106,7 +107,7 @@ void machine_shutdown(void) disable_IO_APIC(); - local_irq_enable(); + local_irq_restore(flags); } void machine_emergency_restart(void) @@ -120,7 +121,7 @@ void machine_emergency_restart(void) /* Could also try the reset bit in the Hammer NB */ switch (reboot_type) { case BOOT_KBD: - for (i=0; i<100; i++) { + for (i=0; i<10; i++) { kb_wait(); udelay(50); outb(0xfe,0x64); /* pulse reset low */ diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index da0bc3e7bdf5..750e01dcbdf4 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -60,6 +60,7 @@ #include <asm/setup.h> #include <asm/mach_apic.h> #include <asm/numa.h> +#include <asm/sections.h> /* * Machine setup.. @@ -103,7 +104,6 @@ struct edid_info edid_info; struct e820map e820; extern int root_mountflags; -extern char _text, _etext, _edata, _end; char command_line[COMMAND_LINE_SIZE]; @@ -412,7 +412,6 @@ contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn) { unsigned long bootmap_size, bootmap; - memory_present(0, start_pfn, end_pfn); bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT; bootmap = find_e820_area(0, end_pfn<<PAGE_SHIFT, bootmap_size); if (bootmap == -1L) @@ -571,6 +570,8 @@ void __init setup_arch(char **cmdline_p) init_memory_mapping(0, (end_pfn_map << PAGE_SHIFT)); + zap_low_mappings(0); + #ifdef CONFIG_ACPI /* * Initialize the ACPI boot-time table parser (gets the RSDP and SDT). @@ -657,8 +658,6 @@ void __init setup_arch(char **cmdline_p) } #endif - sparse_init(); - paging_init(); check_ioapic(); @@ -793,7 +792,7 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c) #endif bits = 0; - while ((1 << bits) < c->x86_num_cores) + while ((1 << bits) < c->x86_max_cores) bits++; /* Low order bits define the core id (index of core in socket) */ @@ -823,10 +822,10 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c) if (!node_online(node)) node = nearby_node(apicid); } - cpu_to_node[cpu] = node; + numa_set_node(cpu, node); printk(KERN_INFO "CPU %d(%d) -> Node %d -> Core %d\n", - cpu, c->x86_num_cores, node, cpu_core_id[cpu]); + cpu, c->x86_max_cores, node, cpu_core_id[cpu]); #endif #endif } @@ -875,9 +874,9 @@ static int __init init_amd(struct cpuinfo_x86 *c) display_cacheinfo(c); if (c->extended_cpuid_level >= 0x80000008) { - c->x86_num_cores = (cpuid_ecx(0x80000008) & 0xff) + 1; - if (c->x86_num_cores & (c->x86_num_cores - 1)) - c->x86_num_cores = 1; + c->x86_max_cores = (cpuid_ecx(0x80000008) & 0xff) + 1; + if (c->x86_max_cores & (c->x86_max_cores - 1)) + c->x86_max_cores = 1; amd_detect_cmp(c); } @@ -889,54 +888,44 @@ static void __cpuinit detect_ht(struct cpuinfo_x86 *c) { #ifdef CONFIG_SMP u32 eax, ebx, ecx, edx; - int index_msb, tmp; + int index_msb, core_bits; int cpu = smp_processor_id(); - + + cpuid(1, &eax, &ebx, &ecx, &edx); + + c->apicid = phys_pkg_id(0); + if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY)) return; - cpuid(1, &eax, &ebx, &ecx, &edx); smp_num_siblings = (ebx & 0xff0000) >> 16; - + if (smp_num_siblings == 1) { printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); - } else if (smp_num_siblings > 1) { - index_msb = 31; - /* - * At this point we only support two siblings per - * processor package. - */ + } else if (smp_num_siblings > 1 ) { + if (smp_num_siblings > NR_CPUS) { printk(KERN_WARNING "CPU: Unsupported number of the siblings %d", smp_num_siblings); smp_num_siblings = 1; return; } - tmp = smp_num_siblings; - while ((tmp & 0x80000000 ) == 0) { - tmp <<=1 ; - index_msb--; - } - if (smp_num_siblings & (smp_num_siblings - 1)) - index_msb++; + + index_msb = get_count_order(smp_num_siblings); phys_proc_id[cpu] = phys_pkg_id(index_msb); - + printk(KERN_INFO "CPU: Physical Processor ID: %d\n", phys_proc_id[cpu]); - smp_num_siblings = smp_num_siblings / c->x86_num_cores; + smp_num_siblings = smp_num_siblings / c->x86_max_cores; - tmp = smp_num_siblings; - index_msb = 31; - while ((tmp & 0x80000000) == 0) { - tmp <<=1 ; - index_msb--; - } - if (smp_num_siblings & (smp_num_siblings - 1)) - index_msb++; + index_msb = get_count_order(smp_num_siblings) ; - cpu_core_id[cpu] = phys_pkg_id(index_msb); + core_bits = get_count_order(c->x86_max_cores); - if (c->x86_num_cores > 1) + cpu_core_id[cpu] = phys_pkg_id(index_msb) & + ((1 << core_bits) - 1); + + if (c->x86_max_cores > 1) printk(KERN_INFO "CPU: Processor Core ID: %d\n", cpu_core_id[cpu]); } @@ -975,7 +964,7 @@ static void srat_detect_node(void) node = apicid_to_node[hard_smp_processor_id()]; if (node == NUMA_NO_NODE) node = 0; - cpu_to_node[cpu] = node; + numa_set_node(cpu, node); if (acpi_numa > 0) printk(KERN_INFO "CPU %d -> Node %d\n", cpu, node); @@ -993,13 +982,18 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) unsigned eax = cpuid_eax(0x80000008); c->x86_virt_bits = (eax >> 8) & 0xff; c->x86_phys_bits = eax & 0xff; + /* CPUID workaround for Intel 0F34 CPU */ + if (c->x86_vendor == X86_VENDOR_INTEL && + c->x86 == 0xF && c->x86_model == 0x3 && + c->x86_mask == 0x4) + c->x86_phys_bits = 36; } if (c->x86 == 15) c->x86_cache_alignment = c->x86_clflush_size * 2; if (c->x86 >= 15) set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability); - c->x86_num_cores = intel_num_cpu_cores(c); + c->x86_max_cores = intel_num_cpu_cores(c); srat_detect_node(); } @@ -1037,7 +1031,7 @@ void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c) c->x86_model_id[0] = '\0'; /* Unset */ c->x86_clflush_size = 64; c->x86_cache_alignment = c->x86_clflush_size; - c->x86_num_cores = 1; + c->x86_max_cores = 1; c->extended_cpuid_level = 0; memset(&c->x86_capability, 0, sizeof c->x86_capability); @@ -1060,10 +1054,10 @@ void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c) c->x86 = (tfms >> 8) & 0xf; c->x86_model = (tfms >> 4) & 0xf; c->x86_mask = tfms & 0xf; - if (c->x86 == 0xf) { + if (c->x86 == 0xf) c->x86 += (tfms >> 20) & 0xff; + if (c->x86 >= 0x6) c->x86_model += ((tfms >> 16) & 0xF) << 4; - } if (c->x86_capability[0] & (1<<19)) c->x86_clflush_size = ((misc >> 8) & 0xff) * 8; } else { @@ -1271,13 +1265,12 @@ static int show_cpuinfo(struct seq_file *m, void *v) seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size); #ifdef CONFIG_SMP - if (smp_num_siblings * c->x86_num_cores > 1) { + if (smp_num_siblings * c->x86_max_cores > 1) { int cpu = c - cpu_data; seq_printf(m, "physical id\t: %d\n", phys_proc_id[cpu]); - seq_printf(m, "siblings\t: %d\n", - c->x86_num_cores * smp_num_siblings); + seq_printf(m, "siblings\t: %d\n", cpus_weight(cpu_core_map[cpu])); seq_printf(m, "core id\t\t: %d\n", cpu_core_id[cpu]); - seq_printf(m, "cpu cores\t: %d\n", c->x86_num_cores); + seq_printf(m, "cpu cores\t: %d\n", c->booted_cores); } #endif diff --git a/arch/x86_64/kernel/setup64.c b/arch/x86_64/kernel/setup64.c index 79190891fbc5..06dc354375c3 100644 --- a/arch/x86_64/kernel/setup64.c +++ b/arch/x86_64/kernel/setup64.c @@ -141,7 +141,6 @@ void pda_init(int cpu) panic("cannot allocate irqstack for cpu %d", cpu); } - asm volatile("movq %0,%%cr3" :: "r" (__pa_symbol(&init_level4_pgt))); pda->irqstackptr += IRQSTACKSIZE-64; } @@ -197,6 +196,7 @@ void __cpuinit cpu_init (void) /* CPU 0 is initialised in head64.c */ if (cpu != 0) { pda_init(cpu); + zap_low_mappings(cpu); } else estacks = boot_exception_stacks; diff --git a/arch/x86_64/kernel/signal.c b/arch/x86_64/kernel/signal.c index d642fbf3da29..5876df116c92 100644 --- a/arch/x86_64/kernel/signal.c +++ b/arch/x86_64/kernel/signal.c @@ -110,6 +110,15 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, unsigned COPY(r14); COPY(r15); + /* Kernel saves and restores only the CS segment register on signals, + * which is the bare minimum needed to allow mixed 32/64-bit code. + * App's signal handler can save/restore other segments if needed. */ + { + unsigned cs; + err |= __get_user(cs, &sc->cs); + regs->cs = cs | 3; /* Force into user mode */ + } + { unsigned int tmpflags; err |= __get_user(tmpflags, &sc->eflags); @@ -187,6 +196,7 @@ setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, unsigned lo { int err = 0; + err |= __put_user(regs->cs, &sc->cs); err |= __put_user(0, &sc->gs); err |= __put_user(0, &sc->fs); @@ -318,7 +328,14 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, regs->rsp = (unsigned long)frame; + /* Set up the CS register to run signal handlers in 64-bit mode, + even if the handler happens to be interrupting 32-bit code. */ + regs->cs = __USER_CS; + + /* This, by contrast, has nothing to do with segment registers - + see include/asm-x86_64/uaccess.h for details. */ set_fs(USER_DS); + regs->eflags &= ~TF_MASK; if (test_thread_flag(TIF_SINGLESTEP)) ptrace_notify(SIGTRAP); diff --git a/arch/x86_64/kernel/smp.c b/arch/x86_64/kernel/smp.c index 9db9dda161b4..cfc3d9dccbd9 100644 --- a/arch/x86_64/kernel/smp.c +++ b/arch/x86_64/kernel/smp.c @@ -28,8 +28,6 @@ #include <asm/proto.h> #include <asm/apicdef.h> -#define __cpuinit __init - /* * Smarter SMP flushing macros. * c/o Linus Torvalds. @@ -452,13 +450,14 @@ int smp_call_function (void (*func) (void *info), void *info, int nonatomic, void smp_stop_cpu(void) { + unsigned long flags; /* * Remove this CPU: */ cpu_clear(smp_processor_id(), cpu_online_map); - local_irq_disable(); + local_irq_save(flags); disable_local_APIC(); - local_irq_enable(); + local_irq_restore(flags); } static void smp_really_stop_cpu(void *dummy) diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c index c4e59bbdc187..683c33f7b967 100644 --- a/arch/x86_64/kernel/smpboot.c +++ b/arch/x86_64/kernel/smpboot.c @@ -64,6 +64,7 @@ int smp_num_siblings = 1; /* Package ID of each logical CPU */ u8 phys_proc_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID }; +/* core ID of each logical CPU */ u8 cpu_core_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID }; /* Bitmask of currently online CPUs */ @@ -87,7 +88,10 @@ struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned; /* Set when the idlers are all forked */ int smp_threads_ready; +/* representing HT siblings of each logical CPU */ cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly; + +/* representing HT and core siblings of each logical CPU */ cpumask_t cpu_core_map[NR_CPUS] __read_mostly; EXPORT_SYMBOL(cpu_core_map); @@ -434,30 +438,59 @@ void __cpuinit smp_callin(void) cpu_set(cpuid, cpu_callin_map); } +/* representing cpus for which sibling maps can be computed */ +static cpumask_t cpu_sibling_setup_map; + static inline void set_cpu_sibling_map(int cpu) { int i; + struct cpuinfo_x86 *c = cpu_data; + + cpu_set(cpu, cpu_sibling_setup_map); if (smp_num_siblings > 1) { - for_each_cpu(i) { - if (cpu_core_id[cpu] == cpu_core_id[i]) { + for_each_cpu_mask(i, cpu_sibling_setup_map) { + if (phys_proc_id[cpu] == phys_proc_id[i] && + cpu_core_id[cpu] == cpu_core_id[i]) { cpu_set(i, cpu_sibling_map[cpu]); cpu_set(cpu, cpu_sibling_map[i]); + cpu_set(i, cpu_core_map[cpu]); + cpu_set(cpu, cpu_core_map[i]); } } } else { cpu_set(cpu, cpu_sibling_map[cpu]); } - if (current_cpu_data.x86_num_cores > 1) { - for_each_cpu(i) { - if (phys_proc_id[cpu] == phys_proc_id[i]) { - cpu_set(i, cpu_core_map[cpu]); - cpu_set(cpu, cpu_core_map[i]); - } - } - } else { + if (current_cpu_data.x86_max_cores == 1) { cpu_core_map[cpu] = cpu_sibling_map[cpu]; + c[cpu].booted_cores = 1; + return; + } + + for_each_cpu_mask(i, cpu_sibling_setup_map) { + if (phys_proc_id[cpu] == phys_proc_id[i]) { + cpu_set(i, cpu_core_map[cpu]); + cpu_set(cpu, cpu_core_map[i]); + /* + * Does this new cpu bringup a new core? + */ + if (cpus_weight(cpu_sibling_map[cpu]) == 1) { + /* + * for each core in package, increment + * the booted_cores for this new cpu + */ + if (first_cpu(cpu_sibling_map[i]) == i) + c[cpu].booted_cores++; + /* + * increment the core count for all + * the other cpus in this package + */ + if (i != cpu) + c[i].booted_cores++; + } else if (i != cpu && !c[cpu].booted_cores) + c[cpu].booted_cores = c[i].booted_cores; + } } } @@ -879,6 +912,9 @@ static __init void disable_smp(void) } #ifdef CONFIG_HOTPLUG_CPU + +int additional_cpus __initdata = -1; + /* * cpu_possible_map should be static, it cannot change as cpu's * are onlined, or offlined. The reason is per-cpu data-structures @@ -887,14 +923,38 @@ static __init void disable_smp(void) * cpu_present_map on the other hand can change dynamically. * In case when cpu_hotplug is not compiled, then we resort to current * behaviour, which is cpu_possible == cpu_present. - * If cpu-hotplug is supported, then we need to preallocate for all - * those NR_CPUS, hence cpu_possible_map represents entire NR_CPUS range. * - Ashok Raj + * + * Three ways to find out the number of additional hotplug CPUs: + * - If the BIOS specified disabled CPUs in ACPI/mptables use that. + * - otherwise use half of the available CPUs or 2, whatever is more. + * - The user can overwrite it with additional_cpus=NUM + * We do this because additional CPUs waste a lot of memory. + * -AK */ __init void prefill_possible_map(void) { int i; - for (i = 0; i < NR_CPUS; i++) + int possible; + + if (additional_cpus == -1) { + if (disabled_cpus > 0) { + additional_cpus = disabled_cpus; + } else { + additional_cpus = num_processors / 2; + if (additional_cpus == 0) + additional_cpus = 2; + } + } + possible = num_processors + additional_cpus; + if (possible > NR_CPUS) + possible = NR_CPUS; + + printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n", + possible, + max_t(int, possible - num_processors, 0)); + + for (i = 0; i < possible; i++) cpu_set(i, cpu_possible_map); } #endif @@ -965,6 +1025,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus) nmi_watchdog_default(); current_cpu_data = boot_cpu_data; current_thread_info()->cpu = 0; /* needed? */ + set_cpu_sibling_map(0); if (smp_sanity_check(max_cpus) < 0) { printk(KERN_INFO "SMP disabled\n"); @@ -1008,8 +1069,6 @@ void __init smp_prepare_boot_cpu(void) int me = smp_processor_id(); cpu_set(me, cpu_online_map); cpu_set(me, cpu_callout_map); - cpu_set(0, cpu_sibling_map[0]); - cpu_set(0, cpu_core_map[0]); per_cpu(cpu_state, me) = CPU_ONLINE; } @@ -1062,9 +1121,6 @@ int __cpuinit __cpu_up(unsigned int cpu) */ void __init smp_cpus_done(unsigned int max_cpus) { -#ifndef CONFIG_HOTPLUG_CPU - zap_low_mappings(); -#endif smp_cleanup_boot(); #ifdef CONFIG_X86_IO_APIC @@ -1081,15 +1137,24 @@ void __init smp_cpus_done(unsigned int max_cpus) static void remove_siblinginfo(int cpu) { int sibling; + struct cpuinfo_x86 *c = cpu_data; + for_each_cpu_mask(sibling, cpu_core_map[cpu]) { + cpu_clear(cpu, cpu_core_map[sibling]); + /* + * last thread sibling in this cpu core going down + */ + if (cpus_weight(cpu_sibling_map[cpu]) == 1) + c[sibling].booted_cores--; + } + for_each_cpu_mask(sibling, cpu_sibling_map[cpu]) cpu_clear(cpu, cpu_sibling_map[sibling]); - for_each_cpu_mask(sibling, cpu_core_map[cpu]) - cpu_clear(cpu, cpu_core_map[sibling]); cpus_clear(cpu_sibling_map[cpu]); cpus_clear(cpu_core_map[cpu]); phys_proc_id[cpu] = BAD_APICID; cpu_core_id[cpu] = BAD_APICID; + cpu_clear(cpu, cpu_sibling_setup_map); } void remove_cpu_from_maps(void) @@ -1153,6 +1218,12 @@ void __cpu_die(unsigned int cpu) printk(KERN_ERR "CPU %u didn't die...\n", cpu); } +static __init int setup_additional_cpus(char *s) +{ + return get_option(&s, &additional_cpus); +} +__setup("additional_cpus=", setup_additional_cpus); + #else /* ... !CONFIG_HOTPLUG_CPU */ int __cpu_disable(void) diff --git a/arch/x86_64/kernel/sys_x86_64.c b/arch/x86_64/kernel/sys_x86_64.c index cc7821c68851..6449ea8fe756 100644 --- a/arch/x86_64/kernel/sys_x86_64.c +++ b/arch/x86_64/kernel/sys_x86_64.c @@ -154,17 +154,3 @@ asmlinkage long sys_uname(struct new_utsname __user * name) err |= copy_to_user(&name->machine, "i686", 5); return err ? -EFAULT : 0; } - -asmlinkage long sys_time64(long __user * tloc) -{ - struct timeval now; - int i; - - do_gettimeofday(&now); - i = now.tv_sec; - if (tloc) { - if (put_user(i,tloc)) - i = -EFAULT; - } - return i; -} diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c index b5e09e6b5536..bf337f493189 100644 --- a/arch/x86_64/kernel/traps.c +++ b/arch/x86_64/kernel/traps.c @@ -428,19 +428,6 @@ static void __kprobes do_trap(int trapnr, int signr, char *str, { conditional_sti(regs); -#ifdef CONFIG_CHECKING - { - unsigned long gs; - struct x8664_pda *pda = cpu_pda + safe_smp_processor_id(); - rdmsrl(MSR_GS_BASE, gs); - if (gs != (unsigned long)pda) { - wrmsrl(MSR_GS_BASE, pda); - printk("%s: wrong gs %lx expected %p rip %lx\n", str, gs, pda, - regs->rip); - } - } -#endif - if (user_mode(regs)) { struct task_struct *tsk = current; @@ -513,20 +500,6 @@ asmlinkage void __kprobes do_general_protection(struct pt_regs * regs, { conditional_sti(regs); -#ifdef CONFIG_CHECKING - { - unsigned long gs; - struct x8664_pda *pda = cpu_pda + safe_smp_processor_id(); - rdmsrl(MSR_GS_BASE, gs); - if (gs != (unsigned long)pda) { - wrmsrl(MSR_GS_BASE, pda); - oops_in_progress++; - printk("general protection handler: wrong gs %lx expected %p\n", gs, pda); - oops_in_progress--; - } - } -#endif - if (user_mode(regs)) { struct task_struct *tsk = current; @@ -665,19 +638,6 @@ asmlinkage void __kprobes do_debug(struct pt_regs * regs, struct task_struct *tsk = current; siginfo_t info; -#ifdef CONFIG_CHECKING - { - /* RED-PEN interaction with debugger - could destroy gs */ - unsigned long gs; - struct x8664_pda *pda = cpu_pda + safe_smp_processor_id(); - rdmsrl(MSR_GS_BASE, gs); - if (gs != (unsigned long)pda) { - wrmsrl(MSR_GS_BASE, pda); - printk("debug handler: wrong gs %lx expected %p\n", gs, pda); - } - } -#endif - get_debugreg(condition, 6); if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code, @@ -888,6 +848,10 @@ asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void) { } +asmlinkage void __attribute__((weak)) mce_threshold_interrupt(void) +{ +} + /* * 'math_state_restore()' saves the current math information in the * old math state array, and gets the new ones from the current task diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S index 6dd642cad2ef..58b19215b4b3 100644 --- a/arch/x86_64/kernel/vmlinux.lds.S +++ b/arch/x86_64/kernel/vmlinux.lds.S @@ -50,7 +50,7 @@ SECTIONS *(.bss.page_aligned) *(.bss) } - __bss_end = .; + __bss_stop = .; . = ALIGN(PAGE_SIZE); . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); diff --git a/arch/x86_64/kernel/x8664_ksyms.c b/arch/x86_64/kernel/x8664_ksyms.c index fd99ddd009bc..4a54221e10bc 100644 --- a/arch/x86_64/kernel/x8664_ksyms.c +++ b/arch/x86_64/kernel/x8664_ksyms.c @@ -203,3 +203,6 @@ EXPORT_SYMBOL(flush_tlb_page); #endif EXPORT_SYMBOL(cpu_khz); + +EXPORT_SYMBOL(load_gs_index); + diff --git a/arch/x86_64/lib/clear_page.S b/arch/x86_64/lib/clear_page.S index 30a9da458c15..43d9fa136180 100644 --- a/arch/x86_64/lib/clear_page.S +++ b/arch/x86_64/lib/clear_page.S @@ -5,46 +5,8 @@ .globl clear_page .p2align 4 clear_page: - xorl %eax,%eax - movl $4096/64,%ecx - .p2align 4 -.Lloop: - decl %ecx -#define PUT(x) movq %rax,x*8(%rdi) - movq %rax,(%rdi) - PUT(1) - PUT(2) - PUT(3) - PUT(4) - PUT(5) - PUT(6) - PUT(7) - leaq 64(%rdi),%rdi - jnz .Lloop - nop - ret -clear_page_end: - - /* C stepping K8 run faster using the string instructions. - It is also a lot simpler. Use this when possible */ - -#include <asm/cpufeature.h> - - .section .altinstructions,"a" - .align 8 - .quad clear_page - .quad clear_page_c - .byte X86_FEATURE_K8_C - .byte clear_page_end-clear_page - .byte clear_page_c_end-clear_page_c - .previous - - .section .altinstr_replacement,"ax" -clear_page_c: movl $4096/8,%ecx xorl %eax,%eax rep stosq ret -clear_page_c_end: - .previous diff --git a/arch/x86_64/lib/copy_page.S b/arch/x86_64/lib/copy_page.S index dd3aa47b6bf5..621a19769406 100644 --- a/arch/x86_64/lib/copy_page.S +++ b/arch/x86_64/lib/copy_page.S @@ -8,94 +8,7 @@ .globl copy_page .p2align 4 copy_page: - subq $3*8,%rsp - movq %rbx,(%rsp) - movq %r12,1*8(%rsp) - movq %r13,2*8(%rsp) - - movl $(4096/64)-5,%ecx - .p2align 4 -.Loop64: - dec %rcx - - movq (%rsi), %rax - movq 8 (%rsi), %rbx - movq 16 (%rsi), %rdx - movq 24 (%rsi), %r8 - movq 32 (%rsi), %r9 - movq 40 (%rsi), %r10 - movq 48 (%rsi), %r11 - movq 56 (%rsi), %r12 - - prefetcht0 5*64(%rsi) - - movq %rax, (%rdi) - movq %rbx, 8 (%rdi) - movq %rdx, 16 (%rdi) - movq %r8, 24 (%rdi) - movq %r9, 32 (%rdi) - movq %r10, 40 (%rdi) - movq %r11, 48 (%rdi) - movq %r12, 56 (%rdi) - - leaq 64 (%rsi), %rsi - leaq 64 (%rdi), %rdi - - jnz .Loop64 - - movl $5,%ecx - .p2align 4 -.Loop2: - decl %ecx - - movq (%rsi), %rax - movq 8 (%rsi), %rbx - movq 16 (%rsi), %rdx - movq 24 (%rsi), %r8 - movq 32 (%rsi), %r9 - movq 40 (%rsi), %r10 - movq 48 (%rsi), %r11 - movq 56 (%rsi), %r12 - - movq %rax, (%rdi) - movq %rbx, 8 (%rdi) - movq %rdx, 16 (%rdi) - movq %r8, 24 (%rdi) - movq %r9, 32 (%rdi) - movq %r10, 40 (%rdi) - movq %r11, 48 (%rdi) - movq %r12, 56 (%rdi) - - leaq 64(%rdi),%rdi - leaq 64(%rsi),%rsi - - jnz .Loop2 - - movq (%rsp),%rbx - movq 1*8(%rsp),%r12 - movq 2*8(%rsp),%r13 - addq $3*8,%rsp - ret - - /* C stepping K8 run faster using the string copy instructions. - It is also a lot simpler. Use this when possible */ - -#include <asm/cpufeature.h> - - .section .altinstructions,"a" - .align 8 - .quad copy_page - .quad copy_page_c - .byte X86_FEATURE_K8_C - .byte copy_page_c_end-copy_page_c - .byte copy_page_c_end-copy_page_c - .previous - - .section .altinstr_replacement,"ax" -copy_page_c: movl $4096/8,%ecx rep movsq ret -copy_page_c_end: - .previous diff --git a/arch/x86_64/lib/memcpy.S b/arch/x86_64/lib/memcpy.S index c6c46494fef5..92dd80544602 100644 --- a/arch/x86_64/lib/memcpy.S +++ b/arch/x86_64/lib/memcpy.S @@ -11,6 +11,8 @@ * * Output: * rax original destination + * + * TODO: check best memcpy for PSC */ .globl __memcpy @@ -18,95 +20,6 @@ .p2align 4 __memcpy: memcpy: - pushq %rbx - movq %rdi,%rax - - movl %edx,%ecx - shrl $6,%ecx - jz .Lhandle_tail - - .p2align 4 -.Lloop_64: - decl %ecx - - movq (%rsi),%r11 - movq 8(%rsi),%r8 - - movq %r11,(%rdi) - movq %r8,1*8(%rdi) - - movq 2*8(%rsi),%r9 - movq 3*8(%rsi),%r10 - - movq %r9,2*8(%rdi) - movq %r10,3*8(%rdi) - - movq 4*8(%rsi),%r11 - movq 5*8(%rsi),%r8 - - movq %r11,4*8(%rdi) - movq %r8,5*8(%rdi) - - movq 6*8(%rsi),%r9 - movq 7*8(%rsi),%r10 - - movq %r9,6*8(%rdi) - movq %r10,7*8(%rdi) - - leaq 64(%rsi),%rsi - leaq 64(%rdi),%rdi - jnz .Lloop_64 - -.Lhandle_tail: - movl %edx,%ecx - andl $63,%ecx - shrl $3,%ecx - jz .Lhandle_7 - .p2align 4 -.Lloop_8: - decl %ecx - movq (%rsi),%r8 - movq %r8,(%rdi) - leaq 8(%rdi),%rdi - leaq 8(%rsi),%rsi - jnz .Lloop_8 - -.Lhandle_7: - movl %edx,%ecx - andl $7,%ecx - jz .Lende - .p2align 4 -.Lloop_1: - movb (%rsi),%r8b - movb %r8b,(%rdi) - incq %rdi - incq %rsi - decl %ecx - jnz .Lloop_1 - -.Lende: - popq %rbx - ret -.Lfinal: - - /* C stepping K8 run faster using the string copy instructions. - It is also a lot simpler. Use this when possible */ - - .section .altinstructions,"a" - .align 8 - .quad memcpy - .quad memcpy_c - .byte X86_FEATURE_K8_C - .byte .Lfinal-memcpy - .byte memcpy_c_end-memcpy_c - .previous - - .section .altinstr_replacement,"ax" - /* rdi destination - * rsi source - * rdx count - */ -memcpy_c: movq %rdi,%rax movl %edx,%ecx shrl $3,%ecx @@ -117,5 +30,3 @@ memcpy_c: rep movsb ret -memcpy_c_end: - .previous diff --git a/arch/x86_64/lib/memset.S b/arch/x86_64/lib/memset.S index 4b4c40638640..2aa48f24ed1e 100644 --- a/arch/x86_64/lib/memset.S +++ b/arch/x86_64/lib/memset.S @@ -13,98 +13,6 @@ .p2align 4 memset: __memset: - movq %rdi,%r10 - movq %rdx,%r11 - - /* expand byte value */ - movzbl %sil,%ecx - movabs $0x0101010101010101,%rax - mul %rcx /* with rax, clobbers rdx */ - - /* align dst */ - movl %edi,%r9d - andl $7,%r9d - jnz .Lbad_alignment -.Lafter_bad_alignment: - - movl %r11d,%ecx - shrl $6,%ecx - jz .Lhandle_tail - - .p2align 4 -.Lloop_64: - decl %ecx - movq %rax,(%rdi) - movq %rax,8(%rdi) - movq %rax,16(%rdi) - movq %rax,24(%rdi) - movq %rax,32(%rdi) - movq %rax,40(%rdi) - movq %rax,48(%rdi) - movq %rax,56(%rdi) - leaq 64(%rdi),%rdi - jnz .Lloop_64 - - /* Handle tail in loops. The loops should be faster than hard - to predict jump tables. */ - .p2align 4 -.Lhandle_tail: - movl %r11d,%ecx - andl $63&(~7),%ecx - jz .Lhandle_7 - shrl $3,%ecx - .p2align 4 -.Lloop_8: - decl %ecx - movq %rax,(%rdi) - leaq 8(%rdi),%rdi - jnz .Lloop_8 - -.Lhandle_7: - movl %r11d,%ecx - andl $7,%ecx - jz .Lende - .p2align 4 -.Lloop_1: - decl %ecx - movb %al,(%rdi) - leaq 1(%rdi),%rdi - jnz .Lloop_1 - -.Lende: - movq %r10,%rax - ret - -.Lbad_alignment: - cmpq $7,%r11 - jbe .Lhandle_7 - movq %rax,(%rdi) /* unaligned store */ - movq $8,%r8 - subq %r9,%r8 - addq %r8,%rdi - subq %r8,%r11 - jmp .Lafter_bad_alignment - - /* C stepping K8 run faster using the string instructions. - It is also a lot simpler. Use this when possible */ - -#include <asm/cpufeature.h> - - .section .altinstructions,"a" - .align 8 - .quad memset - .quad memset_c - .byte X86_FEATURE_K8_C - .byte memset_c_end-memset_c - .byte memset_c_end-memset_c - .previous - - .section .altinstr_replacement,"ax" - /* rdi destination - * rsi value - * rdx count - */ -memset_c: movq %rdi,%r9 movl %edx,%r8d andl $7,%r8d @@ -121,5 +29,3 @@ memset_c: stosb movq %r9,%rax ret -memset_c_end: - .previous diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c index b75b872ec154..3a63707a698b 100644 --- a/arch/x86_64/mm/fault.c +++ b/arch/x86_64/mm/fault.c @@ -308,18 +308,6 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, unsigned long flags; siginfo_t info; -#ifdef CONFIG_CHECKING - { - unsigned long gs; - struct x8664_pda *pda = cpu_pda + stack_smp_processor_id(); - rdmsrl(MSR_GS_BASE, gs); - if (gs != (unsigned long)pda) { - wrmsrl(MSR_GS_BASE, pda); - printk("page_fault: wrong gs %lx expected %p\n", gs, pda); - } - } -#endif - /* get the address */ __asm__("movq %%cr2,%0":"=r" (address)); if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14, @@ -571,3 +559,10 @@ do_sigbus: force_sig_info(SIGBUS, &info, tsk); return; } + +static int __init enable_pagefaulttrace(char *str) +{ + page_fault_trace = 1; + return 0; +} +__setup("pagefaulttrace", enable_pagefaulttrace); diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c index e60a1a848de8..286f6a624c3a 100644 --- a/arch/x86_64/mm/init.c +++ b/arch/x86_64/mm/init.c @@ -22,6 +22,7 @@ #include <linux/pagemap.h> #include <linux/bootmem.h> #include <linux/proc_fs.h> +#include <linux/pci.h> #include <asm/processor.h> #include <asm/system.h> @@ -36,16 +37,13 @@ #include <asm/mmu_context.h> #include <asm/proto.h> #include <asm/smp.h> +#include <asm/sections.h> #ifndef Dprintk #define Dprintk(x...) #endif -#ifdef CONFIG_GART_IOMMU -extern int swiotlb; -#endif - -extern char _stext[]; +static unsigned long dma_reserve __initdata; DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); @@ -86,9 +84,6 @@ void show_mem(void) /* References to section boundaries */ -extern char _text, _etext, _edata, __bss_start, _end[]; -extern char __init_begin, __init_end; - int after_bootmem; static void *spp_getpage(void) @@ -308,42 +303,81 @@ void __init init_memory_mapping(unsigned long start, unsigned long end) table_end<<PAGE_SHIFT); } -extern struct x8664_pda cpu_pda[NR_CPUS]; +void __cpuinit zap_low_mappings(int cpu) +{ + if (cpu == 0) { + pgd_t *pgd = pgd_offset_k(0UL); + pgd_clear(pgd); + } else { + /* + * For AP's, zap the low identity mappings by changing the cr3 + * to init_level4_pgt and doing local flush tlb all + */ + asm volatile("movq %0,%%cr3" :: "r" (__pa_symbol(&init_level4_pgt))); + } + __flush_tlb_all(); +} -/* Assumes all CPUs still execute in init_mm */ -void zap_low_mappings(void) +/* Compute zone sizes for the DMA and DMA32 zones in a node. */ +__init void +size_zones(unsigned long *z, unsigned long *h, + unsigned long start_pfn, unsigned long end_pfn) { - pgd_t *pgd = pgd_offset_k(0UL); - pgd_clear(pgd); - flush_tlb_all(); + int i; + unsigned long w; + + for (i = 0; i < MAX_NR_ZONES; i++) + z[i] = 0; + + if (start_pfn < MAX_DMA_PFN) + z[ZONE_DMA] = MAX_DMA_PFN - start_pfn; + if (start_pfn < MAX_DMA32_PFN) { + unsigned long dma32_pfn = MAX_DMA32_PFN; + if (dma32_pfn > end_pfn) + dma32_pfn = end_pfn; + z[ZONE_DMA32] = dma32_pfn - start_pfn; + } + z[ZONE_NORMAL] = end_pfn - start_pfn; + + /* Remove lower zones from higher ones. */ + w = 0; + for (i = 0; i < MAX_NR_ZONES; i++) { + if (z[i]) + z[i] -= w; + w += z[i]; + } + + /* Compute holes */ + w = 0; + for (i = 0; i < MAX_NR_ZONES; i++) { + unsigned long s = w; + w += z[i]; + h[i] = e820_hole_size(s, w); + } + + /* Add the space pace needed for mem_map to the holes too. */ + for (i = 0; i < MAX_NR_ZONES; i++) + h[i] += (z[i] * sizeof(struct page)) / PAGE_SIZE; + + /* The 16MB DMA zone has the kernel and other misc mappings. + Account them too */ + if (h[ZONE_DMA]) { + h[ZONE_DMA] += dma_reserve; + if (h[ZONE_DMA] >= z[ZONE_DMA]) { + printk(KERN_WARNING + "Kernel too large and filling up ZONE_DMA?\n"); + h[ZONE_DMA] = z[ZONE_DMA]; + } + } } #ifndef CONFIG_NUMA void __init paging_init(void) { - { - unsigned long zones_size[MAX_NR_ZONES]; - unsigned long holes[MAX_NR_ZONES]; - unsigned int max_dma; - - memset(zones_size, 0, sizeof(zones_size)); - memset(holes, 0, sizeof(holes)); - - max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; - - if (end_pfn < max_dma) { - zones_size[ZONE_DMA] = end_pfn; - holes[ZONE_DMA] = e820_hole_size(0, end_pfn); - } else { - zones_size[ZONE_DMA] = max_dma; - holes[ZONE_DMA] = e820_hole_size(0, max_dma); - zones_size[ZONE_NORMAL] = end_pfn - max_dma; - holes[ZONE_NORMAL] = e820_hole_size(max_dma, end_pfn); - } - free_area_init_node(0, NODE_DATA(0), zones_size, - __pa(PAGE_OFFSET) >> PAGE_SHIFT, holes); - } - return; + unsigned long zones[MAX_NR_ZONES], holes[MAX_NR_ZONES]; + size_zones(zones, holes, 0, end_pfn); + free_area_init_node(0, NODE_DATA(0), zones, + __pa(PAGE_OFFSET) >> PAGE_SHIFT, holes); } #endif @@ -438,19 +472,16 @@ void __init mem_init(void) datasize >> 10, initsize >> 10); +#ifdef CONFIG_SMP /* - * Subtle. SMP is doing its boot stuff late (because it has to - * fork idle threads) - but it also needs low mappings for the - * protected-mode entry to work. We zap these entries only after - * the WP-bit has been tested. + * Sync boot_level4_pgt mappings with the init_level4_pgt + * except for the low identity mappings which are already zapped + * in init_level4_pgt. This sync-up is essential for AP's bringup */ -#ifndef CONFIG_SMP - zap_low_mappings(); + memcpy(boot_level4_pgt+1, init_level4_pgt+1, (PTRS_PER_PGD-1)*sizeof(pgd_t)); #endif } -extern char __initdata_begin[], __initdata_end[]; - void free_initmem(void) { unsigned long addr; @@ -464,7 +495,7 @@ void free_initmem(void) totalram_pages++; } memset(__initdata_begin, 0xba, __initdata_end - __initdata_begin); - printk ("Freeing unused kernel memory: %luk freed\n", (&__init_end - &__init_begin) >> 10); + printk ("Freeing unused kernel memory: %luk freed\n", (__init_end - __init_begin) >> 10); } #ifdef CONFIG_BLK_DEV_INITRD @@ -491,6 +522,8 @@ void __init reserve_bootmem_generic(unsigned long phys, unsigned len) #else reserve_bootmem(phys, len); #endif + if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) + dma_reserve += len / PAGE_SIZE; } int kern_addr_valid(unsigned long addr) @@ -532,10 +565,6 @@ extern int exception_trace, page_fault_trace; static ctl_table debug_table2[] = { { 99, "exception-trace", &exception_trace, sizeof(int), 0644, NULL, proc_dointvec }, -#ifdef CONFIG_CHECKING - { 100, "page-fault-trace", &page_fault_trace, sizeof(int), 0644, NULL, - proc_dointvec }, -#endif { 0, } }; diff --git a/arch/x86_64/mm/k8topology.c b/arch/x86_64/mm/k8topology.c index 65417b040c1b..a5663e0bb01c 100644 --- a/arch/x86_64/mm/k8topology.c +++ b/arch/x86_64/mm/k8topology.c @@ -108,6 +108,7 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end) limit >>= 16; limit <<= 24; limit |= (1<<24)-1; + limit++; if (limit > end_pfn << PAGE_SHIFT) limit = end_pfn << PAGE_SHIFT; diff --git a/arch/x86_64/mm/numa.c b/arch/x86_64/mm/numa.c index 214803821001..a828a01739cc 100644 --- a/arch/x86_64/mm/numa.c +++ b/arch/x86_64/mm/numa.c @@ -38,38 +38,57 @@ cpumask_t node_to_cpumask[MAX_NUMNODES] __read_mostly; int numa_off __initdata; -int __init compute_hash_shift(struct node *nodes, int numnodes) + +/* + * Given a shift value, try to populate memnodemap[] + * Returns : + * 1 if OK + * 0 if memnodmap[] too small (of shift too small) + * -1 if node overlap or lost ram (shift too big) + */ +static int __init populate_memnodemap( + const struct node *nodes, int numnodes, int shift) { int i; - int shift = 20; - unsigned long addr,maxend=0; - - for (i = 0; i < numnodes; i++) - if ((nodes[i].start != nodes[i].end) && (nodes[i].end > maxend)) - maxend = nodes[i].end; + int res = -1; + unsigned long addr, end; - while ((1UL << shift) < (maxend / NODEMAPSIZE)) - shift++; - - printk (KERN_DEBUG"Using %d for the hash shift. Max adder is %lx \n", - shift,maxend); - memset(memnodemap,0xff,sizeof(*memnodemap) * NODEMAPSIZE); + memset(memnodemap, 0xff, sizeof(memnodemap)); for (i = 0; i < numnodes; i++) { - if (nodes[i].start == nodes[i].end) + addr = nodes[i].start; + end = nodes[i].end; + if (addr >= end) continue; - for (addr = nodes[i].start; - addr < nodes[i].end; - addr += (1UL << shift)) { - if (memnodemap[addr >> shift] != 0xff) { - printk(KERN_INFO - "Your memory is not aligned you need to rebuild your kernel " - "with a bigger NODEMAPSIZE shift=%d adder=%lu\n", - shift,addr); + if ((end >> shift) >= NODEMAPSIZE) + return 0; + do { + if (memnodemap[addr >> shift] != 0xff) return -1; - } memnodemap[addr >> shift] = i; - } + addr += (1 << shift); + } while (addr < end); + res = 1; } + return res; +} + +int __init compute_hash_shift(struct node *nodes, int numnodes) +{ + int shift = 20; + + while (populate_memnodemap(nodes, numnodes, shift + 1) >= 0) + shift++; + + printk(KERN_DEBUG "Using %d for the hash shift.\n", + shift); + + if (populate_memnodemap(nodes, numnodes, shift) != 1) { + printk(KERN_INFO + "Your memory is not aligned you need to rebuild your kernel " + "with a bigger NODEMAPSIZE shift=%d\n", + shift); + return -1; + } return shift; } @@ -94,7 +113,6 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, unsigned long en start_pfn = start >> PAGE_SHIFT; end_pfn = end >> PAGE_SHIFT; - memory_present(nodeid, start_pfn, end_pfn); nodedata_phys = find_e820_area(start, end, pgdat_size); if (nodedata_phys == -1L) panic("Cannot find memory pgdat in node %d\n", nodeid); @@ -132,29 +150,14 @@ void __init setup_node_zones(int nodeid) unsigned long start_pfn, end_pfn; unsigned long zones[MAX_NR_ZONES]; unsigned long holes[MAX_NR_ZONES]; - unsigned long dma_end_pfn; - memset(zones, 0, sizeof(unsigned long) * MAX_NR_ZONES); - memset(holes, 0, sizeof(unsigned long) * MAX_NR_ZONES); + start_pfn = node_start_pfn(nodeid); + end_pfn = node_end_pfn(nodeid); - start_pfn = node_start_pfn(nodeid); - end_pfn = node_end_pfn(nodeid); + Dprintk(KERN_INFO "setting up node %d %lx-%lx\n", + nodeid, start_pfn, end_pfn); - Dprintk(KERN_INFO "setting up node %d %lx-%lx\n", nodeid, start_pfn, end_pfn); - - /* All nodes > 0 have a zero length zone DMA */ - dma_end_pfn = __pa(MAX_DMA_ADDRESS) >> PAGE_SHIFT; - if (start_pfn < dma_end_pfn) { - zones[ZONE_DMA] = dma_end_pfn - start_pfn; - holes[ZONE_DMA] = e820_hole_size(start_pfn, dma_end_pfn); - zones[ZONE_NORMAL] = end_pfn - dma_end_pfn; - holes[ZONE_NORMAL] = e820_hole_size(dma_end_pfn, end_pfn); - - } else { - zones[ZONE_NORMAL] = end_pfn - start_pfn; - holes[ZONE_NORMAL] = e820_hole_size(start_pfn, end_pfn); - } - + size_zones(zones, holes, start_pfn, end_pfn); free_area_init_node(nodeid, NODE_DATA(nodeid), zones, start_pfn, holes); } @@ -171,7 +174,7 @@ void __init numa_init_array(void) for (i = 0; i < NR_CPUS; i++) { if (cpu_to_node[i] != NUMA_NO_NODE) continue; - cpu_to_node[i] = rr; + numa_set_node(i, rr); rr = next_node(rr, node_online_map); if (rr == MAX_NUMNODES) rr = first_node(node_online_map); @@ -205,8 +208,6 @@ static int numa_emulation(unsigned long start_pfn, unsigned long end_pfn) if (i == numa_fake-1) sz = (end_pfn<<PAGE_SHIFT) - nodes[i].start; nodes[i].end = nodes[i].start + sz; - if (i != numa_fake-1) - nodes[i].end--; printk(KERN_INFO "Faking node %d at %016Lx-%016Lx (%LuMB)\n", i, nodes[i].start, nodes[i].end, @@ -257,7 +258,7 @@ void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn) nodes_clear(node_online_map); node_set_online(0); for (i = 0; i < NR_CPUS; i++) - cpu_to_node[i] = 0; + numa_set_node(i, 0); node_to_cpumask[0] = cpumask_of_cpu(0); setup_node_bootmem(0, start_pfn << PAGE_SHIFT, end_pfn << PAGE_SHIFT); } @@ -267,6 +268,12 @@ __cpuinit void numa_add_cpu(int cpu) set_bit(cpu, &node_to_cpumask[cpu_to_node(cpu)]); } +void __cpuinit numa_set_node(int cpu, int node) +{ + cpu_pda[cpu].nodenumber = node; + cpu_to_node[cpu] = node; +} + unsigned long __init numa_free_all_bootmem(void) { int i; @@ -277,9 +284,26 @@ unsigned long __init numa_free_all_bootmem(void) return pages; } +#ifdef CONFIG_SPARSEMEM +static void __init arch_sparse_init(void) +{ + int i; + + for_each_online_node(i) + memory_present(i, node_start_pfn(i), node_end_pfn(i)); + + sparse_init(); +} +#else +#define arch_sparse_init() do {} while (0) +#endif + void __init paging_init(void) { int i; + + arch_sparse_init(); + for_each_online_node(i) { setup_node_zones(i); } diff --git a/arch/x86_64/mm/srat.c b/arch/x86_64/mm/srat.c index 4b2e844c15a7..33340bd1e328 100644 --- a/arch/x86_64/mm/srat.c +++ b/arch/x86_64/mm/srat.c @@ -71,8 +71,6 @@ static __init void cutoff_node(int i, unsigned long start, unsigned long end) nd->start = nd->end; } if (nd->end > end) { - if (!(end & 0xfff)) - end--; nd->end = end; if (nd->start > nd->end) nd->start = nd->end; @@ -166,8 +164,6 @@ acpi_numa_memory_affinity_init(struct acpi_table_memory_affinity *ma) if (nd->end < end) nd->end = end; } - if (!(nd->end & 0xfff)) - nd->end--; printk(KERN_INFO "SRAT: Node %u PXM %u %Lx-%Lx\n", node, pxm, nd->start, nd->end); } @@ -203,7 +199,7 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end) if (cpu_to_node[i] == NUMA_NO_NODE) continue; if (!node_isset(cpu_to_node[i], nodes_parsed)) - cpu_to_node[i] = NUMA_NO_NODE; + numa_set_node(i, NUMA_NO_NODE); } numa_init_array(); return 0; diff --git a/drivers/block/cciss_scsi.c b/drivers/block/cciss_scsi.c index 3226aa11c6ef..2942d32280a5 100644 --- a/drivers/block/cciss_scsi.c +++ b/drivers/block/cciss_scsi.c @@ -255,7 +255,7 @@ scsi_cmd_stack_free(int ctlr) #define DEVICETYPE(n) (n<0 || n>MAX_SCSI_DEVICE_CODE) ? \ "Unknown" : scsi_device_types[n] -#if 1 +#if 0 static int xmargin=8; static int amargin=60; diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig index fdf4370db994..970f70d498f4 100644 --- a/drivers/char/Kconfig +++ b/drivers/char/Kconfig @@ -735,7 +735,7 @@ config SGI_IP27_RTC config GEN_RTC tristate "Generic /dev/rtc emulation" - depends on RTC!=y && !IA64 && !ARM && !PPC64 && !M32R && !SPARC32 && !SPARC64 + depends on RTC!=y && !IA64 && !ARM && !M32R && !SPARC32 && !SPARC64 ---help--- If you say Y here and create a character special file /dev/rtc with major number 10 and minor number 135 using mknod ("man mknod"), you diff --git a/drivers/char/agp/amd64-agp.c b/drivers/char/agp/amd64-agp.c index 78ce98a69f37..76589782adcb 100644 --- a/drivers/char/agp/amd64-agp.c +++ b/drivers/char/agp/amd64-agp.c @@ -57,9 +57,8 @@ static int nr_garts; static struct pci_dev * hammers[MAX_HAMMER_GARTS]; static struct resource *aperture_resource; -static int __initdata agp_try_unsupported; +static int __initdata agp_try_unsupported = 1; -static int gart_iterator; #define for_each_nb() for(gart_iterator=0;gart_iterator<nr_garts;gart_iterator++) static void flush_amd64_tlb(struct pci_dev *dev) @@ -73,6 +72,7 @@ static void flush_amd64_tlb(struct pci_dev *dev) static void amd64_tlbflush(struct agp_memory *temp) { + int gart_iterator; for_each_nb() flush_amd64_tlb(hammers[gart_iterator]); } @@ -222,6 +222,7 @@ static struct aper_size_info_32 amd_8151_sizes[7] = static int amd_8151_configure(void) { unsigned long gatt_bus = virt_to_gart(agp_bridge->gatt_table_real); + int gart_iterator; /* Configure AGP regs in each x86-64 host bridge. */ for_each_nb() { @@ -235,7 +236,7 @@ static int amd_8151_configure(void) static void amd64_cleanup(void) { u32 tmp; - + int gart_iterator; for_each_nb() { /* disable gart translation */ pci_read_config_dword (hammers[gart_iterator], AMD64_GARTAPERTURECTL, &tmp); @@ -697,6 +698,16 @@ static struct pci_device_id agp_amd64_pci_table[] = { .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID, }, + /* ALI/ULI M1695 */ + { + .class = (PCI_CLASS_BRIDGE_HOST << 8), + .class_mask = ~0, + .vendor = PCI_VENDOR_ID_AL, + .device = 0x1689, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + }, + { } }; diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c index d16bd4b5c117..6b302a930e5f 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c @@ -48,7 +48,7 @@ #define PFX "IPMI message handler: " -#define IPMI_DRIVER_VERSION "36.0" +#define IPMI_DRIVER_VERSION "38.0" static struct ipmi_recv_msg *ipmi_alloc_recv_msg(void); static int ipmi_init_msghandler(void); diff --git a/drivers/isdn/hisax/hfc_usb.c b/drivers/isdn/hisax/hfc_usb.c index 32bf0d5d0f9a..f8457ef48826 100644 --- a/drivers/isdn/hisax/hfc_usb.c +++ b/drivers/isdn/hisax/hfc_usb.c @@ -71,78 +71,68 @@ typedef struct { /****************************************/ static struct usb_device_id hfcusb_idtab[] = { { - .idVendor = 0x0959, - .idProduct = 0x2bd0, + USB_DEVICE(0x0959, 0x2bd0), .driver_info = (unsigned long) &((hfcsusb_vdata) {LED_OFF, {4, 0, 2, 1}, "ISDN USB TA (Cologne Chip HFC-S USB based)"}), }, { - .idVendor = 0x0675, - .idProduct = 0x1688, + USB_DEVICE(0x0675, 0x1688), .driver_info = (unsigned long) &((hfcsusb_vdata) {LED_SCHEME1, {1, 2, 0, 0}, "DrayTek miniVigor 128 USB ISDN TA"}), }, { - .idVendor = 0x07b0, - .idProduct = 0x0007, + USB_DEVICE(0x07b0, 0x0007), .driver_info = (unsigned long) &((hfcsusb_vdata) {LED_SCHEME1, {0x80, -64, -32, -16}, "Billion tiny USB ISDN TA 128"}), }, { - .idVendor = 0x0742, - .idProduct = 0x2008, + USB_DEVICE(0x0742, 0x2008), .driver_info = (unsigned long) &((hfcsusb_vdata) {LED_SCHEME1, {4, 0, 2, 1}, "Stollmann USB TA"}), }, { - .idVendor = 0x0742, - .idProduct = 0x2009, + USB_DEVICE(0x0742, 0x2009), .driver_info = (unsigned long) &((hfcsusb_vdata) {LED_SCHEME1, {4, 0, 2, 1}, "Aceex USB ISDN TA"}), }, { - .idVendor = 0x0742, - .idProduct = 0x200A, + USB_DEVICE(0x0742, 0x200A), .driver_info = (unsigned long) &((hfcsusb_vdata) {LED_SCHEME1, {4, 0, 2, 1}, "OEM USB ISDN TA"}), }, { - .idVendor = 0x08e3, - .idProduct = 0x0301, + USB_DEVICE(0x08e3, 0x0301), .driver_info = (unsigned long) &((hfcsusb_vdata) {LED_SCHEME1, {2, 0, 1, 4}, "Olitec USB RNIS"}), }, { - .idVendor = 0x07fa, - .idProduct = 0x0846, + USB_DEVICE(0x07fa, 0x0846), .driver_info = (unsigned long) &((hfcsusb_vdata) {LED_SCHEME1, {0x80, -64, -32, -16}, "Bewan Modem RNIS USB"}), }, { - .idVendor = 0x07fa, - .idProduct = 0x0847, + USB_DEVICE(0x07fa, 0x0847), .driver_info = (unsigned long) &((hfcsusb_vdata) {LED_SCHEME1, {0x80, -64, -32, -16}, "Djinn Numeris USB"}), }, { - .idVendor = 0x07b0, - .idProduct = 0x0006, + USB_DEVICE(0x07b0, 0x0006), .driver_info = (unsigned long) &((hfcsusb_vdata) {LED_SCHEME1, {0x80, -64, -32, -16}, "Twister ISDN TA"}), }, + { } }; - /***************************************************************/ /* structure defining input+output fifos (interrupt/bulk mode) */ /***************************************************************/ diff --git a/drivers/md/md.c b/drivers/md/md.c index adf960d8a7c9..f3fed662f32e 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -3156,7 +3156,7 @@ static int md_ioctl(struct inode *inode, struct file *file, if (cnt > 0 ) { printk(KERN_WARNING "md: %s(pid %d) used deprecated START_ARRAY ioctl. " - "This will not be supported beyond 2.6\n", + "This will not be supported beyond July 2006\n", current->comm, current->pid); cnt--; } @@ -3437,10 +3437,19 @@ static int md_thread(void * arg) allow_signal(SIGKILL); while (!kthread_should_stop()) { - wait_event_timeout(thread->wqueue, - test_bit(THREAD_WAKEUP, &thread->flags) - || kthread_should_stop(), - thread->timeout); + /* We need to wait INTERRUPTIBLE so that + * we don't add to the load-average. + * That means we need to be sure no signals are + * pending + */ + if (signal_pending(current)) + flush_signals(current); + + wait_event_interruptible_timeout + (thread->wqueue, + test_bit(THREAD_WAKEUP, &thread->flags) + || kthread_should_stop(), + thread->timeout); try_to_freeze(); clear_bit(THREAD_WAKEUP, &thread->flags); diff --git a/drivers/net/wan/sdladrv.c b/drivers/net/wan/sdladrv.c index 7c2cf2e76300..032c0f81928e 100644 --- a/drivers/net/wan/sdladrv.c +++ b/drivers/net/wan/sdladrv.c @@ -1994,7 +1994,7 @@ static int detect_s514 (sdlahw_t* hw) modname, hw->irq); /* map the physical PCI memory to virtual memory */ - (void *)hw->dpmbase = ioremap((unsigned long)S514_mem_base_addr, + hw->dpmbase = ioremap((unsigned long)S514_mem_base_addr, (unsigned long)MAX_SIZEOF_S514_MEMORY); /* map the physical control register memory to virtual memory */ hw->vector = (unsigned long)ioremap( diff --git a/drivers/scsi/ahci.c b/drivers/scsi/ahci.c index 4e96ec5f2ff9..894e7113e0b3 100644 --- a/drivers/scsi/ahci.c +++ b/drivers/scsi/ahci.c @@ -565,6 +565,17 @@ static void ahci_intr_error(struct ata_port *ap, u32 irq_stat) u32 tmp; int work; + printk(KERN_WARNING "ata%u: port reset, " + "p_is %x is %x pis %x cmd %x tf %x ss %x se %x\n", + ap->id, + irq_stat, + readl(mmio + HOST_IRQ_STAT), + readl(port_mmio + PORT_IRQ_STAT), + readl(port_mmio + PORT_CMD), + readl(port_mmio + PORT_TFDATA), + readl(port_mmio + PORT_SCR_STAT), + readl(port_mmio + PORT_SCR_ERR)); + /* stop DMA */ tmp = readl(port_mmio + PORT_CMD); tmp &= ~PORT_CMD_START; @@ -602,8 +613,6 @@ static void ahci_intr_error(struct ata_port *ap, u32 irq_stat) tmp |= PORT_CMD_START; writel(tmp, port_mmio + PORT_CMD); readl(port_mmio + PORT_CMD); /* flush */ - - printk(KERN_WARNING "ata%u: error occurred, port reset\n", ap->id); } static void ahci_eng_timeout(struct ata_port *ap) @@ -614,17 +623,17 @@ static void ahci_eng_timeout(struct ata_port *ap) struct ata_queued_cmd *qc; unsigned long flags; - DPRINTK("ENTER\n"); + printk(KERN_WARNING "ata%u: handling error/timeout\n", ap->id); spin_lock_irqsave(&host_set->lock, flags); - ahci_intr_error(ap, readl(port_mmio + PORT_IRQ_STAT)); - qc = ata_qc_from_tag(ap, ap->active_tag); if (!qc) { printk(KERN_ERR "ata%u: BUG: timeout without command\n", ap->id); } else { + ahci_intr_error(ap, readl(port_mmio + PORT_IRQ_STAT)); + /* hack alert! We cannot use the supplied completion * function from inside the ->eh_strategy_handler() thread. * libata is the only user of ->eh_strategy_handler() in @@ -659,9 +668,19 @@ static inline int ahci_host_intr(struct ata_port *ap, struct ata_queued_cmd *qc) } if (status & PORT_IRQ_FATAL) { + unsigned int err_mask; + if (status & PORT_IRQ_TF_ERR) + err_mask = AC_ERR_DEV; + else if (status & PORT_IRQ_IF_ERR) + err_mask = AC_ERR_ATA_BUS; + else + err_mask = AC_ERR_HOST_BUS; + + /* command processing has stopped due to error; restart */ ahci_intr_error(ap, status); + if (qc) - ata_qc_complete(qc, AC_ERR_OTHER); + ata_qc_complete(qc, err_mask); } return 1; diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c index d81db3a3d4b9..ba1eb8b38e00 100644 --- a/drivers/scsi/libata-core.c +++ b/drivers/scsi/libata-core.c @@ -1263,7 +1263,7 @@ retry: } /* ATAPI-specific feature tests */ - else { + else if (dev->class == ATA_DEV_ATAPI) { if (ata_id_is_ata(dev->id)) /* sanity check */ goto err_out_nosup; @@ -2399,7 +2399,7 @@ static void ata_sg_clean(struct ata_queued_cmd *qc) if (qc->flags & ATA_QCFLAG_SINGLE) assert(qc->n_elem == 1); - DPRINTK("unmapping %u sg elements\n", qc->n_elem); + VPRINTK("unmapping %u sg elements\n", qc->n_elem); /* if we padded the buffer out to 32-bit bound, and data * xfer direction is from-device, we must copy from the @@ -2409,7 +2409,8 @@ static void ata_sg_clean(struct ata_queued_cmd *qc) pad_buf = ap->pad + (qc->tag * ATA_DMA_PAD_SZ); if (qc->flags & ATA_QCFLAG_SG) { - dma_unmap_sg(ap->host_set->dev, sg, qc->n_elem, dir); + if (qc->n_elem) + dma_unmap_sg(ap->host_set->dev, sg, qc->n_elem, dir); /* restore last sg */ sg[qc->orig_n_elem - 1].length += qc->pad_len; if (pad_buf) { @@ -2419,8 +2420,10 @@ static void ata_sg_clean(struct ata_queued_cmd *qc) kunmap_atomic(psg->page, KM_IRQ0); } } else { - dma_unmap_single(ap->host_set->dev, sg_dma_address(&sg[0]), - sg_dma_len(&sg[0]), dir); + if (sg_dma_len(&sg[0]) > 0) + dma_unmap_single(ap->host_set->dev, + sg_dma_address(&sg[0]), sg_dma_len(&sg[0]), + dir); /* restore sg */ sg->length += qc->pad_len; if (pad_buf) @@ -2619,6 +2622,11 @@ static int ata_sg_setup_one(struct ata_queued_cmd *qc) sg->length, qc->pad_len); } + if (!sg->length) { + sg_dma_address(sg) = 0; + goto skip_map; + } + dma_address = dma_map_single(ap->host_set->dev, qc->buf_virt, sg->length, dir); if (dma_mapping_error(dma_address)) { @@ -2628,6 +2636,7 @@ static int ata_sg_setup_one(struct ata_queued_cmd *qc) } sg_dma_address(sg) = dma_address; +skip_map: sg_dma_len(sg) = sg->length; DPRINTK("mapped buffer of %d bytes for %s\n", sg_dma_len(sg), @@ -2655,7 +2664,7 @@ static int ata_sg_setup(struct ata_queued_cmd *qc) struct ata_port *ap = qc->ap; struct scatterlist *sg = qc->__sg; struct scatterlist *lsg = &sg[qc->n_elem - 1]; - int n_elem, dir; + int n_elem, pre_n_elem, dir, trim_sg = 0; VPRINTK("ENTER, ata%u\n", ap->id); assert(qc->flags & ATA_QCFLAG_SG); @@ -2689,13 +2698,24 @@ static int ata_sg_setup(struct ata_queued_cmd *qc) sg_dma_len(psg) = ATA_DMA_PAD_SZ; /* trim last sg */ lsg->length -= qc->pad_len; + if (lsg->length == 0) + trim_sg = 1; DPRINTK("padding done, sg[%d].length=%u pad_len=%u\n", qc->n_elem - 1, lsg->length, qc->pad_len); } + pre_n_elem = qc->n_elem; + if (trim_sg && pre_n_elem) + pre_n_elem--; + + if (!pre_n_elem) { + n_elem = 0; + goto skip_map; + } + dir = qc->dma_dir; - n_elem = dma_map_sg(ap->host_set->dev, sg, qc->n_elem, dir); + n_elem = dma_map_sg(ap->host_set->dev, sg, pre_n_elem, dir); if (n_elem < 1) { /* restore last sg */ lsg->length += qc->pad_len; @@ -2704,6 +2724,7 @@ static int ata_sg_setup(struct ata_queued_cmd *qc) DPRINTK("%d sg elements mapped\n", n_elem); +skip_map: qc->n_elem = n_elem; return 0; @@ -3263,32 +3284,11 @@ static void ata_qc_timeout(struct ata_queued_cmd *qc) { struct ata_port *ap = qc->ap; struct ata_host_set *host_set = ap->host_set; - struct ata_device *dev = qc->dev; u8 host_stat = 0, drv_stat; unsigned long flags; DPRINTK("ENTER\n"); - /* FIXME: doesn't this conflict with timeout handling? */ - if (qc->dev->class == ATA_DEV_ATAPI && qc->scsicmd) { - struct scsi_cmnd *cmd = qc->scsicmd; - - if (!(cmd->eh_eflags & SCSI_EH_CANCEL_CMD)) { - - /* finish completing original command */ - spin_lock_irqsave(&host_set->lock, flags); - __ata_qc_complete(qc); - spin_unlock_irqrestore(&host_set->lock, flags); - - atapi_request_sense(ap, dev, cmd); - - cmd->result = (CHECK_CONDITION << 1) | (DID_OK << 16); - scsi_finish_command(cmd); - - goto out; - } - } - spin_lock_irqsave(&host_set->lock, flags); /* hack alert! We cannot use the supplied completion @@ -3327,7 +3327,6 @@ static void ata_qc_timeout(struct ata_queued_cmd *qc) spin_unlock_irqrestore(&host_set->lock, flags); -out: DPRINTK("EXIT\n"); } @@ -3411,16 +3410,11 @@ struct ata_queued_cmd *ata_qc_new_init(struct ata_port *ap, qc = ata_qc_new(ap); if (qc) { - qc->__sg = NULL; - qc->flags = 0; qc->scsicmd = NULL; qc->ap = ap; qc->dev = dev; - qc->cursect = qc->cursg = qc->cursg_ofs = 0; - qc->nsect = 0; - qc->nbytes = qc->curbytes = 0; - ata_tf_init(ap, &qc->tf, dev->devno); + ata_qc_reinit(qc); } return qc; diff --git a/drivers/scsi/libata-scsi.c b/drivers/scsi/libata-scsi.c index 0df4b682965d..3b4ca55a3332 100644 --- a/drivers/scsi/libata-scsi.c +++ b/drivers/scsi/libata-scsi.c @@ -1955,22 +1955,44 @@ void ata_scsi_badcmd(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *), u8 done(cmd); } -void atapi_request_sense(struct ata_port *ap, struct ata_device *dev, - struct scsi_cmnd *cmd) +static int atapi_sense_complete(struct ata_queued_cmd *qc,unsigned int err_mask) { - DECLARE_COMPLETION(wait); - struct ata_queued_cmd *qc; - unsigned long flags; - int rc; + if (err_mask && ((err_mask & AC_ERR_DEV) == 0)) + /* FIXME: not quite right; we don't want the + * translation of taskfile registers into + * a sense descriptors, since that's only + * correct for ATA, not ATAPI + */ + ata_gen_ata_desc_sense(qc); - DPRINTK("ATAPI request sense\n"); + qc->scsidone(qc->scsicmd); + return 0; +} - qc = ata_qc_new_init(ap, dev); - BUG_ON(qc == NULL); +/* is it pointless to prefer PIO for "safety reasons"? */ +static inline int ata_pio_use_silly(struct ata_port *ap) +{ + return (ap->flags & ATA_FLAG_PIO_DMA); +} + +static void atapi_request_sense(struct ata_queued_cmd *qc) +{ + struct ata_port *ap = qc->ap; + struct scsi_cmnd *cmd = qc->scsicmd; + + DPRINTK("ATAPI request sense\n"); /* FIXME: is this needed? */ memset(cmd->sense_buffer, 0, sizeof(cmd->sense_buffer)); + ap->ops->tf_read(ap, &qc->tf); + + /* fill these in, for the case where they are -not- overwritten */ + cmd->sense_buffer[0] = 0x70; + cmd->sense_buffer[2] = qc->tf.feature >> 4; + + ata_qc_reinit(qc); + ata_sg_init_one(qc, cmd->sense_buffer, sizeof(cmd->sense_buffer)); qc->dma_dir = DMA_FROM_DEVICE; @@ -1981,22 +2003,20 @@ void atapi_request_sense(struct ata_port *ap, struct ata_device *dev, qc->tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE; qc->tf.command = ATA_CMD_PACKET; - qc->tf.protocol = ATA_PROT_ATAPI; - qc->tf.lbam = (8 * 1024) & 0xff; - qc->tf.lbah = (8 * 1024) >> 8; + if (ata_pio_use_silly(ap)) { + qc->tf.protocol = ATA_PROT_ATAPI_DMA; + qc->tf.feature |= ATAPI_PKT_DMA; + } else { + qc->tf.protocol = ATA_PROT_ATAPI; + qc->tf.lbam = (8 * 1024) & 0xff; + qc->tf.lbah = (8 * 1024) >> 8; + } qc->nbytes = SCSI_SENSE_BUFFERSIZE; - qc->waiting = &wait; - qc->complete_fn = ata_qc_complete_noop; + qc->complete_fn = atapi_sense_complete; - spin_lock_irqsave(&ap->host_set->lock, flags); - rc = ata_qc_issue(qc); - spin_unlock_irqrestore(&ap->host_set->lock, flags); - - if (rc) - ata_port_disable(ap); - else - wait_for_completion(&wait); + if (ata_qc_issue(qc)) + ata_qc_complete(qc, AC_ERR_OTHER); DPRINTK("EXIT\n"); } @@ -2008,19 +2028,8 @@ static int atapi_qc_complete(struct ata_queued_cmd *qc, unsigned int err_mask) VPRINTK("ENTER, err_mask 0x%X\n", err_mask); if (unlikely(err_mask & AC_ERR_DEV)) { - DPRINTK("request check condition\n"); - - /* FIXME: command completion with check condition - * but no sense causes the error handler to run, - * which then issues REQUEST SENSE, fills in the sense - * buffer, and completes the command (for the second - * time). We need to issue REQUEST SENSE some other - * way, to avoid completing the command twice. - */ cmd->result = SAM_STAT_CHECK_CONDITION; - - qc->scsidone(cmd); - + atapi_request_sense(qc); return 1; } diff --git a/drivers/scsi/libata.h b/drivers/scsi/libata.h index fad051ca4672..74a84e0ec0a4 100644 --- a/drivers/scsi/libata.h +++ b/drivers/scsi/libata.h @@ -54,8 +54,6 @@ extern int ata_cmd_ioctl(struct scsi_device *scsidev, void __user *arg); /* libata-scsi.c */ -extern void atapi_request_sense(struct ata_port *ap, struct ata_device *dev, - struct scsi_cmnd *cmd); extern void ata_scsi_scan_host(struct ata_port *ap); extern int ata_scsi_error(struct Scsi_Host *host); extern unsigned int ata_scsiop_inq_std(struct ata_scsi_args *args, u8 *rbuf, diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h index 886dbd116899..0b49f9e070f1 100644 --- a/include/asm-generic/sections.h +++ b/include/asm-generic/sections.h @@ -13,5 +13,6 @@ extern char _eextratext[] __attribute__((weak)); extern char _end[]; extern char __per_cpu_start[], __per_cpu_end[]; extern char __kprobes_text_start[], __kprobes_text_end[]; +extern char __initdata_begin[], __initdata_end[]; #endif /* _ASM_GENERIC_SECTIONS_H_ */ diff --git a/include/asm-i386/mach-default/mach_reboot.h b/include/asm-i386/mach-default/mach_reboot.h index 06ae4d81ba6a..a955e57ad016 100644 --- a/include/asm-i386/mach-default/mach_reboot.h +++ b/include/asm-i386/mach-default/mach_reboot.h @@ -19,7 +19,7 @@ static inline void kb_wait(void) static inline void mach_reboot(void) { int i; - for (i = 0; i < 100; i++) { + for (i = 0; i < 10; i++) { kb_wait(); udelay(50); outb(0x60, 0x64); /* write Controller Command Byte */ diff --git a/include/asm-i386/processor.h b/include/asm-i386/processor.h index 8c02b0318703..5c96cf6dcb39 100644 --- a/include/asm-i386/processor.h +++ b/include/asm-i386/processor.h @@ -65,7 +65,9 @@ struct cpuinfo_x86 { int f00f_bug; int coma_bug; unsigned long loops_per_jiffy; - unsigned char x86_num_cores; + unsigned char x86_max_cores; /* cpuid returned max cores value */ + unsigned char booted_cores; /* number of cores as seen by OS */ + unsigned char apicid; } __attribute__((__aligned__(SMP_CACHE_BYTES))); #define X86_VENDOR_INTEL 0 diff --git a/include/asm-ppc64/btext.h b/include/asm-powerpc/btext.h index 71cce36bc630..71cce36bc630 100644 --- a/include/asm-ppc64/btext.h +++ b/include/asm-powerpc/btext.h diff --git a/include/asm-ppc64/delay.h b/include/asm-powerpc/delay.h index 05f198cf73d9..1492aa9ab716 100644 --- a/include/asm-ppc64/delay.h +++ b/include/asm-powerpc/delay.h @@ -1,5 +1,5 @@ -#ifndef _PPC64_DELAY_H -#define _PPC64_DELAY_H +#ifndef _ASM_POWERPC_DELAY_H +#define _ASM_POWERPC_DELAY_H /* * Copyright 1996, Paul Mackerras. @@ -15,10 +15,17 @@ extern unsigned long tb_ticks_per_usec; -/* define these here to prevent circular dependencies */ +#ifdef CONFIG_PPC64 +/* define these here to prevent circular dependencies */ +/* these instructions control the thread priority on multi-threaded cpus */ #define __HMT_low() asm volatile("or 1,1,1") #define __HMT_medium() asm volatile("or 2,2,2") -#define __barrier() asm volatile("":::"memory") +#else +#define __HMT_low() +#define __HMT_medium() +#endif + +#define __barrier() asm volatile("" ::: "memory") static inline unsigned long __get_tb(void) { @@ -32,7 +39,7 @@ static inline void __delay(unsigned long loops) { unsigned long start = __get_tb(); - while((__get_tb()-start) < loops) + while((__get_tb() - start) < loops) __HMT_low(); __HMT_medium(); __barrier(); @@ -45,4 +52,4 @@ static inline void udelay(unsigned long usecs) __delay(loops); } -#endif /* _PPC64_DELAY_H */ +#endif /* _ASM_POWERPC_DELAY_H */ diff --git a/include/asm-ppc64/eeh.h b/include/asm-powerpc/eeh.h index 89f26ab31908..89f26ab31908 100644 --- a/include/asm-ppc64/eeh.h +++ b/include/asm-powerpc/eeh.h diff --git a/include/asm-ppc64/floppy.h b/include/asm-powerpc/floppy.h index 5c497b588e54..64276a3f6153 100644 --- a/include/asm-ppc64/floppy.h +++ b/include/asm-powerpc/floppy.h @@ -7,22 +7,22 @@ * * Copyright (C) 1995 */ -#ifndef __ASM_PPC64_FLOPPY_H -#define __ASM_PPC64_FLOPPY_H +#ifndef __ASM_POWERPC_FLOPPY_H +#define __ASM_POWERPC_FLOPPY_H #include <linux/config.h> #include <asm/machdep.h> -#define fd_inb(port) inb_p(port) -#define fd_outb(value,port) outb_p(value,port) +#define fd_inb(port) inb_p(port) +#define fd_outb(value,port) outb_p(value,port) #define fd_enable_dma() enable_dma(FLOPPY_DMA) #define fd_disable_dma() disable_dma(FLOPPY_DMA) -#define fd_request_dma() request_dma(FLOPPY_DMA,"floppy") +#define fd_request_dma() request_dma(FLOPPY_DMA, "floppy") #define fd_free_dma() free_dma(FLOPPY_DMA) #define fd_clear_dma_ff() clear_dma_ff(FLOPPY_DMA) -#define fd_set_dma_mode(mode) set_dma_mode(FLOPPY_DMA,mode) -#define fd_set_dma_count(count) set_dma_count(FLOPPY_DMA,count) +#define fd_set_dma_mode(mode) set_dma_mode(FLOPPY_DMA, mode) +#define fd_set_dma_count(count) set_dma_count(FLOPPY_DMA, count) #define fd_enable_irq() enable_irq(FLOPPY_IRQ) #define fd_disable_irq() disable_irq(FLOPPY_IRQ) #define fd_cacheflush(addr,size) /* nothing */ @@ -35,10 +35,10 @@ #include <linux/pci.h> -#define fd_dma_setup(addr,size,mode,io) ppc64_fd_dma_setup(addr,size,mode,io) +#define fd_dma_setup(addr,size,mode,io) powerpc_fd_dma_setup(addr,size,mode,io) -static __inline__ int -ppc64_fd_dma_setup(char *addr, unsigned long size, int mode, int io) +static __inline__ int powerpc_fd_dma_setup(char *addr, unsigned long size, + int mode, int io) { static unsigned long prev_size; static dma_addr_t bus_addr = 0; @@ -55,9 +55,8 @@ ppc64_fd_dma_setup(char *addr, unsigned long size, int mode, int io) bus_addr = 0; } - if (!bus_addr) /* need to map it */ { + if (!bus_addr) /* need to map it */ bus_addr = pci_map_single(NULL, addr, size, dir); - } /* remember this one as prev */ prev_addr = addr; @@ -103,4 +102,4 @@ static int FDC2 = -1; #define EXTRA_FLOPPY_PARAMS -#endif /* __ASM_PPC64_FLOPPY_H */ +#endif /* __ASM_POWERPC_FLOPPY_H */ diff --git a/include/asm-ppc64/hvconsole.h b/include/asm-powerpc/hvconsole.h index 6da93ce74dc0..6da93ce74dc0 100644 --- a/include/asm-ppc64/hvconsole.h +++ b/include/asm-powerpc/hvconsole.h diff --git a/include/asm-ppc64/hvcserver.h b/include/asm-powerpc/hvcserver.h index aecba9665796..aecba9665796 100644 --- a/include/asm-ppc64/hvcserver.h +++ b/include/asm-powerpc/hvcserver.h diff --git a/include/asm-powerpc/kexec.h b/include/asm-powerpc/kexec.h index 062ab9ba68eb..c72ffc709ea8 100644 --- a/include/asm-powerpc/kexec.h +++ b/include/asm-powerpc/kexec.h @@ -40,6 +40,7 @@ extern note_buf_t crash_notes[]; #ifdef __powerpc64__ extern void kexec_smp_wait(void); /* get and clear naca physid, wait for master to copy new code to 0 */ +extern void __init kexec_setup(void); #else struct kimage; extern void machine_kexec_simple(struct kimage *image); diff --git a/include/asm-powerpc/machdep.h b/include/asm-powerpc/machdep.h index 5670f0cd6143..c011abb8b600 100644 --- a/include/asm-powerpc/machdep.h +++ b/include/asm-powerpc/machdep.h @@ -93,7 +93,9 @@ struct machdep_calls { void (*init_IRQ)(void); int (*get_irq)(struct pt_regs *); - void (*cpu_irq_down)(int secondary); +#ifdef CONFIG_KEXEC + void (*kexec_cpu_down)(int crash_shutdown, int secondary); +#endif /* PCI stuff */ /* Called after scanning the bus, before allocating resources */ diff --git a/include/asm-ppc64/nvram.h b/include/asm-powerpc/nvram.h index def47d720d3d..24bd8c2388ea 100644 --- a/include/asm-ppc64/nvram.h +++ b/include/asm-powerpc/nvram.h @@ -1,6 +1,5 @@ /* - * PreP compliant NVRAM access - * This needs to be updated for PPC64 + * NVRAM definitions and access functions. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -8,8 +7,8 @@ * 2 of the License, or (at your option) any later version. */ -#ifndef _PPC64_NVRAM_H -#define _PPC64_NVRAM_H +#ifndef _ASM_POWERPC_NVRAM_H +#define _ASM_POWERPC_NVRAM_H #define NVRW_CNT 0x20 #define NVRAM_HEADER_LEN 16 /* sizeof(struct nvram_header) */ @@ -69,7 +68,6 @@ extern int nvram_clear_error_log(void); extern struct nvram_partition *nvram_find_partition(int sig, const char *name); extern int pSeries_nvram_init(void); -extern int pmac_nvram_init(void); extern int mmio_nvram_init(void); /* PowerMac specific nvram stuffs */ @@ -88,7 +86,11 @@ extern u8 pmac_xpram_read(int xpaddr); extern void pmac_xpram_write(int xpaddr, u8 data); /* Synchronize NVRAM */ -extern int nvram_sync(void); +extern void nvram_sync(void); + +/* Normal access to NVRAM */ +extern unsigned char nvram_read_byte(int i); +extern void nvram_write_byte(unsigned char c, int i); /* Some offsets in XPRAM */ #define PMAC_XPRAM_MACHINE_LOC 0xe4 @@ -112,5 +114,6 @@ struct pmac_machine_location { _IOWR('p', 0x40, int) #define IOC_NVRAM_GET_OFFSET _IOWR('p', 0x42, int) /* Get NVRAM partition offset */ +#define IOC_NVRAM_SYNC _IO('p', 0x43) /* Sync NVRAM image */ -#endif /* _PPC64_NVRAM_H */ +#endif /* _ASM_POWERPC_NVRAM_H */ diff --git a/include/asm-powerpc/page.h b/include/asm-powerpc/page.h new file mode 100644 index 000000000000..18c1e5ee81a3 --- /dev/null +++ b/include/asm-powerpc/page.h @@ -0,0 +1,179 @@ +#ifndef _ASM_POWERPC_PAGE_H +#define _ASM_POWERPC_PAGE_H + +/* + * Copyright (C) 2001,2005 IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifdef __KERNEL__ +#include <linux/config.h> +#include <asm/asm-compat.h> + +/* + * On PPC32 page size is 4K. For PPC64 we support either 4K or 64K software + * page size. When using 64K pages however, whether we are really supporting + * 64K pages in HW or not is irrelevant to those definitions. + */ +#ifdef CONFIG_PPC_64K_PAGES +#define PAGE_SHIFT 16 +#else +#define PAGE_SHIFT 12 +#endif + +#define PAGE_SIZE (ASM_CONST(1) << PAGE_SHIFT) + +/* We do define AT_SYSINFO_EHDR but don't use the gate mechanism */ +#define __HAVE_ARCH_GATE_AREA 1 + +/* + * Subtle: (1 << PAGE_SHIFT) is an int, not an unsigned long. So if we + * assign PAGE_MASK to a larger type it gets extended the way we want + * (i.e. with 1s in the high bits) + */ +#define PAGE_MASK (~((1 << PAGE_SHIFT) - 1)) + +#define PAGE_OFFSET ASM_CONST(CONFIG_KERNEL_START) +#define KERNELBASE PAGE_OFFSET + +#ifdef CONFIG_DISCONTIGMEM +#define page_to_pfn(page) discontigmem_page_to_pfn(page) +#define pfn_to_page(pfn) discontigmem_pfn_to_page(pfn) +#define pfn_valid(pfn) discontigmem_pfn_valid(pfn) +#endif + +#ifdef CONFIG_FLATMEM +#define pfn_to_page(pfn) (mem_map + (pfn)) +#define page_to_pfn(page) ((unsigned long)((page) - mem_map)) +#define pfn_valid(pfn) ((pfn) < max_mapnr) +#endif + +#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT) +#define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT) +#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT) + +#define __va(x) ((void *)((unsigned long)(x) + KERNELBASE)) +#define __pa(x) ((unsigned long)(x) - PAGE_OFFSET) + +/* + * Unfortunately the PLT is in the BSS in the PPC32 ELF ABI, + * and needs to be executable. This means the whole heap ends + * up being executable. + */ +#define VM_DATA_DEFAULT_FLAGS32 (VM_READ | VM_WRITE | VM_EXEC | \ + VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) + +#define VM_DATA_DEFAULT_FLAGS64 (VM_READ | VM_WRITE | \ + VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) + +#ifdef __powerpc64__ +#include <asm/page_64.h> +#else +#include <asm/page_32.h> +#endif + +/* align addr on a size boundary - adjust address up/down if needed */ +#define _ALIGN_UP(addr,size) (((addr)+((size)-1))&(~((size)-1))) +#define _ALIGN_DOWN(addr,size) ((addr)&(~((size)-1))) + +/* align addr on a size boundary - adjust address up if needed */ +#define _ALIGN(addr,size) _ALIGN_UP(addr,size) + +/* to align the pointer to the (next) page boundary */ +#define PAGE_ALIGN(addr) _ALIGN(addr, PAGE_SIZE) + +#ifndef __ASSEMBLY__ + +#undef STRICT_MM_TYPECHECKS + +#ifdef STRICT_MM_TYPECHECKS +/* These are used to make use of C type-checking. */ + +/* PTE level */ +typedef struct { pte_basic_t pte; } pte_t; +#define pte_val(x) ((x).pte) +#define __pte(x) ((pte_t) { (x) }) + +/* 64k pages additionally define a bigger "real PTE" type that gathers + * the "second half" part of the PTE for pseudo 64k pages + */ +#ifdef CONFIG_PPC_64K_PAGES +typedef struct { pte_t pte; unsigned long hidx; } real_pte_t; +#else +typedef struct { pte_t pte; } real_pte_t; +#endif + +/* PMD level */ +typedef struct { unsigned long pmd; } pmd_t; +#define pmd_val(x) ((x).pmd) +#define __pmd(x) ((pmd_t) { (x) }) + +/* PUD level exusts only on 4k pages */ +#ifndef CONFIG_PPC_64K_PAGES +typedef struct { unsigned long pud; } pud_t; +#define pud_val(x) ((x).pud) +#define __pud(x) ((pud_t) { (x) }) +#endif + +/* PGD level */ +typedef struct { unsigned long pgd; } pgd_t; +#define pgd_val(x) ((x).pgd) +#define __pgd(x) ((pgd_t) { (x) }) + +/* Page protection bits */ +typedef struct { unsigned long pgprot; } pgprot_t; +#define pgprot_val(x) ((x).pgprot) +#define __pgprot(x) ((pgprot_t) { (x) }) + +#else + +/* + * .. while these make it easier on the compiler + */ + +typedef pte_basic_t pte_t; +#define pte_val(x) (x) +#define __pte(x) (x) + +#ifdef CONFIG_PPC_64K_PAGES +typedef struct { pte_t pte; unsigned long hidx; } real_pte_t; +#else +typedef unsigned long real_pte_t; +#endif + + +typedef unsigned long pmd_t; +#define pmd_val(x) (x) +#define __pmd(x) (x) + +#ifndef CONFIG_PPC_64K_PAGES +typedef unsigned long pud_t; +#define pud_val(x) (x) +#define __pud(x) (x) +#endif + +typedef unsigned long pgd_t; +#define pgd_val(x) (x) +#define pgprot_val(x) (x) + +typedef unsigned long pgprot_t; +#define __pgd(x) (x) +#define __pgprot(x) (x) + +#endif + +struct page; +extern void clear_user_page(void *page, unsigned long vaddr, struct page *pg); +extern void copy_user_page(void *to, void *from, unsigned long vaddr, + struct page *p); +extern int page_is_ram(unsigned long pfn); + +#endif /* __ASSEMBLY__ */ + +#endif /* __KERNEL__ */ + +#endif /* _ASM_POWERPC_PAGE_H */ diff --git a/include/asm-powerpc/page_32.h b/include/asm-powerpc/page_32.h new file mode 100644 index 000000000000..7259cfd85da9 --- /dev/null +++ b/include/asm-powerpc/page_32.h @@ -0,0 +1,40 @@ +#ifndef _ASM_POWERPC_PAGE_32_H +#define _ASM_POWERPC_PAGE_32_H + +#define VM_DATA_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS32 + +#define PPC_MEMSTART 0 + +#ifndef __ASSEMBLY__ +/* + * The basic type of a PTE - 64 bits for those CPUs with > 32 bit + * physical addressing. For now this just the IBM PPC440. + */ +#ifdef CONFIG_PTE_64BIT +typedef unsigned long long pte_basic_t; +#define PTE_SHIFT (PAGE_SHIFT - 3) /* 512 ptes per page */ +#define PTE_FMT "%16Lx" +#else +typedef unsigned long pte_basic_t; +#define PTE_SHIFT (PAGE_SHIFT - 2) /* 1024 ptes per page */ +#define PTE_FMT "%.8lx" +#endif + +struct page; +extern void clear_pages(void *page, int order); +static inline void clear_page(void *page) { clear_pages(page, 0); } +extern void copy_page(void *to, void *from); + +/* Pure 2^n version of get_order */ +extern __inline__ int get_order(unsigned long size) +{ + int lz; + + size = (size-1) >> PAGE_SHIFT; + asm ("cntlzw %0,%1" : "=r" (lz) : "r" (size)); + return 32 - lz; +} + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_POWERPC_PAGE_32_H */ diff --git a/include/asm-powerpc/page_64.h b/include/asm-powerpc/page_64.h new file mode 100644 index 000000000000..c16f106b5373 --- /dev/null +++ b/include/asm-powerpc/page_64.h @@ -0,0 +1,174 @@ +#ifndef _ASM_POWERPC_PAGE_64_H +#define _ASM_POWERPC_PAGE_64_H + +/* + * Copyright (C) 2001 PPC64 Team, IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +/* + * We always define HW_PAGE_SHIFT to 12 as use of 64K pages remains Linux + * specific, every notion of page number shared with the firmware, TCEs, + * iommu, etc... still uses a page size of 4K. + */ +#define HW_PAGE_SHIFT 12 +#define HW_PAGE_SIZE (ASM_CONST(1) << HW_PAGE_SHIFT) +#define HW_PAGE_MASK (~(HW_PAGE_SIZE-1)) + +/* + * PAGE_FACTOR is the number of bits factor between PAGE_SHIFT and + * HW_PAGE_SHIFT, that is 4K pages. + */ +#define PAGE_FACTOR (PAGE_SHIFT - HW_PAGE_SHIFT) + +#define REGION_SIZE 4UL +#define REGION_SHIFT 60UL +#define REGION_MASK (((1UL<<REGION_SIZE)-1UL)<<REGION_SHIFT) + +#define VMALLOCBASE ASM_CONST(0xD000000000000000) +#define VMALLOC_REGION_ID (VMALLOCBASE >> REGION_SHIFT) +#define KERNEL_REGION_ID (KERNELBASE >> REGION_SHIFT) +#define USER_REGION_ID (0UL) +#define REGION_ID(ea) (((unsigned long)(ea)) >> REGION_SHIFT) + +/* Segment size */ +#define SID_SHIFT 28 +#define SID_MASK 0xfffffffffUL +#define ESID_MASK 0xfffffffff0000000UL +#define GET_ESID(x) (((x) >> SID_SHIFT) & SID_MASK) + +#ifndef __ASSEMBLY__ +#include <asm/cache.h> + +typedef unsigned long pte_basic_t; + +static __inline__ void clear_page(void *addr) +{ + unsigned long lines, line_size; + + line_size = ppc64_caches.dline_size; + lines = ppc64_caches.dlines_per_page; + + __asm__ __volatile__( + "mtctr %1 # clear_page\n\ +1: dcbz 0,%0\n\ + add %0,%0,%3\n\ + bdnz+ 1b" + : "=r" (addr) + : "r" (lines), "0" (addr), "r" (line_size) + : "ctr", "memory"); +} + +extern void copy_4K_page(void *to, void *from); + +#ifdef CONFIG_PPC_64K_PAGES +static inline void copy_page(void *to, void *from) +{ + unsigned int i; + for (i=0; i < (1 << (PAGE_SHIFT - 12)); i++) { + copy_4K_page(to, from); + to += 4096; + from += 4096; + } +} +#else /* CONFIG_PPC_64K_PAGES */ +static inline void copy_page(void *to, void *from) +{ + copy_4K_page(to, from); +} +#endif /* CONFIG_PPC_64K_PAGES */ + +/* Log 2 of page table size */ +extern u64 ppc64_pft_size; + +/* Large pages size */ +extern unsigned int HPAGE_SHIFT; +#define HPAGE_SIZE ((1UL) << HPAGE_SHIFT) +#define HPAGE_MASK (~(HPAGE_SIZE - 1)) +#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) + +#endif /* __ASSEMBLY__ */ + +#ifdef CONFIG_HUGETLB_PAGE + +#define HTLB_AREA_SHIFT 40 +#define HTLB_AREA_SIZE (1UL << HTLB_AREA_SHIFT) +#define GET_HTLB_AREA(x) ((x) >> HTLB_AREA_SHIFT) + +#define LOW_ESID_MASK(addr, len) (((1U << (GET_ESID(addr+len-1)+1)) \ + - (1U << GET_ESID(addr))) & 0xffff) +#define HTLB_AREA_MASK(addr, len) (((1U << (GET_HTLB_AREA(addr+len-1)+1)) \ + - (1U << GET_HTLB_AREA(addr))) & 0xffff) + +#define ARCH_HAS_HUGEPAGE_ONLY_RANGE +#define ARCH_HAS_PREPARE_HUGEPAGE_RANGE +#define ARCH_HAS_SETCLEAR_HUGE_PTE + +#define touches_hugepage_low_range(mm, addr, len) \ + (LOW_ESID_MASK((addr), (len)) & (mm)->context.low_htlb_areas) +#define touches_hugepage_high_range(mm, addr, len) \ + (HTLB_AREA_MASK((addr), (len)) & (mm)->context.high_htlb_areas) + +#define __within_hugepage_low_range(addr, len, segmask) \ + ((LOW_ESID_MASK((addr), (len)) | (segmask)) == (segmask)) +#define within_hugepage_low_range(addr, len) \ + __within_hugepage_low_range((addr), (len), \ + current->mm->context.low_htlb_areas) +#define __within_hugepage_high_range(addr, len, zonemask) \ + ((HTLB_AREA_MASK((addr), (len)) | (zonemask)) == (zonemask)) +#define within_hugepage_high_range(addr, len) \ + __within_hugepage_high_range((addr), (len), \ + current->mm->context.high_htlb_areas) + +#define is_hugepage_only_range(mm, addr, len) \ + (touches_hugepage_high_range((mm), (addr), (len)) || \ + touches_hugepage_low_range((mm), (addr), (len))) +#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA + +#define in_hugepage_area(context, addr) \ + (cpu_has_feature(CPU_FTR_16M_PAGE) && \ + ( ((1 << GET_HTLB_AREA(addr)) & (context).high_htlb_areas) || \ + ( ((addr) < 0x100000000L) && \ + ((1 << GET_ESID(addr)) & (context).low_htlb_areas) ) ) ) + +#else /* !CONFIG_HUGETLB_PAGE */ + +#define in_hugepage_area(mm, addr) 0 + +#endif /* !CONFIG_HUGETLB_PAGE */ + +#ifdef MODULE +#define __page_aligned __attribute__((__aligned__(PAGE_SIZE))) +#else +#define __page_aligned \ + __attribute__((__aligned__(PAGE_SIZE), \ + __section__(".data.page_aligned"))) +#endif + +#define VM_DATA_DEFAULT_FLAGS \ + (test_thread_flag(TIF_32BIT) ? \ + VM_DATA_DEFAULT_FLAGS32 : VM_DATA_DEFAULT_FLAGS64) + +/* + * This is the default if a program doesn't have a PT_GNU_STACK + * program header entry. The PPC64 ELF ABI has a non executable stack + * stack by default, so in the absense of a PT_GNU_STACK program header + * we turn execute permission off. + */ +#define VM_STACK_DEFAULT_FLAGS32 (VM_READ | VM_WRITE | VM_EXEC | \ + VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) + +#define VM_STACK_DEFAULT_FLAGS64 (VM_READ | VM_WRITE | \ + VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) + +#define VM_STACK_DEFAULT_FLAGS \ + (test_thread_flag(TIF_32BIT) ? \ + VM_STACK_DEFAULT_FLAGS32 : VM_STACK_DEFAULT_FLAGS64) + +#include <asm-generic/page.h> + +#endif /* _ASM_POWERPC_PAGE_64_H */ diff --git a/include/asm-ppc64/serial.h b/include/asm-powerpc/serial.h index d6bcb79b7d7b..b273d630b32f 100644 --- a/include/asm-ppc64/serial.h +++ b/include/asm-powerpc/serial.h @@ -1,21 +1,16 @@ /* - * include/asm-ppc64/serial.h - */ -#ifndef _PPC64_SERIAL_H -#define _PPC64_SERIAL_H - -/* - * This assumes you have a 1.8432 MHz clock for your UART. - * - * It'd be nice if someone built a serial card with a 24.576 MHz - * clock, since the 16550A is capable of handling a top speed of 1.5 - * megabits/second; but this requires the faster clock. - * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ +#ifndef _ASM_POWERPC_SERIAL_H +#define _ASM_POWERPC_SERIAL_H + +/* + * Serial ports are not listed here, because they are discovered + * through the device tree. + */ /* Default baud base if not found in device-tree */ #define BASE_BAUD ( 1843200 / 16 ) diff --git a/include/asm-powerpc/vdso_datapage.h b/include/asm-powerpc/vdso_datapage.h index fc323b51366b..411832d5bbdb 100644 --- a/include/asm-powerpc/vdso_datapage.h +++ b/include/asm-powerpc/vdso_datapage.h @@ -73,7 +73,7 @@ struct vdso_data { /* those additional ones don't have to be located anywhere * special as they were not part of the original systemcfg */ - __s64 wtom_clock_sec; /* Wall to monotonic clock */ + __s32 wtom_clock_sec; /* Wall to monotonic clock */ __s32 wtom_clock_nsec; __u32 syscall_map_64[SYSCALL_MAP_SIZE]; /* map of syscalls */ __u32 syscall_map_32[SYSCALL_MAP_SIZE]; /* map of syscalls */ diff --git a/include/asm-ppc/nvram.h b/include/asm-ppc/nvram.h deleted file mode 100644 index 31ef16e3fc4f..000000000000 --- a/include/asm-ppc/nvram.h +++ /dev/null @@ -1,73 +0,0 @@ -/* - * PreP compliant NVRAM access - */ - -#ifdef __KERNEL__ -#ifndef _PPC_NVRAM_H -#define _PPC_NVRAM_H - -#define NVRAM_AS0 0x74 -#define NVRAM_AS1 0x75 -#define NVRAM_DATA 0x77 - - -/* RTC Offsets */ - -#define MOTO_RTC_SECONDS 0x1FF9 -#define MOTO_RTC_MINUTES 0x1FFA -#define MOTO_RTC_HOURS 0x1FFB -#define MOTO_RTC_DAY_OF_WEEK 0x1FFC -#define MOTO_RTC_DAY_OF_MONTH 0x1FFD -#define MOTO_RTC_MONTH 0x1FFE -#define MOTO_RTC_YEAR 0x1FFF -#define MOTO_RTC_CONTROLA 0x1FF8 -#define MOTO_RTC_CONTROLB 0x1FF9 - -/* PowerMac specific nvram stuffs */ - -enum { - pmac_nvram_OF, /* Open Firmware partition */ - pmac_nvram_XPRAM, /* MacOS XPRAM partition */ - pmac_nvram_NR /* MacOS Name Registry partition */ -}; - -/* Return partition offset in nvram */ -extern int pmac_get_partition(int partition); - -/* Direct access to XPRAM on PowerMacs */ -extern u8 pmac_xpram_read(int xpaddr); -extern void pmac_xpram_write(int xpaddr, u8 data); - -/* Synchronize NVRAM */ -extern void nvram_sync(void); - -/* Normal access to NVRAM */ -extern unsigned char nvram_read_byte(int i); -extern void nvram_write_byte(unsigned char c, int i); - -/* Some offsets in XPRAM */ -#define PMAC_XPRAM_MACHINE_LOC 0xe4 -#define PMAC_XPRAM_SOUND_VOLUME 0x08 - -/* Machine location structure in PowerMac XPRAM */ -struct pmac_machine_location { - unsigned int latitude; /* 2+30 bit Fractional number */ - unsigned int longitude; /* 2+30 bit Fractional number */ - unsigned int delta; /* mix of GMT delta and DLS */ -}; - -/* - * /dev/nvram ioctls - * - * Note that PMAC_NVRAM_GET_OFFSET is still supported, but is - * definitely obsolete. Do not use it if you can avoid it - */ - -#define OBSOLETE_PMAC_NVRAM_GET_OFFSET \ - _IOWR('p', 0x40, int) - -#define IOC_NVRAM_GET_OFFSET _IOWR('p', 0x42, int) /* Get NVRAM partition offset */ -#define IOC_NVRAM_SYNC _IO('p', 0x43) /* Sync NVRAM image */ - -#endif -#endif /* __KERNEL__ */ diff --git a/include/asm-ppc64/page.h b/include/asm-ppc64/page.h deleted file mode 100644 index 3efc3288f7e9..000000000000 --- a/include/asm-ppc64/page.h +++ /dev/null @@ -1,328 +0,0 @@ -#ifndef _PPC64_PAGE_H -#define _PPC64_PAGE_H - -/* - * Copyright (C) 2001 PPC64 Team, IBM Corp - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <linux/config.h> -#include <asm/asm-compat.h> - -/* - * We support either 4k or 64k software page size. When using 64k pages - * however, wether we are really supporting 64k pages in HW or not is - * irrelevant to those definitions. We always define HW_PAGE_SHIFT to 12 - * as use of 64k pages remains a linux kernel specific, every notion of - * page number shared with the firmware, TCEs, iommu, etc... still assumes - * a page size of 4096. - */ -#ifdef CONFIG_PPC_64K_PAGES -#define PAGE_SHIFT 16 -#else -#define PAGE_SHIFT 12 -#endif - -#define PAGE_SIZE (ASM_CONST(1) << PAGE_SHIFT) -#define PAGE_MASK (~(PAGE_SIZE-1)) - -/* HW_PAGE_SHIFT is always 4k pages */ -#define HW_PAGE_SHIFT 12 -#define HW_PAGE_SIZE (ASM_CONST(1) << HW_PAGE_SHIFT) -#define HW_PAGE_MASK (~(HW_PAGE_SIZE-1)) - -/* PAGE_FACTOR is the number of bits factor between PAGE_SHIFT and - * HW_PAGE_SHIFT, that is 4k pages - */ -#define PAGE_FACTOR (PAGE_SHIFT - HW_PAGE_SHIFT) - -/* Segment size */ -#define SID_SHIFT 28 -#define SID_MASK 0xfffffffffUL -#define ESID_MASK 0xfffffffff0000000UL -#define GET_ESID(x) (((x) >> SID_SHIFT) & SID_MASK) - -/* Large pages size */ - -#ifndef __ASSEMBLY__ -extern unsigned int HPAGE_SHIFT; -#define HPAGE_SIZE ((1UL) << HPAGE_SHIFT) -#define HPAGE_MASK (~(HPAGE_SIZE - 1)) -#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) -#endif /* __ASSEMBLY__ */ - -#ifdef CONFIG_HUGETLB_PAGE - - -#define HTLB_AREA_SHIFT 40 -#define HTLB_AREA_SIZE (1UL << HTLB_AREA_SHIFT) -#define GET_HTLB_AREA(x) ((x) >> HTLB_AREA_SHIFT) - -#define LOW_ESID_MASK(addr, len) (((1U << (GET_ESID(addr+len-1)+1)) \ - - (1U << GET_ESID(addr))) & 0xffff) -#define HTLB_AREA_MASK(addr, len) (((1U << (GET_HTLB_AREA(addr+len-1)+1)) \ - - (1U << GET_HTLB_AREA(addr))) & 0xffff) - -#define ARCH_HAS_HUGEPAGE_ONLY_RANGE -#define ARCH_HAS_PREPARE_HUGEPAGE_RANGE -#define ARCH_HAS_SETCLEAR_HUGE_PTE - -#define touches_hugepage_low_range(mm, addr, len) \ - (LOW_ESID_MASK((addr), (len)) & (mm)->context.low_htlb_areas) -#define touches_hugepage_high_range(mm, addr, len) \ - (HTLB_AREA_MASK((addr), (len)) & (mm)->context.high_htlb_areas) - -#define __within_hugepage_low_range(addr, len, segmask) \ - ((LOW_ESID_MASK((addr), (len)) | (segmask)) == (segmask)) -#define within_hugepage_low_range(addr, len) \ - __within_hugepage_low_range((addr), (len), \ - current->mm->context.low_htlb_areas) -#define __within_hugepage_high_range(addr, len, zonemask) \ - ((HTLB_AREA_MASK((addr), (len)) | (zonemask)) == (zonemask)) -#define within_hugepage_high_range(addr, len) \ - __within_hugepage_high_range((addr), (len), \ - current->mm->context.high_htlb_areas) - -#define is_hugepage_only_range(mm, addr, len) \ - (touches_hugepage_high_range((mm), (addr), (len)) || \ - touches_hugepage_low_range((mm), (addr), (len))) -#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA - -#define in_hugepage_area(context, addr) \ - (cpu_has_feature(CPU_FTR_16M_PAGE) && \ - ( ((1 << GET_HTLB_AREA(addr)) & (context).high_htlb_areas) || \ - ( ((addr) < 0x100000000L) && \ - ((1 << GET_ESID(addr)) & (context).low_htlb_areas) ) ) ) - -#else /* !CONFIG_HUGETLB_PAGE */ - -#define in_hugepage_area(mm, addr) 0 - -#endif /* !CONFIG_HUGETLB_PAGE */ - -/* align addr on a size boundary - adjust address up/down if needed */ -#define _ALIGN_UP(addr,size) (((addr)+((size)-1))&(~((size)-1))) -#define _ALIGN_DOWN(addr,size) ((addr)&(~((size)-1))) - -/* align addr on a size boundary - adjust address up if needed */ -#define _ALIGN(addr,size) _ALIGN_UP(addr,size) - -/* to align the pointer to the (next) page boundary */ -#define PAGE_ALIGN(addr) _ALIGN(addr, PAGE_SIZE) - -#ifdef __KERNEL__ -#ifndef __ASSEMBLY__ -#include <asm/cache.h> - -#undef STRICT_MM_TYPECHECKS - -#define REGION_SIZE 4UL -#define REGION_SHIFT 60UL -#define REGION_MASK (((1UL<<REGION_SIZE)-1UL)<<REGION_SHIFT) - -static __inline__ void clear_page(void *addr) -{ - unsigned long lines, line_size; - - line_size = ppc64_caches.dline_size; - lines = ppc64_caches.dlines_per_page; - - __asm__ __volatile__( - "mtctr %1 # clear_page\n\ -1: dcbz 0,%0\n\ - add %0,%0,%3\n\ - bdnz+ 1b" - : "=r" (addr) - : "r" (lines), "0" (addr), "r" (line_size) - : "ctr", "memory"); -} - -extern void copy_4K_page(void *to, void *from); - -#ifdef CONFIG_PPC_64K_PAGES -static inline void copy_page(void *to, void *from) -{ - unsigned int i; - for (i=0; i < (1 << (PAGE_SHIFT - 12)); i++) { - copy_4K_page(to, from); - to += 4096; - from += 4096; - } -} -#else /* CONFIG_PPC_64K_PAGES */ -static inline void copy_page(void *to, void *from) -{ - copy_4K_page(to, from); -} -#endif /* CONFIG_PPC_64K_PAGES */ - -struct page; -extern void clear_user_page(void *page, unsigned long vaddr, struct page *pg); -extern void copy_user_page(void *to, void *from, unsigned long vaddr, struct page *p); - -#ifdef STRICT_MM_TYPECHECKS -/* - * These are used to make use of C type-checking. - * Entries in the pte table are 64b, while entries in the pgd & pmd are 32b. - */ - -/* PTE level */ -typedef struct { unsigned long pte; } pte_t; -#define pte_val(x) ((x).pte) -#define __pte(x) ((pte_t) { (x) }) - -/* 64k pages additionally define a bigger "real PTE" type that gathers - * the "second half" part of the PTE for pseudo 64k pages - */ -#ifdef CONFIG_PPC_64K_PAGES -typedef struct { pte_t pte; unsigned long hidx; } real_pte_t; -#else -typedef struct { pte_t pte; } real_pte_t; -#endif - -/* PMD level */ -typedef struct { unsigned long pmd; } pmd_t; -#define pmd_val(x) ((x).pmd) -#define __pmd(x) ((pmd_t) { (x) }) - -/* PUD level exusts only on 4k pages */ -#ifndef CONFIG_PPC_64K_PAGES -typedef struct { unsigned long pud; } pud_t; -#define pud_val(x) ((x).pud) -#define __pud(x) ((pud_t) { (x) }) -#endif - -/* PGD level */ -typedef struct { unsigned long pgd; } pgd_t; -#define pgd_val(x) ((x).pgd) -#define __pgd(x) ((pgd_t) { (x) }) - -/* Page protection bits */ -typedef struct { unsigned long pgprot; } pgprot_t; -#define pgprot_val(x) ((x).pgprot) -#define __pgprot(x) ((pgprot_t) { (x) }) - -#else - -/* - * .. while these make it easier on the compiler - */ - -typedef unsigned long pte_t; -#define pte_val(x) (x) -#define __pte(x) (x) - -#ifdef CONFIG_PPC_64K_PAGES -typedef struct { pte_t pte; unsigned long hidx; } real_pte_t; -#else -typedef unsigned long real_pte_t; -#endif - - -typedef unsigned long pmd_t; -#define pmd_val(x) (x) -#define __pmd(x) (x) - -#ifndef CONFIG_PPC_64K_PAGES -typedef unsigned long pud_t; -#define pud_val(x) (x) -#define __pud(x) (x) -#endif - -typedef unsigned long pgd_t; -#define pgd_val(x) (x) -#define pgprot_val(x) (x) - -typedef unsigned long pgprot_t; -#define __pgd(x) (x) -#define __pgprot(x) (x) - -#endif - -#define __pa(x) ((unsigned long)(x)-PAGE_OFFSET) - -extern int page_is_ram(unsigned long pfn); - -extern u64 ppc64_pft_size; /* Log 2 of page table size */ - -/* We do define AT_SYSINFO_EHDR but don't use the gate mecanism */ -#define __HAVE_ARCH_GATE_AREA 1 - -#endif /* __ASSEMBLY__ */ - -#ifdef MODULE -#define __page_aligned __attribute__((__aligned__(PAGE_SIZE))) -#else -#define __page_aligned \ - __attribute__((__aligned__(PAGE_SIZE), \ - __section__(".data.page_aligned"))) -#endif - - -/* This must match the -Ttext linker address */ -/* Note: tophys & tovirt make assumptions about how */ -/* KERNELBASE is defined for performance reasons. */ -/* When KERNELBASE moves, those macros may have */ -/* to change! */ -#define PAGE_OFFSET ASM_CONST(0xC000000000000000) -#define KERNELBASE PAGE_OFFSET -#define VMALLOCBASE ASM_CONST(0xD000000000000000) - -#define VMALLOC_REGION_ID (VMALLOCBASE >> REGION_SHIFT) -#define KERNEL_REGION_ID (KERNELBASE >> REGION_SHIFT) -#define USER_REGION_ID (0UL) -#define REGION_ID(ea) (((unsigned long)(ea)) >> REGION_SHIFT) - -#define __va(x) ((void *)((unsigned long)(x) + KERNELBASE)) - -#ifdef CONFIG_FLATMEM -#define pfn_to_page(pfn) (mem_map + (pfn)) -#define page_to_pfn(page) ((unsigned long)((page) - mem_map)) -#define pfn_valid(pfn) ((pfn) < max_mapnr) -#endif - -#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT) -#define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT) - -#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT) - -/* - * Unfortunately the PLT is in the BSS in the PPC32 ELF ABI, - * and needs to be executable. This means the whole heap ends - * up being executable. - */ -#define VM_DATA_DEFAULT_FLAGS32 (VM_READ | VM_WRITE | VM_EXEC | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) - -#define VM_DATA_DEFAULT_FLAGS64 (VM_READ | VM_WRITE | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) - -#define VM_DATA_DEFAULT_FLAGS \ - (test_thread_flag(TIF_32BIT) ? \ - VM_DATA_DEFAULT_FLAGS32 : VM_DATA_DEFAULT_FLAGS64) - -/* - * This is the default if a program doesn't have a PT_GNU_STACK - * program header entry. The PPC64 ELF ABI has a non executable stack - * stack by default, so in the absense of a PT_GNU_STACK program header - * we turn execute permission off. - */ -#define VM_STACK_DEFAULT_FLAGS32 (VM_READ | VM_WRITE | VM_EXEC | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) - -#define VM_STACK_DEFAULT_FLAGS64 (VM_READ | VM_WRITE | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) - -#define VM_STACK_DEFAULT_FLAGS \ - (test_thread_flag(TIF_32BIT) ? \ - VM_STACK_DEFAULT_FLAGS32 : VM_STACK_DEFAULT_FLAGS64) - -#endif /* __KERNEL__ */ - -#include <asm-generic/page.h> - -#endif /* _PPC64_PAGE_H */ diff --git a/include/asm-ppc64/prom.h b/include/asm-ppc64/prom.h deleted file mode 100644 index ddfe186589fa..000000000000 --- a/include/asm-ppc64/prom.h +++ /dev/null @@ -1,220 +0,0 @@ -#ifndef _PPC64_PROM_H -#define _PPC64_PROM_H - -/* - * Definitions for talking to the Open Firmware PROM on - * Power Macintosh computers. - * - * Copyright (C) 1996 Paul Mackerras. - * - * Updates for PPC64 by Peter Bergner & David Engebretsen, IBM Corp. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ -#include <linux/config.h> -#include <linux/proc_fs.h> -#include <asm/atomic.h> - -#define PTRRELOC(x) ((typeof(x))((unsigned long)(x) - offset)) -#define PTRUNRELOC(x) ((typeof(x))((unsigned long)(x) + offset)) -#define RELOC(x) (*PTRRELOC(&(x))) - -/* Definitions used by the flattened device tree */ -#define OF_DT_HEADER 0xd00dfeed /* marker */ -#define OF_DT_BEGIN_NODE 0x1 /* Start of node, full name */ -#define OF_DT_END_NODE 0x2 /* End node */ -#define OF_DT_PROP 0x3 /* Property: name off, size, - * content */ -#define OF_DT_NOP 0x4 /* nop */ -#define OF_DT_END 0x9 - -#define OF_DT_VERSION 0x10 - -/* - * This is what gets passed to the kernel by prom_init or kexec - * - * The dt struct contains the device tree structure, full pathes and - * property contents. The dt strings contain a separate block with just - * the strings for the property names, and is fully page aligned and - * self contained in a page, so that it can be kept around by the kernel, - * each property name appears only once in this page (cheap compression) - * - * the mem_rsvmap contains a map of reserved ranges of physical memory, - * passing it here instead of in the device-tree itself greatly simplifies - * the job of everybody. It's just a list of u64 pairs (base/size) that - * ends when size is 0 - */ -struct boot_param_header -{ - u32 magic; /* magic word OF_DT_HEADER */ - u32 totalsize; /* total size of DT block */ - u32 off_dt_struct; /* offset to structure */ - u32 off_dt_strings; /* offset to strings */ - u32 off_mem_rsvmap; /* offset to memory reserve map */ - u32 version; /* format version */ - u32 last_comp_version; /* last compatible version */ - /* version 2 fields below */ - u32 boot_cpuid_phys; /* Physical CPU id we're booting on */ - /* version 3 fields below */ - u32 dt_strings_size; /* size of the DT strings block */ -}; - - - -typedef u32 phandle; -typedef u32 ihandle; - -struct address_range { - unsigned long space; - unsigned long address; - unsigned long size; -}; - -struct interrupt_info { - int line; - int sense; /* +ve/-ve logic, edge or level, etc. */ -}; - -struct pci_address { - u32 a_hi; - u32 a_mid; - u32 a_lo; -}; - -struct isa_address { - u32 a_hi; - u32 a_lo; -}; - -struct isa_range { - struct isa_address isa_addr; - struct pci_address pci_addr; - unsigned int size; -}; - -struct reg_property { - unsigned long address; - unsigned long size; -}; - -struct reg_property32 { - unsigned int address; - unsigned int size; -}; - -struct reg_property64 { - unsigned long address; - unsigned long size; -}; - -struct property { - char *name; - int length; - unsigned char *value; - struct property *next; -}; - -struct device_node { - char *name; - char *type; - phandle node; - phandle linux_phandle; - int n_addrs; - struct address_range *addrs; - int n_intrs; - struct interrupt_info *intrs; - char *full_name; - - struct property *properties; - struct device_node *parent; - struct device_node *child; - struct device_node *sibling; - struct device_node *next; /* next device of same type */ - struct device_node *allnext; /* next in list of all nodes */ - struct proc_dir_entry *pde; /* this node's proc directory */ - struct kref kref; - unsigned long _flags; - void *data; -#ifdef CONFIG_PPC_ISERIES - struct list_head Device_List; -#endif -}; - -extern struct device_node *of_chosen; - -/* flag descriptions */ -#define OF_DYNAMIC 1 /* node and properties were allocated via kmalloc */ - -#define OF_IS_DYNAMIC(x) test_bit(OF_DYNAMIC, &x->_flags) -#define OF_MARK_DYNAMIC(x) set_bit(OF_DYNAMIC, &x->_flags) - -/* - * Until 32-bit ppc can add proc_dir_entries to its device_node - * definition, we cannot refer to pde, name_link, and addr_link - * in arch-independent code. - */ -#define HAVE_ARCH_DEVTREE_FIXUPS - -static inline void set_node_proc_entry(struct device_node *dn, struct proc_dir_entry *de) -{ - dn->pde = de; -} - - -/* OBSOLETE: Old stlye node lookup */ -extern struct device_node *find_devices(const char *name); -extern struct device_node *find_type_devices(const char *type); -extern struct device_node *find_path_device(const char *path); -extern struct device_node *find_compatible_devices(const char *type, - const char *compat); -extern struct device_node *find_all_nodes(void); - -/* New style node lookup */ -extern struct device_node *of_find_node_by_name(struct device_node *from, - const char *name); -extern struct device_node *of_find_node_by_type(struct device_node *from, - const char *type); -extern struct device_node *of_find_compatible_node(struct device_node *from, - const char *type, const char *compat); -extern struct device_node *of_find_node_by_path(const char *path); -extern struct device_node *of_find_node_by_phandle(phandle handle); -extern struct device_node *of_find_all_nodes(struct device_node *prev); -extern struct device_node *of_get_parent(const struct device_node *node); -extern struct device_node *of_get_next_child(const struct device_node *node, - struct device_node *prev); -extern struct device_node *of_node_get(struct device_node *node); -extern void of_node_put(struct device_node *node); - -/* For scanning the flat device-tree at boot time */ -int __init of_scan_flat_dt(int (*it)(unsigned long node, - const char *uname, int depth, - void *data), - void *data); -void* __init of_get_flat_dt_prop(unsigned long node, const char *name, - unsigned long *size); - -/* For updating the device tree at runtime */ -extern void of_attach_node(struct device_node *); -extern void of_detach_node(const struct device_node *); - -/* Other Prototypes */ -extern unsigned long prom_init(unsigned long, unsigned long, unsigned long, - unsigned long, unsigned long); -extern void finish_device_tree(void); -extern void unflatten_device_tree(void); -extern void early_init_devtree(void *); -extern int device_is_compatible(struct device_node *device, const char *); -extern int machine_is_compatible(const char *compat); -extern unsigned char *get_property(struct device_node *node, const char *name, - int *lenp); -extern void print_properties(struct device_node *node); -extern int prom_n_addr_cells(struct device_node* np); -extern int prom_n_size_cells(struct device_node* np); -extern int prom_n_intr_cells(struct device_node* np); -extern void prom_get_irq_senses(unsigned char *senses, int off, int max); -extern int prom_add_property(struct device_node* np, struct property* prop); - -#endif /* _PPC64_PROM_H */ diff --git a/include/asm-ppc64/system.h b/include/asm-ppc64/system.h deleted file mode 100644 index bf9a6aba19c9..000000000000 --- a/include/asm-ppc64/system.h +++ /dev/null @@ -1,310 +0,0 @@ -#ifndef __PPC64_SYSTEM_H -#define __PPC64_SYSTEM_H - -/* - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <linux/config.h> -#include <linux/compiler.h> -#include <asm/page.h> -#include <asm/processor.h> -#include <asm/hw_irq.h> -#include <asm/synch.h> - -/* - * Memory barrier. - * The sync instruction guarantees that all memory accesses initiated - * by this processor have been performed (with respect to all other - * mechanisms that access memory). The eieio instruction is a barrier - * providing an ordering (separately) for (a) cacheable stores and (b) - * loads and stores to non-cacheable memory (e.g. I/O devices). - * - * mb() prevents loads and stores being reordered across this point. - * rmb() prevents loads being reordered across this point. - * wmb() prevents stores being reordered across this point. - * read_barrier_depends() prevents data-dependent loads being reordered - * across this point (nop on PPC). - * - * We have to use the sync instructions for mb(), since lwsync doesn't - * order loads with respect to previous stores. Lwsync is fine for - * rmb(), though. - * For wmb(), we use sync since wmb is used in drivers to order - * stores to system memory with respect to writes to the device. - * However, smp_wmb() can be a lighter-weight eieio barrier on - * SMP since it is only used to order updates to system memory. - */ -#define mb() __asm__ __volatile__ ("sync" : : : "memory") -#define rmb() __asm__ __volatile__ ("lwsync" : : : "memory") -#define wmb() __asm__ __volatile__ ("sync" : : : "memory") -#define read_barrier_depends() do { } while(0) - -#define set_mb(var, value) do { var = value; smp_mb(); } while (0) -#define set_wmb(var, value) do { var = value; smp_wmb(); } while (0) - -#ifdef CONFIG_SMP -#define smp_mb() mb() -#define smp_rmb() rmb() -#define smp_wmb() eieio() -#define smp_read_barrier_depends() read_barrier_depends() -#else -#define smp_mb() __asm__ __volatile__("": : :"memory") -#define smp_rmb() __asm__ __volatile__("": : :"memory") -#define smp_wmb() __asm__ __volatile__("": : :"memory") -#define smp_read_barrier_depends() do { } while(0) -#endif /* CONFIG_SMP */ - -#ifdef __KERNEL__ -struct task_struct; -struct pt_regs; - -#ifdef CONFIG_DEBUGGER - -extern int (*__debugger)(struct pt_regs *regs); -extern int (*__debugger_ipi)(struct pt_regs *regs); -extern int (*__debugger_bpt)(struct pt_regs *regs); -extern int (*__debugger_sstep)(struct pt_regs *regs); -extern int (*__debugger_iabr_match)(struct pt_regs *regs); -extern int (*__debugger_dabr_match)(struct pt_regs *regs); -extern int (*__debugger_fault_handler)(struct pt_regs *regs); - -#define DEBUGGER_BOILERPLATE(__NAME) \ -static inline int __NAME(struct pt_regs *regs) \ -{ \ - if (unlikely(__ ## __NAME)) \ - return __ ## __NAME(regs); \ - return 0; \ -} - -DEBUGGER_BOILERPLATE(debugger) -DEBUGGER_BOILERPLATE(debugger_ipi) -DEBUGGER_BOILERPLATE(debugger_bpt) -DEBUGGER_BOILERPLATE(debugger_sstep) -DEBUGGER_BOILERPLATE(debugger_iabr_match) -DEBUGGER_BOILERPLATE(debugger_dabr_match) -DEBUGGER_BOILERPLATE(debugger_fault_handler) - -#ifdef CONFIG_XMON -extern void xmon_init(int enable); -#endif - -#else -static inline int debugger(struct pt_regs *regs) { return 0; } -static inline int debugger_ipi(struct pt_regs *regs) { return 0; } -static inline int debugger_bpt(struct pt_regs *regs) { return 0; } -static inline int debugger_sstep(struct pt_regs *regs) { return 0; } -static inline int debugger_iabr_match(struct pt_regs *regs) { return 0; } -static inline int debugger_dabr_match(struct pt_regs *regs) { return 0; } -static inline int debugger_fault_handler(struct pt_regs *regs) { return 0; } -#endif - -extern int set_dabr(unsigned long dabr); -extern void _exception(int signr, struct pt_regs *regs, int code, - unsigned long addr); -extern int fix_alignment(struct pt_regs *regs); -extern void bad_page_fault(struct pt_regs *regs, unsigned long address, - int sig); -extern void show_regs(struct pt_regs * regs); -extern void low_hash_fault(struct pt_regs *regs, unsigned long address); -extern int die(const char *str, struct pt_regs *regs, long err); - -extern int _get_PVR(void); -extern void giveup_fpu(struct task_struct *); -extern void disable_kernel_fp(void); -extern void flush_fp_to_thread(struct task_struct *); -extern void enable_kernel_fp(void); -extern void giveup_altivec(struct task_struct *); -extern void disable_kernel_altivec(void); -extern void enable_kernel_altivec(void); -extern int emulate_altivec(struct pt_regs *); -extern void cvt_fd(float *from, double *to, struct thread_struct *thread); -extern void cvt_df(double *from, float *to, struct thread_struct *thread); - -#ifdef CONFIG_ALTIVEC -extern void flush_altivec_to_thread(struct task_struct *); -#else -static inline void flush_altivec_to_thread(struct task_struct *t) -{ -} -#endif - -static inline void flush_spe_to_thread(struct task_struct *t) -{ -} - -extern int mem_init_done; /* set on boot once kmalloc can be called */ -extern unsigned long memory_limit; - -/* EBCDIC -> ASCII conversion for [0-9A-Z] on iSeries */ -extern unsigned char e2a(unsigned char); - -extern struct task_struct *__switch_to(struct task_struct *, - struct task_struct *); -#define switch_to(prev, next, last) ((last) = __switch_to((prev), (next))) - -struct thread_struct; -extern struct task_struct * _switch(struct thread_struct *prev, - struct thread_struct *next); - -extern unsigned long klimit; - -extern int powersave_nap; /* set if nap mode can be used in idle loop */ - -/* - * Atomic exchange - * - * Changes the memory location '*ptr' to be val and returns - * the previous value stored there. - * - * Inline asm pulled from arch/ppc/kernel/misc.S so ppc64 - * is more like most of the other architectures. - */ -static __inline__ unsigned long -__xchg_u32(volatile unsigned int *m, unsigned long val) -{ - unsigned long dummy; - - __asm__ __volatile__( - EIEIO_ON_SMP -"1: lwarx %0,0,%3 # __xchg_u32\n\ - stwcx. %2,0,%3\n\ -2: bne- 1b" - ISYNC_ON_SMP - : "=&r" (dummy), "=m" (*m) - : "r" (val), "r" (m) - : "cc", "memory"); - - return (dummy); -} - -static __inline__ unsigned long -__xchg_u64(volatile long *m, unsigned long val) -{ - unsigned long dummy; - - __asm__ __volatile__( - EIEIO_ON_SMP -"1: ldarx %0,0,%3 # __xchg_u64\n\ - stdcx. %2,0,%3\n\ -2: bne- 1b" - ISYNC_ON_SMP - : "=&r" (dummy), "=m" (*m) - : "r" (val), "r" (m) - : "cc", "memory"); - - return (dummy); -} - -/* - * This function doesn't exist, so you'll get a linker error - * if something tries to do an invalid xchg(). - */ -extern void __xchg_called_with_bad_pointer(void); - -static __inline__ unsigned long -__xchg(volatile void *ptr, unsigned long x, unsigned int size) -{ - switch (size) { - case 4: - return __xchg_u32(ptr, x); - case 8: - return __xchg_u64(ptr, x); - } - __xchg_called_with_bad_pointer(); - return x; -} - -#define xchg(ptr,x) \ - ({ \ - __typeof__(*(ptr)) _x_ = (x); \ - (__typeof__(*(ptr))) __xchg((ptr), (unsigned long)_x_, sizeof(*(ptr))); \ - }) - -#define tas(ptr) (xchg((ptr),1)) - -#define __HAVE_ARCH_CMPXCHG 1 - -static __inline__ unsigned long -__cmpxchg_u32(volatile unsigned int *p, unsigned long old, unsigned long new) -{ - unsigned int prev; - - __asm__ __volatile__ ( - EIEIO_ON_SMP -"1: lwarx %0,0,%2 # __cmpxchg_u32\n\ - cmpw 0,%0,%3\n\ - bne- 2f\n\ - stwcx. %4,0,%2\n\ - bne- 1b" - ISYNC_ON_SMP - "\n\ -2:" - : "=&r" (prev), "=m" (*p) - : "r" (p), "r" (old), "r" (new), "m" (*p) - : "cc", "memory"); - - return prev; -} - -static __inline__ unsigned long -__cmpxchg_u64(volatile unsigned long *p, unsigned long old, unsigned long new) -{ - unsigned long prev; - - __asm__ __volatile__ ( - EIEIO_ON_SMP -"1: ldarx %0,0,%2 # __cmpxchg_u64\n\ - cmpd 0,%0,%3\n\ - bne- 2f\n\ - stdcx. %4,0,%2\n\ - bne- 1b" - ISYNC_ON_SMP - "\n\ -2:" - : "=&r" (prev), "=m" (*p) - : "r" (p), "r" (old), "r" (new), "m" (*p) - : "cc", "memory"); - - return prev; -} - -/* This function doesn't exist, so you'll get a linker error - if something tries to do an invalid cmpxchg(). */ -extern void __cmpxchg_called_with_bad_pointer(void); - -static __inline__ unsigned long -__cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, - unsigned int size) -{ - switch (size) { - case 4: - return __cmpxchg_u32(ptr, old, new); - case 8: - return __cmpxchg_u64(ptr, old, new); - } - __cmpxchg_called_with_bad_pointer(); - return old; -} - -#define cmpxchg(ptr,o,n)\ - ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\ - (unsigned long)(n),sizeof(*(ptr)))) - -/* - * We handle most unaligned accesses in hardware. On the other hand - * unaligned DMA can be very expensive on some ppc64 IO chips (it does - * powers of 2 writes until it reaches sufficient alignment). - * - * Based on this we disable the IP header alignment in network drivers. - */ -#define NET_IP_ALIGN 0 - -#define arch_align_stack(x) (x) - -extern unsigned long reloc_offset(void); - -#endif /* __KERNEL__ */ -#endif diff --git a/include/asm-v850/hardirq.h b/include/asm-v850/hardirq.h index 5dfca8047cbe..d98488cd5af1 100644 --- a/include/asm-v850/hardirq.h +++ b/include/asm-v850/hardirq.h @@ -5,6 +5,8 @@ #include <linux/threads.h> #include <linux/cache.h> +#include <asm/irq.h> + typedef struct { unsigned int __softirq_pending; } ____cacheline_aligned irq_cpustat_t; @@ -22,4 +24,6 @@ typedef struct { # error HARDIRQ_BITS is too low! #endif +void ack_bad_irq(unsigned int irq); + #endif /* __V850_HARDIRQ_H__ */ diff --git a/include/asm-x86_64/apic.h b/include/asm-x86_64/apic.h index 6c5d5ca8383a..5647b7de1749 100644 --- a/include/asm-x86_64/apic.h +++ b/include/asm-x86_64/apic.h @@ -111,6 +111,8 @@ extern unsigned int nmi_watchdog; extern int disable_timer_pin_1; +extern void setup_threshold_lvt(unsigned long lvt_off); + #endif /* CONFIG_X86_LOCAL_APIC */ extern unsigned boot_cpu_id; diff --git a/include/asm-x86_64/cache.h b/include/asm-x86_64/cache.h index eda62bae1240..33e53424128b 100644 --- a/include/asm-x86_64/cache.h +++ b/include/asm-x86_64/cache.h @@ -9,6 +9,6 @@ /* L1 cache line size */ #define L1_CACHE_SHIFT (CONFIG_X86_L1_CACHE_SHIFT) #define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) -#define L1_CACHE_SHIFT_MAX 6 /* largest L1 which this arch supports */ +#define L1_CACHE_SHIFT_MAX 7 /* largest L1 which this arch supports */ #endif diff --git a/include/asm-x86_64/desc.h b/include/asm-x86_64/desc.h index b837820c9073..33764869387b 100644 --- a/include/asm-x86_64/desc.h +++ b/include/asm-x86_64/desc.h @@ -98,16 +98,19 @@ static inline void _set_gate(void *adr, unsigned type, unsigned long func, unsig static inline void set_intr_gate(int nr, void *func) { + BUG_ON((unsigned)nr > 0xFF); _set_gate(&idt_table[nr], GATE_INTERRUPT, (unsigned long) func, 0, 0); } static inline void set_intr_gate_ist(int nr, void *func, unsigned ist) { + BUG_ON((unsigned)nr > 0xFF); _set_gate(&idt_table[nr], GATE_INTERRUPT, (unsigned long) func, 0, ist); } static inline void set_system_gate(int nr, void *func) { + BUG_ON((unsigned)nr > 0xFF); _set_gate(&idt_table[nr], GATE_INTERRUPT, (unsigned long) func, 3, 0); } diff --git a/include/asm-x86_64/dma.h b/include/asm-x86_64/dma.h index 16fa3a064d0c..6f2a817b6a7c 100644 --- a/include/asm-x86_64/dma.h +++ b/include/asm-x86_64/dma.h @@ -72,8 +72,15 @@ #define MAX_DMA_CHANNELS 8 -/* The maximum address that we can perform a DMA transfer to on this platform */ -#define MAX_DMA_ADDRESS (PAGE_OFFSET+0x1000000) + +/* 16MB ISA DMA zone */ +#define MAX_DMA_PFN ((16*1024*1024) >> PAGE_SHIFT) + +/* 4GB broken PCI/AGP hardware bus master zone */ +#define MAX_DMA32_PFN ((4UL*1024*1024*1024) >> PAGE_SHIFT) + +/* Compat define for old dma zone */ +#define MAX_DMA_ADDRESS ((unsigned long)__va(MAX_DMA_PFN << PAGE_SHIFT)) /* 8237 DMA controllers */ #define IO_DMA1_BASE 0x00 /* 8 bit slave DMA, channels 0..3 */ diff --git a/include/asm-x86_64/hpet.h b/include/asm-x86_64/hpet.h index a3877f570998..c20c28f5c7a0 100644 --- a/include/asm-x86_64/hpet.h +++ b/include/asm-x86_64/hpet.h @@ -14,18 +14,18 @@ #define HPET_CFG 0x010 #define HPET_STATUS 0x020 #define HPET_COUNTER 0x0f0 -#define HPET_T0_CFG 0x100 -#define HPET_T0_CMP 0x108 -#define HPET_T0_ROUTE 0x110 -#define HPET_T1_CFG 0x120 -#define HPET_T1_CMP 0x128 -#define HPET_T1_ROUTE 0x130 -#define HPET_T2_CFG 0x140 -#define HPET_T2_CMP 0x148 -#define HPET_T2_ROUTE 0x150 +#define HPET_Tn_OFFSET 0x20 +#define HPET_Tn_CFG(n) (0x100 + (n) * HPET_Tn_OFFSET) +#define HPET_Tn_ROUTE(n) (0x104 + (n) * HPET_Tn_OFFSET) +#define HPET_Tn_CMP(n) (0x108 + (n) * HPET_Tn_OFFSET) +#define HPET_T0_CFG HPET_Tn_CFG(0) +#define HPET_T0_CMP HPET_Tn_CMP(0) +#define HPET_T1_CFG HPET_Tn_CFG(1) +#define HPET_T1_CMP HPET_Tn_CMP(1) #define HPET_ID_VENDOR 0xffff0000 #define HPET_ID_LEGSUP 0x00008000 +#define HPET_ID_64BIT 0x00002000 #define HPET_ID_NUMBER 0x00001f00 #define HPET_ID_REV 0x000000ff #define HPET_ID_NUMBER_SHIFT 8 @@ -38,11 +38,18 @@ #define HPET_LEGACY_8254 2 #define HPET_LEGACY_RTC 8 -#define HPET_TN_ENABLE 0x004 -#define HPET_TN_PERIODIC 0x008 -#define HPET_TN_PERIODIC_CAP 0x010 -#define HPET_TN_SETVAL 0x040 -#define HPET_TN_32BIT 0x100 +#define HPET_TN_LEVEL 0x0002 +#define HPET_TN_ENABLE 0x0004 +#define HPET_TN_PERIODIC 0x0008 +#define HPET_TN_PERIODIC_CAP 0x0010 +#define HPET_TN_64BIT_CAP 0x0020 +#define HPET_TN_SETVAL 0x0040 +#define HPET_TN_32BIT 0x0100 +#define HPET_TN_ROUTE 0x3e00 +#define HPET_TN_FSB 0x4000 +#define HPET_TN_FSB_CAP 0x8000 + +#define HPET_TN_ROUTE_SHIFT 9 extern int is_hpet_enabled(void); extern int hpet_rtc_timer_init(void); diff --git a/include/asm-x86_64/hw_irq.h b/include/asm-x86_64/hw_irq.h index dc97668ea0f9..c14a8c7267a6 100644 --- a/include/asm-x86_64/hw_irq.h +++ b/include/asm-x86_64/hw_irq.h @@ -55,7 +55,7 @@ struct hw_interrupt_type; #define CALL_FUNCTION_VECTOR 0xfc #define KDB_VECTOR 0xfb /* reserved for KDB */ #define THERMAL_APIC_VECTOR 0xfa -/* 0xf9 free */ +#define THRESHOLD_APIC_VECTOR 0xf9 #define INVALIDATE_TLB_VECTOR_END 0xf8 #define INVALIDATE_TLB_VECTOR_START 0xf0 /* f0-f8 used for TLB flush */ diff --git a/include/asm-x86_64/ia32.h b/include/asm-x86_64/ia32.h index 6efa00fe4e7b..c7bc9c0525ba 100644 --- a/include/asm-x86_64/ia32.h +++ b/include/asm-x86_64/ia32.h @@ -165,6 +165,11 @@ struct siginfo_t; int do_get_thread_area(struct thread_struct *t, struct user_desc __user *info); int do_set_thread_area(struct thread_struct *t, struct user_desc __user *info); int ia32_child_tls(struct task_struct *p, struct pt_regs *childregs); + +struct linux_binprm; +extern int ia32_setup_arg_pages(struct linux_binprm *bprm, + unsigned long stack_top, int exec_stack); + #endif #endif /* !CONFIG_IA32_SUPPORT */ diff --git a/include/asm-x86_64/mce.h b/include/asm-x86_64/mce.h index 869249db6795..5d298b799a9f 100644 --- a/include/asm-x86_64/mce.h +++ b/include/asm-x86_64/mce.h @@ -67,6 +67,8 @@ struct mce_log { /* Software defined banks */ #define MCE_EXTENDED_BANK 128 #define MCE_THERMAL_BANK MCE_EXTENDED_BANK + 0 +#define MCE_THRESHOLD_BASE MCE_EXTENDED_BANK + 1 /* MCE_AMD */ +#define MCE_THRESHOLD_DRAM_ECC MCE_THRESHOLD_BASE + 4 void mce_log(struct mce *m); #ifdef CONFIG_X86_MCE_INTEL @@ -77,4 +79,12 @@ static inline void mce_intel_feature_init(struct cpuinfo_x86 *c) } #endif +#ifdef CONFIG_X86_MCE_AMD +void mce_amd_feature_init(struct cpuinfo_x86 *c); +#else +static inline void mce_amd_feature_init(struct cpuinfo_x86 *c) +{ +} +#endif + #endif diff --git a/include/asm-x86_64/mmzone.h b/include/asm-x86_64/mmzone.h index b40c661f111e..69baaa8a3ce0 100644 --- a/include/asm-x86_64/mmzone.h +++ b/include/asm-x86_64/mmzone.h @@ -17,16 +17,15 @@ /* Simple perfect hash to map physical addresses to node numbers */ extern int memnode_shift; extern u8 memnodemap[NODEMAPSIZE]; -extern int maxnode; extern struct pglist_data *node_data[]; static inline __attribute__((pure)) int phys_to_nid(unsigned long addr) { - int nid; + unsigned nid; VIRTUAL_BUG_ON((addr >> memnode_shift) >= NODEMAPSIZE); nid = memnodemap[addr >> memnode_shift]; - VIRTUAL_BUG_ON(nid > maxnode); + VIRTUAL_BUG_ON(nid >= MAX_NUMNODES || !node_data[nid]); return nid; } @@ -41,9 +40,7 @@ static inline __attribute__((pure)) int phys_to_nid(unsigned long addr) #define pfn_to_nid(pfn) phys_to_nid((unsigned long)(pfn) << PAGE_SHIFT) #define kvaddr_to_nid(kaddr) phys_to_nid(__pa(kaddr)) -/* AK: this currently doesn't deal with invalid addresses. We'll see - if the 2.5 kernel doesn't pass them - (2.4 used to). */ +/* Requires pfn_valid(pfn) to be true */ #define pfn_to_page(pfn) ({ \ int nid = phys_to_nid(((unsigned long)(pfn)) << PAGE_SHIFT); \ ((pfn) - node_start_pfn(nid)) + NODE_DATA(nid)->node_mem_map; \ diff --git a/include/asm-x86_64/mpspec.h b/include/asm-x86_64/mpspec.h index f267e10c023d..6f8a17d105ab 100644 --- a/include/asm-x86_64/mpspec.h +++ b/include/asm-x86_64/mpspec.h @@ -16,7 +16,7 @@ /* * A maximum of 255 APICs with the current APIC ID architecture. */ -#define MAX_APICS 128 +#define MAX_APICS 255 struct intel_mp_floating { @@ -157,7 +157,8 @@ struct mpc_config_lintsrc */ #define MAX_MP_BUSSES 256 -#define MAX_IRQ_SOURCES 256 +/* Each PCI slot may be a combo card with its own bus. 4 IRQ pins per slot. */ +#define MAX_IRQ_SOURCES (MAX_MP_BUSSES * 4) enum mp_bustype { MP_BUS_ISA = 1, MP_BUS_EISA, @@ -172,7 +173,7 @@ extern int smp_found_config; extern void find_smp_config (void); extern void get_smp_config (void); extern int nr_ioapics; -extern int apic_version [MAX_APICS]; +extern unsigned char apic_version [MAX_APICS]; extern int mp_irq_entries; extern struct mpc_config_intsrc mp_irqs [MAX_IRQ_SOURCES]; extern int mpc_default_type; diff --git a/include/asm-x86_64/msr.h b/include/asm-x86_64/msr.h index 5a7fe3c6c3d8..24dc39651bc4 100644 --- a/include/asm-x86_64/msr.h +++ b/include/asm-x86_64/msr.h @@ -19,7 +19,7 @@ : "=a" (a__), "=d" (b__) \ : "c" (msr)); \ val = a__ | (b__<<32); \ -} while(0); +} while(0) #define wrmsr(msr,val1,val2) \ __asm__ __volatile__("wrmsr" \ diff --git a/include/asm-x86_64/numa.h b/include/asm-x86_64/numa.h index bcf55c3f7f7f..d51e56fdc3da 100644 --- a/include/asm-x86_64/numa.h +++ b/include/asm-x86_64/numa.h @@ -17,6 +17,8 @@ extern void numa_add_cpu(int cpu); extern void numa_init_array(void); extern int numa_off; +extern void numa_set_node(int cpu, int node); + extern unsigned char apicid_to_node[256]; #define NUMA_NO_NODE 0xff diff --git a/include/asm-x86_64/page.h b/include/asm-x86_64/page.h index e5ab4d231f2c..06e489f32472 100644 --- a/include/asm-x86_64/page.h +++ b/include/asm-x86_64/page.h @@ -11,7 +11,7 @@ #define PAGE_SIZE (1UL << PAGE_SHIFT) #endif #define PAGE_MASK (~(PAGE_SIZE-1)) -#define PHYSICAL_PAGE_MASK (~(PAGE_SIZE-1) & (__PHYSICAL_MASK << PAGE_SHIFT)) +#define PHYSICAL_PAGE_MASK (~(PAGE_SIZE-1) & __PHYSICAL_MASK) #define THREAD_ORDER 1 #ifdef __ASSEMBLY__ diff --git a/include/asm-x86_64/pda.h b/include/asm-x86_64/pda.h index bbf89aa8a1af..8733ccfa442e 100644 --- a/include/asm-x86_64/pda.h +++ b/include/asm-x86_64/pda.h @@ -15,6 +15,7 @@ struct x8664_pda { int irqcount; /* Irq nesting counter. Starts with -1 */ int cpunumber; /* Logical CPU number */ char *irqstackptr; /* top of irqstack */ + int nodenumber; /* number of current node */ unsigned int __softirq_pending; unsigned int __nmi_count; /* number of NMI on this CPUs */ struct mm_struct *active_mm; diff --git a/include/asm-x86_64/pgtable.h b/include/asm-x86_64/pgtable.h index 7309fffeec9a..ecf58c7c1650 100644 --- a/include/asm-x86_64/pgtable.h +++ b/include/asm-x86_64/pgtable.h @@ -16,6 +16,7 @@ extern pud_t level3_physmem_pgt[512]; extern pud_t level3_ident_pgt[512]; extern pmd_t level2_kernel_pgt[512]; extern pgd_t init_level4_pgt[]; +extern pgd_t boot_level4_pgt[]; extern unsigned long __supported_pte_mask; #define swapper_pg_dir init_level4_pgt @@ -247,7 +248,7 @@ static inline unsigned long pud_bad(pud_t pud) #define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT)) /* FIXME: is this right? */ #define pte_page(x) pfn_to_page(pte_pfn(x)) -#define pte_pfn(x) ((pte_val(x) >> PAGE_SHIFT) & __PHYSICAL_MASK) +#define pte_pfn(x) ((pte_val(x) & __PHYSICAL_MASK) >> PAGE_SHIFT) static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot) { @@ -354,7 +355,7 @@ static inline pud_t *__pud_offset_k(pud_t *pud, unsigned long address) #define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0) #define pmd_bad(x) ((pmd_val(x) & (~PTE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE ) #define pfn_pmd(nr,prot) (__pmd(((nr) << PAGE_SHIFT) | pgprot_val(prot))) -#define pmd_pfn(x) ((pmd_val(x) >> PAGE_SHIFT) & __PHYSICAL_MASK) +#define pmd_pfn(x) ((pmd_val(x) & __PHYSICAL_MASK) >> PAGE_SHIFT) #define pte_to_pgoff(pte) ((pte_val(pte) & PHYSICAL_PAGE_MASK) >> PAGE_SHIFT) #define pgoff_to_pte(off) ((pte_t) { ((off) << PAGE_SHIFT) | _PAGE_FILE }) diff --git a/include/asm-x86_64/processor.h b/include/asm-x86_64/processor.h index 03837d34fba0..4861246548f7 100644 --- a/include/asm-x86_64/processor.h +++ b/include/asm-x86_64/processor.h @@ -61,10 +61,12 @@ struct cpuinfo_x86 { int x86_cache_alignment; int x86_tlbsize; /* number of 4K pages in DTLB/ITLB combined(in pages)*/ __u8 x86_virt_bits, x86_phys_bits; - __u8 x86_num_cores; + __u8 x86_max_cores; /* cpuid returned max cores value */ __u32 x86_power; __u32 extended_cpuid_level; /* Max extended CPUID function supported */ unsigned long loops_per_jiffy; + __u8 apicid; + __u8 booted_cores; /* number of cores as seen by OS */ } ____cacheline_aligned; #define X86_VENDOR_INTEL 0 diff --git a/include/asm-x86_64/proto.h b/include/asm-x86_64/proto.h index dbb37b0adb43..34501086afef 100644 --- a/include/asm-x86_64/proto.h +++ b/include/asm-x86_64/proto.h @@ -11,6 +11,8 @@ struct pt_regs; extern void start_kernel(void); extern void pda_init(int); +extern void zap_low_mappings(int cpu); + extern void early_idt_handler(void); extern void mcheck_init(struct cpuinfo_x86 *c); @@ -22,6 +24,8 @@ extern void mtrr_bp_init(void); #define mtrr_bp_init() do {} while (0) #endif extern void init_memory_mapping(unsigned long start, unsigned long end); +extern void size_zones(unsigned long *z, unsigned long *h, + unsigned long start_pfn, unsigned long end_pfn); extern void system_call(void); extern int kernel_syscall(void); diff --git a/include/asm-x86_64/rwsem.h b/include/asm-x86_64/rwsem.h deleted file mode 100644 index 46077e9c1910..000000000000 --- a/include/asm-x86_64/rwsem.h +++ /dev/null @@ -1,283 +0,0 @@ -/* rwsem.h: R/W semaphores implemented using XADD/CMPXCHG for x86_64+ - * - * Written by David Howells (dhowells@redhat.com). - * Ported by Andi Kleen <ak@suse.de> to x86-64. - * - * Derived from asm-i386/semaphore.h and asm-i386/rwsem.h - * - * - * The MSW of the count is the negated number of active writers and waiting - * lockers, and the LSW is the total number of active locks - * - * The lock count is initialized to 0 (no active and no waiting lockers). - * - * When a writer subtracts WRITE_BIAS, it'll get 0xffff0001 for the case of an - * uncontended lock. This can be determined because XADD returns the old value. - * Readers increment by 1 and see a positive value when uncontended, negative - * if there are writers (and maybe) readers waiting (in which case it goes to - * sleep). - * - * The value of WAITING_BIAS supports up to 32766 waiting processes. This can - * be extended to 65534 by manually checking the whole MSW rather than relying - * on the S flag. - * - * The value of ACTIVE_BIAS supports up to 65535 active processes. - * - * This should be totally fair - if anything is waiting, a process that wants a - * lock will go to the back of the queue. When the currently active lock is - * released, if there's a writer at the front of the queue, then that and only - * that will be woken up; if there's a bunch of consecutive readers at the - * front, then they'll all be woken up, but no other readers will be. - */ - -#ifndef _X8664_RWSEM_H -#define _X8664_RWSEM_H - -#ifndef _LINUX_RWSEM_H -#error "please don't include asm/rwsem.h directly, use linux/rwsem.h instead" -#endif - -#ifdef __KERNEL__ - -#include <linux/list.h> -#include <linux/spinlock.h> - -struct rwsem_waiter; - -extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem); -extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem); -extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *); -extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem); - -/* - * the semaphore definition - */ -struct rw_semaphore { - signed int count; -#define RWSEM_UNLOCKED_VALUE 0x00000000 -#define RWSEM_ACTIVE_BIAS 0x00000001 -#define RWSEM_ACTIVE_MASK 0x0000ffff -#define RWSEM_WAITING_BIAS (-0x00010000) -#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS -#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) - spinlock_t wait_lock; - struct list_head wait_list; -#if RWSEM_DEBUG - int debug; -#endif -}; - -/* - * initialisation - */ -#if RWSEM_DEBUG -#define __RWSEM_DEBUG_INIT , 0 -#else -#define __RWSEM_DEBUG_INIT /* */ -#endif - -#define __RWSEM_INITIALIZER(name) \ -{ RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, LIST_HEAD_INIT((name).wait_list) \ - __RWSEM_DEBUG_INIT } - -#define DECLARE_RWSEM(name) \ - struct rw_semaphore name = __RWSEM_INITIALIZER(name) - -static inline void init_rwsem(struct rw_semaphore *sem) -{ - sem->count = RWSEM_UNLOCKED_VALUE; - spin_lock_init(&sem->wait_lock); - INIT_LIST_HEAD(&sem->wait_list); -#if RWSEM_DEBUG - sem->debug = 0; -#endif -} - -/* - * lock for reading - */ -static inline void __down_read(struct rw_semaphore *sem) -{ - __asm__ __volatile__( - "# beginning down_read\n\t" -LOCK_PREFIX " incl (%%rdi)\n\t" /* adds 0x00000001, returns the old value */ - " js 2f\n\t" /* jump if we weren't granted the lock */ - "1:\n\t" - LOCK_SECTION_START("") \ - "2:\n\t" - " call rwsem_down_read_failed_thunk\n\t" - " jmp 1b\n" - LOCK_SECTION_END \ - "# ending down_read\n\t" - : "+m"(sem->count) - : "D"(sem) - : "memory", "cc"); -} - - -/* - * trylock for reading -- returns 1 if successful, 0 if contention - */ -static inline int __down_read_trylock(struct rw_semaphore *sem) -{ - __s32 result, tmp; - __asm__ __volatile__( - "# beginning __down_read_trylock\n\t" - " movl %0,%1\n\t" - "1:\n\t" - " movl %1,%2\n\t" - " addl %3,%2\n\t" - " jle 2f\n\t" -LOCK_PREFIX " cmpxchgl %2,%0\n\t" - " jnz 1b\n\t" - "2:\n\t" - "# ending __down_read_trylock\n\t" - : "+m"(sem->count), "=&a"(result), "=&r"(tmp) - : "i"(RWSEM_ACTIVE_READ_BIAS) - : "memory", "cc"); - return result>=0 ? 1 : 0; -} - - -/* - * lock for writing - */ -static inline void __down_write(struct rw_semaphore *sem) -{ - int tmp; - - tmp = RWSEM_ACTIVE_WRITE_BIAS; - __asm__ __volatile__( - "# beginning down_write\n\t" -LOCK_PREFIX " xaddl %0,(%%rdi)\n\t" /* subtract 0x0000ffff, returns the old value */ - " testl %0,%0\n\t" /* was the count 0 before? */ - " jnz 2f\n\t" /* jump if we weren't granted the lock */ - "1:\n\t" - LOCK_SECTION_START("") - "2:\n\t" - " call rwsem_down_write_failed_thunk\n\t" - " jmp 1b\n" - LOCK_SECTION_END - "# ending down_write" - : "=&r" (tmp) - : "0"(tmp), "D"(sem) - : "memory", "cc"); -} - -/* - * trylock for writing -- returns 1 if successful, 0 if contention - */ -static inline int __down_write_trylock(struct rw_semaphore *sem) -{ - signed long ret = cmpxchg(&sem->count, - RWSEM_UNLOCKED_VALUE, - RWSEM_ACTIVE_WRITE_BIAS); - if (ret == RWSEM_UNLOCKED_VALUE) - return 1; - return 0; -} - -/* - * unlock after reading - */ -static inline void __up_read(struct rw_semaphore *sem) -{ - __s32 tmp = -RWSEM_ACTIVE_READ_BIAS; - __asm__ __volatile__( - "# beginning __up_read\n\t" -LOCK_PREFIX " xaddl %[tmp],(%%rdi)\n\t" /* subtracts 1, returns the old value */ - " js 2f\n\t" /* jump if the lock is being waited upon */ - "1:\n\t" - LOCK_SECTION_START("") - "2:\n\t" - " decw %w[tmp]\n\t" /* do nothing if still outstanding active readers */ - " jnz 1b\n\t" - " call rwsem_wake_thunk\n\t" - " jmp 1b\n" - LOCK_SECTION_END - "# ending __up_read\n" - : "+m"(sem->count), [tmp] "+r" (tmp) - : "D"(sem) - : "memory", "cc"); -} - -/* - * unlock after writing - */ -static inline void __up_write(struct rw_semaphore *sem) -{ - unsigned tmp; - __asm__ __volatile__( - "# beginning __up_write\n\t" - " movl %[bias],%[tmp]\n\t" -LOCK_PREFIX " xaddl %[tmp],(%%rdi)\n\t" /* tries to transition 0xffff0001 -> 0x00000000 */ - " jnz 2f\n\t" /* jump if the lock is being waited upon */ - "1:\n\t" - LOCK_SECTION_START("") - "2:\n\t" - " decw %w[tmp]\n\t" /* did the active count reduce to 0? */ - " jnz 1b\n\t" /* jump back if not */ - " call rwsem_wake_thunk\n\t" - " jmp 1b\n" - LOCK_SECTION_END - "# ending __up_write\n" - : "+m"(sem->count), [tmp] "=r" (tmp) - : "D"(sem), [bias] "i"(-RWSEM_ACTIVE_WRITE_BIAS) - : "memory", "cc"); -} - -/* - * downgrade write lock to read lock - */ -static inline void __downgrade_write(struct rw_semaphore *sem) -{ - __asm__ __volatile__( - "# beginning __downgrade_write\n\t" -LOCK_PREFIX " addl %[bias],(%%rdi)\n\t" /* transitions 0xZZZZ0001 -> 0xYYYY0001 */ - " js 2f\n\t" /* jump if the lock is being waited upon */ - "1:\n\t" - LOCK_SECTION_START("") - "2:\n\t" - " call rwsem_downgrade_thunk\n" - " jmp 1b\n" - LOCK_SECTION_END - "# ending __downgrade_write\n" - : "=m"(sem->count) - : "D"(sem), [bias] "i"(-RWSEM_WAITING_BIAS), "m"(sem->count) - : "memory", "cc"); -} - -/* - * implement atomic add functionality - */ -static inline void rwsem_atomic_add(int delta, struct rw_semaphore *sem) -{ - __asm__ __volatile__( -LOCK_PREFIX "addl %1,%0" - :"=m"(sem->count) - :"ir"(delta), "m"(sem->count)); -} - -/* - * implement exchange and add functionality - */ -static inline int rwsem_atomic_update(int delta, struct rw_semaphore *sem) -{ - int tmp = delta; - - __asm__ __volatile__( -LOCK_PREFIX "xaddl %0,(%2)" - : "=r"(tmp), "=m"(sem->count) - : "r"(sem), "m"(sem->count), "0" (tmp) - : "memory"); - - return tmp+delta; -} - -static inline int rwsem_is_locked(struct rw_semaphore *sem) -{ - return (sem->count != 0); -} - -#endif /* __KERNEL__ */ -#endif /* _X8664_RWSEM_H */ diff --git a/include/asm-x86_64/smp.h b/include/asm-x86_64/smp.h index b9fb2173ef99..d030409a8fb5 100644 --- a/include/asm-x86_64/smp.h +++ b/include/asm-x86_64/smp.h @@ -47,7 +47,6 @@ extern void lock_ipi_call_lock(void); extern void unlock_ipi_call_lock(void); extern int smp_num_siblings; extern void smp_send_reschedule(int cpu); -extern void zap_low_mappings(void); void smp_stop_cpu(void); extern int smp_call_function_single(int cpuid, void (*func) (void *info), void *info, int retry, int wait); @@ -82,6 +81,8 @@ extern int safe_smp_processor_id(void); extern int __cpu_disable(void); extern void __cpu_die(unsigned int cpu); extern void prefill_possible_map(void); +extern unsigned num_processors; +extern unsigned disabled_cpus; #endif /* !ASSEMBLY */ diff --git a/include/asm-x86_64/spinlock.h b/include/asm-x86_64/spinlock.h index 69636831ad2f..fe484a699cc3 100644 --- a/include/asm-x86_64/spinlock.h +++ b/include/asm-x86_64/spinlock.h @@ -18,22 +18,22 @@ */ #define __raw_spin_is_locked(x) \ - (*(volatile signed char *)(&(x)->slock) <= 0) + (*(volatile signed int *)(&(x)->slock) <= 0) #define __raw_spin_lock_string \ "\n1:\t" \ - "lock ; decb %0\n\t" \ + "lock ; decl %0\n\t" \ "js 2f\n" \ LOCK_SECTION_START("") \ "2:\t" \ "rep;nop\n\t" \ - "cmpb $0,%0\n\t" \ + "cmpl $0,%0\n\t" \ "jle 2b\n\t" \ "jmp 1b\n" \ LOCK_SECTION_END #define __raw_spin_unlock_string \ - "movb $1,%0" \ + "movl $1,%0" \ :"=m" (lock->slock) : : "memory" static inline void __raw_spin_lock(raw_spinlock_t *lock) @@ -47,10 +47,10 @@ static inline void __raw_spin_lock(raw_spinlock_t *lock) static inline int __raw_spin_trylock(raw_spinlock_t *lock) { - char oldval; + int oldval; __asm__ __volatile__( - "xchgb %b0,%1" + "xchgl %0,%1" :"=q" (oldval), "=m" (lock->slock) :"0" (0) : "memory"); diff --git a/include/asm-x86_64/topology.h b/include/asm-x86_64/topology.h index 1c603cd7e4d0..d39ebd5263ed 100644 --- a/include/asm-x86_64/topology.h +++ b/include/asm-x86_64/topology.h @@ -28,6 +28,8 @@ extern int __node_distance(int, int); #define pcibus_to_node(bus) ((long)(bus->sysdata)) #define pcibus_to_cpumask(bus) node_to_cpumask(pcibus_to_node(bus)); +#define numa_node_id() read_pda(nodenumber) + /* sched_domains SD_NODE_INIT for x86_64 machines */ #define SD_NODE_INIT (struct sched_domain) { \ .span = CPU_MASK_NONE, \ diff --git a/include/asm-x86_64/unistd.h b/include/asm-x86_64/unistd.h index 3c494b65d33a..2c42150bce0c 100644 --- a/include/asm-x86_64/unistd.h +++ b/include/asm-x86_64/unistd.h @@ -462,7 +462,7 @@ __SYSCALL(__NR_fremovexattr, sys_fremovexattr) #define __NR_tkill 200 __SYSCALL(__NR_tkill, sys_tkill) #define __NR_time 201 -__SYSCALL(__NR_time, sys_time64) +__SYSCALL(__NR_time, sys_time) #define __NR_futex 202 __SYSCALL(__NR_futex, sys_futex) #define __NR_sched_setaffinity 203 @@ -608,6 +608,7 @@ do { \ #define __ARCH_WANT_SYS_SIGPENDING #define __ARCH_WANT_SYS_SIGPROCMASK #define __ARCH_WANT_SYS_RT_SIGACTION +#define __ARCH_WANT_SYS_TIME #define __ARCH_WANT_COMPAT_SYS_TIME #endif diff --git a/include/linux/bitops.h b/include/linux/bitops.h index cb3c3ef50f50..38c2fb7ebe09 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -84,6 +84,16 @@ static __inline__ int get_bitmask_order(unsigned int count) return order; /* We could be slightly more clever with -1 here... */ } +static __inline__ int get_count_order(unsigned int count) +{ + int order; + + order = fls(count) - 1; + if (count & (count - 1)) + order++; + return order; +} + /* * hweightN: returns the hamming weight (i.e. the number * of bits set) of a N-bit word diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 23279d8f19b1..313dfe9b443a 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -14,6 +14,13 @@ struct vm_area_struct; /* Zone modifiers in GFP_ZONEMASK (see linux/mmzone.h - low two bits) */ #define __GFP_DMA ((__force gfp_t)0x01u) #define __GFP_HIGHMEM ((__force gfp_t)0x02u) +#ifdef CONFIG_DMA_IS_DMA32 +#define __GFP_DMA32 ((__force gfp_t)0x01) /* ZONE_DMA is ZONE_DMA32 */ +#elif BITS_PER_LONG < 64 +#define __GFP_DMA32 ((__force gfp_t)0x00) /* ZONE_NORMAL is ZONE_DMA32 */ +#else +#define __GFP_DMA32 ((__force gfp_t)0x04) /* Has own ZONE_DMA32 */ +#endif /* * Action modifiers - doesn't change the zoning @@ -63,6 +70,10 @@ struct vm_area_struct; #define GFP_DMA __GFP_DMA +/* 4GB DMA on some platforms */ +#define GFP_DMA32 __GFP_DMA32 + + #define gfp_zone(mask) ((__force int)((mask) & (__force gfp_t)GFP_ZONEMASK)) /* diff --git a/include/linux/libata.h b/include/linux/libata.h index ad5996183ec2..f2dbb684ce9e 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -59,6 +59,8 @@ #define VPRINTK(fmt, args...) #endif /* ATA_DEBUG */ +#define BPRINTK(fmt, args...) if (ap->flags & ATA_FLAG_DEBUGMSG) printk(KERN_ERR "%s: " fmt, __FUNCTION__, ## args) + #ifdef ATA_NDEBUG #define assert(expr) #else @@ -119,6 +121,7 @@ enum { ATA_FLAG_PIO_DMA = (1 << 8), /* PIO cmds via DMA */ ATA_FLAG_NOINTR = (1 << 9), /* FIXME: Remove this once * proper HSM is in place. */ + ATA_FLAG_DEBUGMSG = (1 << 10), ATA_QCFLAG_ACTIVE = (1 << 1), /* cmd not yet ack'd to scsi lyer */ ATA_QCFLAG_SG = (1 << 3), /* have s/g table? */ @@ -659,6 +662,17 @@ static inline void ata_tf_init(struct ata_port *ap, struct ata_taskfile *tf, uns tf->device = ATA_DEVICE_OBS | ATA_DEV1; } +static inline void ata_qc_reinit(struct ata_queued_cmd *qc) +{ + qc->__sg = NULL; + qc->flags = 0; + qc->cursect = qc->cursg = qc->cursg_ofs = 0; + qc->nsect = 0; + qc->nbytes = qc->curbytes = 0; + + ata_tf_init(qc->ap, &qc->tf, qc->dev->devno); +} + /** * ata_irq_on - Enable interrupts on a port. diff --git a/include/linux/mm.h b/include/linux/mm.h index 7b115feca4df..1013a42d10b1 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -206,12 +206,6 @@ struct vm_operations_struct { struct mmu_gather; struct inode; -#ifdef ARCH_HAS_ATOMIC_UNSIGNED -typedef unsigned page_flags_t; -#else -typedef unsigned long page_flags_t; -#endif - /* * Each physical page in the system has a struct page associated with * it to keep track of whatever it is we are using the page for at the @@ -219,7 +213,7 @@ typedef unsigned long page_flags_t; * a page. */ struct page { - page_flags_t flags; /* Atomic flags, some possibly + unsigned long flags; /* Atomic flags, some possibly * updated asynchronously */ atomic_t _count; /* Usage count, see below. */ atomic_t _mapcount; /* Count of ptes mapped in mms, @@ -435,7 +429,7 @@ static inline void put_page(struct page *page) #endif /* Page flags: | [SECTION] | [NODE] | ZONE | ... | FLAGS | */ -#define SECTIONS_PGOFF ((sizeof(page_flags_t)*8) - SECTIONS_WIDTH) +#define SECTIONS_PGOFF ((sizeof(unsigned long)*8) - SECTIONS_WIDTH) #define NODES_PGOFF (SECTIONS_PGOFF - NODES_WIDTH) #define ZONES_PGOFF (NODES_PGOFF - ZONES_WIDTH) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 6cfb114a0c34..2c8edad5dccf 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -71,10 +71,11 @@ struct per_cpu_pageset { #endif #define ZONE_DMA 0 -#define ZONE_NORMAL 1 -#define ZONE_HIGHMEM 2 +#define ZONE_DMA32 1 +#define ZONE_NORMAL 2 +#define ZONE_HIGHMEM 3 -#define MAX_NR_ZONES 3 /* Sync this with ZONES_SHIFT */ +#define MAX_NR_ZONES 4 /* Sync this with ZONES_SHIFT */ #define ZONES_SHIFT 2 /* ceil(log2(MAX_NR_ZONES)) */ @@ -108,9 +109,10 @@ struct per_cpu_pageset { /* * On machines where it is needed (eg PCs) we divide physical memory - * into multiple physical zones. On a PC we have 3 zones: + * into multiple physical zones. On a PC we have 4 zones: * * ZONE_DMA < 16 MB ISA DMA capable memory + * ZONE_DMA32 0 MB Empty * ZONE_NORMAL 16-896 MB direct mapped by the kernel * ZONE_HIGHMEM > 896 MB only page cache and user processes */ @@ -433,7 +435,9 @@ int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int, struct file *, #include <linux/topology.h> /* Returns the number of the current Node. */ +#ifndef numa_node_id #define numa_node_id() (cpu_to_node(raw_smp_processor_id())) +#endif #ifndef CONFIG_NEED_MULTIPLE_NODES @@ -453,12 +457,12 @@ extern struct pglist_data contig_page_data; #include <asm/sparsemem.h> #endif -#if BITS_PER_LONG == 32 || defined(ARCH_HAS_ATOMIC_UNSIGNED) +#if BITS_PER_LONG == 32 /* - * with 32 bit page->flags field, we reserve 8 bits for node/zone info. - * there are 3 zones (2 bits) and this leaves 8-2=6 bits for nodes. + * with 32 bit page->flags field, we reserve 9 bits for node/zone info. + * there are 4 zones (3 bits) and this leaves 9-3=6 bits for nodes. */ -#define FLAGS_RESERVED 8 +#define FLAGS_RESERVED 9 #elif BITS_PER_LONG == 64 /* diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index 72975fa8795d..934a2479f160 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -112,7 +112,6 @@ struct nfnl_callback { int (*call)(struct sock *nl, struct sk_buff *skb, struct nlmsghdr *nlh, struct nfattr *cda[], int *errp); - kernel_cap_t cap_required; /* capabilities required for this msg */ u_int16_t attr_count; /* number of nfattr's */ }; @@ -154,11 +153,14 @@ extern void nfattr_parse(struct nfattr *tb[], int maxattr, #define nfattr_bad_size(tb, max, cta_min) \ ({ int __i, __res = 0; \ - for (__i=0; __i<max; __i++) \ + for (__i=0; __i<max; __i++) { \ + if (!cta_min[__i]) \ + continue; \ if (tb[__i] && NFA_PAYLOAD(tb[__i]) < cta_min[__i]){ \ __res = 1; \ break; \ } \ + } \ __res; \ }) diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 64f203c45378..6bc03c911a83 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -20,7 +20,6 @@ #include <linux/kernel.h> #include <linux/types.h> -#include <linux/list.h> #include <linux/compiler.h> struct file; @@ -859,6 +858,7 @@ enum }; #ifdef __KERNEL__ +#include <linux/list.h> extern void sysctl_init(void); diff --git a/mm/filemap.c b/mm/filemap.c index 5d6e4c2000dc..33a28bfde158 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -134,7 +134,7 @@ static int sync_page(void *word) struct address_space *mapping; struct page *page; - page = container_of((page_flags_t *)word, struct page, flags); + page = container_of((unsigned long *)word, struct page, flags); /* * page_mapping() is being called without PG_locked held. diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 3c5cf664abd2..104e69ca55e0 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -60,8 +60,11 @@ long nr_swap_pages; * NORMAL allocation will leave 784M/256 of ram reserved in the ZONE_DMA * HIGHMEM allocation will leave 224M/32 of ram reserved in ZONE_NORMAL * HIGHMEM allocation will (224M+784M)/256 of ram reserved in ZONE_DMA + * + * TBD: should special case ZONE_DMA32 machines here - in those we normally + * don't need any ZONE_NORMAL reservation */ -int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1] = { 256, 32 }; +int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1] = { 256, 256, 32 }; EXPORT_SYMBOL(totalram_pages); @@ -72,7 +75,7 @@ EXPORT_SYMBOL(totalram_pages); struct zone *zone_table[1 << ZONETABLE_SHIFT] __read_mostly; EXPORT_SYMBOL(zone_table); -static char *zone_names[MAX_NR_ZONES] = { "DMA", "Normal", "HighMem" }; +static char *zone_names[MAX_NR_ZONES] = { "DMA", "DMA32", "Normal", "HighMem" }; int min_free_kbytes = 1024; unsigned long __initdata nr_kernel_pages; @@ -124,7 +127,7 @@ static void bad_page(const char *function, struct page *page) printk(KERN_EMERG "Bad page state at %s (in process '%s', page %p)\n", function, current->comm, page); printk(KERN_EMERG "flags:0x%0*lx mapping:%p mapcount:%d count:%d\n", - (int)(2*sizeof(page_flags_t)), (unsigned long)page->flags, + (int)(2*sizeof(unsigned long)), (unsigned long)page->flags, page->mapping, page_mapcount(page), page_count(page)); printk(KERN_EMERG "Backtrace:\n"); dump_stack(); @@ -1421,6 +1424,10 @@ static int __init build_zonelists_node(pg_data_t *pgdat, struct zonelist *zoneli zone = pgdat->node_zones + ZONE_NORMAL; if (zone->present_pages) zonelist->zones[j++] = zone; + case ZONE_DMA32: + zone = pgdat->node_zones + ZONE_DMA32; + if (zone->present_pages) + zonelist->zones[j++] = zone; case ZONE_DMA: zone = pgdat->node_zones + ZONE_DMA; if (zone->present_pages) @@ -1435,6 +1442,8 @@ static inline int highest_zone(int zone_bits) int res = ZONE_NORMAL; if (zone_bits & (__force int)__GFP_HIGHMEM) res = ZONE_HIGHMEM; + if (zone_bits & (__force int)__GFP_DMA32) + res = ZONE_DMA32; if (zone_bits & (__force int)__GFP_DMA) res = ZONE_DMA; return res; @@ -1846,11 +1855,10 @@ static int __devinit pageset_cpuup_callback(struct notifier_block *nfb, if (process_zones(cpu)) ret = NOTIFY_BAD; break; -#ifdef CONFIG_HOTPLUG_CPU + case CPU_UP_CANCELED: case CPU_DEAD: free_zone_pagesets(cpu); break; -#endif default: break; } @@ -1955,7 +1963,7 @@ static void __init free_area_init_core(struct pglist_data *pgdat, if (zholes_size) realsize -= zholes_size[j]; - if (j == ZONE_DMA || j == ZONE_NORMAL) + if (j < ZONE_HIGHMEM) nr_kernel_pages += realsize; nr_all_pages += realsize; diff --git a/net/Makefile b/net/Makefile index 4aa2f46d2a56..f5141b9d4f38 100644 --- a/net/Makefile +++ b/net/Makefile @@ -15,8 +15,8 @@ obj-$(CONFIG_NET) += $(tmp-y) # LLC has to be linked before the files in net/802/ obj-$(CONFIG_LLC) += llc/ obj-$(CONFIG_NET) += ethernet/ 802/ sched/ netlink/ -obj-$(CONFIG_INET) += ipv4/ obj-$(CONFIG_NETFILTER) += netfilter/ +obj-$(CONFIG_INET) += ipv4/ obj-$(CONFIG_XFRM) += xfrm/ obj-$(CONFIG_UNIX) += unix/ ifneq ($(CONFIG_IPV6),) diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c index d2a4fec22862..de9f4464438d 100644 --- a/net/ipv4/netfilter/ip_conntrack_netlink.c +++ b/net/ipv4/netfilter/ip_conntrack_netlink.c @@ -467,7 +467,7 @@ out: } #endif -static const int cta_min_ip[CTA_IP_MAX] = { +static const size_t cta_min_ip[CTA_IP_MAX] = { [CTA_IP_V4_SRC-1] = sizeof(u_int32_t), [CTA_IP_V4_DST-1] = sizeof(u_int32_t), }; @@ -497,7 +497,7 @@ ctnetlink_parse_tuple_ip(struct nfattr *attr, struct ip_conntrack_tuple *tuple) return 0; } -static const int cta_min_proto[CTA_PROTO_MAX] = { +static const size_t cta_min_proto[CTA_PROTO_MAX] = { [CTA_PROTO_NUM-1] = sizeof(u_int16_t), [CTA_PROTO_SRC_PORT-1] = sizeof(u_int16_t), [CTA_PROTO_DST_PORT-1] = sizeof(u_int16_t), @@ -576,7 +576,7 @@ ctnetlink_parse_tuple(struct nfattr *cda[], struct ip_conntrack_tuple *tuple, } #ifdef CONFIG_IP_NF_NAT_NEEDED -static const int cta_min_protonat[CTA_PROTONAT_MAX] = { +static const size_t cta_min_protonat[CTA_PROTONAT_MAX] = { [CTA_PROTONAT_PORT_MIN-1] = sizeof(u_int16_t), [CTA_PROTONAT_PORT_MAX-1] = sizeof(u_int16_t), }; @@ -614,6 +614,11 @@ static int ctnetlink_parse_nat_proto(struct nfattr *attr, return 0; } +static const size_t cta_min_nat[CTA_NAT_MAX] = { + [CTA_NAT_MINIP-1] = sizeof(u_int32_t), + [CTA_NAT_MAXIP-1] = sizeof(u_int32_t), +}; + static inline int ctnetlink_parse_nat(struct nfattr *cda[], const struct ip_conntrack *ct, struct ip_nat_range *range) @@ -627,6 +632,9 @@ ctnetlink_parse_nat(struct nfattr *cda[], nfattr_parse_nested(tb, CTA_NAT_MAX, cda[CTA_NAT-1]); + if (nfattr_bad_size(tb, CTA_NAT_MAX, cta_min_nat)) + return -EINVAL; + if (tb[CTA_NAT_MINIP-1]) range->min_ip = *(u_int32_t *)NFA_DATA(tb[CTA_NAT_MINIP-1]); @@ -667,6 +675,14 @@ ctnetlink_parse_help(struct nfattr *attr, char **helper_name) return 0; } +static const size_t cta_min[CTA_MAX] = { + [CTA_STATUS-1] = sizeof(u_int32_t), + [CTA_TIMEOUT-1] = sizeof(u_int32_t), + [CTA_MARK-1] = sizeof(u_int32_t), + [CTA_USE-1] = sizeof(u_int32_t), + [CTA_ID-1] = sizeof(u_int32_t) +}; + static int ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, struct nlmsghdr *nlh, struct nfattr *cda[], int *errp) @@ -678,6 +694,9 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, DEBUGP("entered %s\n", __FUNCTION__); + if (nfattr_bad_size(cda, CTA_MAX, cta_min)) + return -EINVAL; + if (cda[CTA_TUPLE_ORIG-1]) err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG); else if (cda[CTA_TUPLE_REPLY-1]) @@ -760,6 +779,9 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, return 0; } + if (nfattr_bad_size(cda, CTA_MAX, cta_min)) + return -EINVAL; + if (cda[CTA_TUPLE_ORIG-1]) err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG); else if (cda[CTA_TUPLE_REPLY-1]) @@ -1047,6 +1069,9 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, DEBUGP("entered %s\n", __FUNCTION__); + if (nfattr_bad_size(cda, CTA_MAX, cta_min)) + return -EINVAL; + if (cda[CTA_TUPLE_ORIG-1]) { err = ctnetlink_parse_tuple(cda, &otuple, CTA_TUPLE_ORIG); if (err < 0) @@ -1252,6 +1277,11 @@ out: return skb->len; } +static const size_t cta_min_exp[CTA_EXPECT_MAX] = { + [CTA_EXPECT_TIMEOUT-1] = sizeof(u_int32_t), + [CTA_EXPECT_ID-1] = sizeof(u_int32_t) +}; + static int ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, struct nlmsghdr *nlh, struct nfattr *cda[], int *errp) @@ -1263,6 +1293,9 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, DEBUGP("entered %s\n", __FUNCTION__); + if (nfattr_bad_size(cda, CTA_EXPECT_MAX, cta_min_exp)) + return -EINVAL; + if (nlh->nlmsg_flags & NLM_F_DUMP) { struct nfgenmsg *msg = NLMSG_DATA(nlh); u32 rlen; @@ -1333,6 +1366,9 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, struct ip_conntrack_helper *h; int err; + if (nfattr_bad_size(cda, CTA_EXPECT_MAX, cta_min_exp)) + return -EINVAL; + if (cda[CTA_EXPECT_TUPLE-1]) { /* delete a single expect by tuple */ err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE); @@ -1462,6 +1498,9 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb, DEBUGP("entered %s\n", __FUNCTION__); + if (nfattr_bad_size(cda, CTA_EXPECT_MAX, cta_min_exp)) + return -EINVAL; + if (!cda[CTA_EXPECT_TUPLE-1] || !cda[CTA_EXPECT_MASK-1] || !cda[CTA_EXPECT_MASTER-1]) @@ -1504,29 +1543,22 @@ static struct notifier_block ctnl_notifier_exp = { static struct nfnl_callback ctnl_cb[IPCTNL_MSG_MAX] = { [IPCTNL_MSG_CT_NEW] = { .call = ctnetlink_new_conntrack, - .attr_count = CTA_MAX, - .cap_required = CAP_NET_ADMIN }, + .attr_count = CTA_MAX, }, [IPCTNL_MSG_CT_GET] = { .call = ctnetlink_get_conntrack, - .attr_count = CTA_MAX, - .cap_required = CAP_NET_ADMIN }, + .attr_count = CTA_MAX, }, [IPCTNL_MSG_CT_DELETE] = { .call = ctnetlink_del_conntrack, - .attr_count = CTA_MAX, - .cap_required = CAP_NET_ADMIN }, + .attr_count = CTA_MAX, }, [IPCTNL_MSG_CT_GET_CTRZERO] = { .call = ctnetlink_get_conntrack, - .attr_count = CTA_MAX, - .cap_required = CAP_NET_ADMIN }, + .attr_count = CTA_MAX, }, }; static struct nfnl_callback ctnl_exp_cb[IPCTNL_MSG_EXP_MAX] = { [IPCTNL_MSG_EXP_GET] = { .call = ctnetlink_get_expect, - .attr_count = CTA_EXPECT_MAX, - .cap_required = CAP_NET_ADMIN }, + .attr_count = CTA_EXPECT_MAX, }, [IPCTNL_MSG_EXP_NEW] = { .call = ctnetlink_new_expect, - .attr_count = CTA_EXPECT_MAX, - .cap_required = CAP_NET_ADMIN }, + .attr_count = CTA_EXPECT_MAX, }, [IPCTNL_MSG_EXP_DELETE] = { .call = ctnetlink_del_expect, - .attr_count = CTA_EXPECT_MAX, - .cap_required = CAP_NET_ADMIN }, + .attr_count = CTA_EXPECT_MAX, }, }; static struct nfnetlink_subsystem ctnl_subsys = { diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c index 5b3f5220f289..ee3b7d6c4d2e 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c @@ -357,6 +357,10 @@ nfattr_failure: return -1; } +static const size_t cta_min_tcp[CTA_PROTOINFO_TCP_MAX] = { + [CTA_PROTOINFO_TCP_STATE-1] = sizeof(u_int8_t), +}; + static int nfattr_to_tcp(struct nfattr *cda[], struct ip_conntrack *ct) { struct nfattr *attr = cda[CTA_PROTOINFO_TCP-1]; @@ -369,6 +373,9 @@ static int nfattr_to_tcp(struct nfattr *cda[], struct ip_conntrack *ct) nfattr_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, attr); + if (nfattr_bad_size(tb, CTA_PROTOINFO_TCP_MAX, cta_min_tcp)) + return -EINVAL; + if (!tb[CTA_PROTOINFO_TCP_STATE-1]) return -EINVAL; diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 971ba60bf6e9..060d61202412 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -5,10 +5,20 @@ menu "IPv6: Netfilter Configuration (EXPERIMENTAL)" depends on INET && IPV6 && NETFILTER && EXPERIMENTAL -#tristate 'Connection tracking (required for masq/NAT)' CONFIG_IP6_NF_CONNTRACK -#if [ "$CONFIG_IP6_NF_CONNTRACK" != "n" ]; then -# dep_tristate ' FTP protocol support' CONFIG_IP6_NF_FTP $CONFIG_IP6_NF_CONNTRACK -#fi +config NF_CONNTRACK_IPV6 + tristate "IPv6 support for new connection tracking (EXPERIMENTAL)" + depends on EXPERIMENTAL && NF_CONNTRACK + ---help--- + Connection tracking keeps a record of what packets have passed + through your machine, in order to figure out how they are related + into connections. + + This is IPv6 support on Layer 3 independent connection tracking. + Layer 3 independent connection tracking is experimental scheme + which generalize ip_conntrack to support other layer 3 protocols. + + To compile it as a module, choose M here. If unsure, say N. + config IP6_NF_QUEUE tristate "IP6 Userspace queueing via NETLINK (OBSOLETE)" ---help--- @@ -114,7 +124,6 @@ config IP6_NF_MATCH_OWNER To compile it as a module, choose M here. If unsure, say N. -# dep_tristate ' MAC address match support' CONFIG_IP6_NF_MATCH_MAC $CONFIG_IP6_NF_IPTABLES config IP6_NF_MATCH_MARK tristate "netfilter MARK match support" depends on IP6_NF_IPTABLES @@ -170,15 +179,6 @@ config IP6_NF_MATCH_PHYSDEV To compile it as a module, choose M here. If unsure, say N. -# dep_tristate ' Multiple port match support' CONFIG_IP6_NF_MATCH_MULTIPORT $CONFIG_IP6_NF_IPTABLES -# dep_tristate ' TOS match support' CONFIG_IP6_NF_MATCH_TOS $CONFIG_IP6_NF_IPTABLES -# if [ "$CONFIG_IP6_NF_CONNTRACK" != "n" ]; then -# dep_tristate ' Connection state match support' CONFIG_IP6_NF_MATCH_STATE $CONFIG_IP6_NF_CONNTRACK $CONFIG_IP6_NF_IPTABLES -# fi -# if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then -# dep_tristate ' Unclean match support (EXPERIMENTAL)' CONFIG_IP6_NF_MATCH_UNCLEAN $CONFIG_IP6_NF_IPTABLES -# dep_tristate ' Owner match support (EXPERIMENTAL)' CONFIG_IP6_NF_MATCH_OWNER $CONFIG_IP6_NF_IPTABLES -# fi # The targets config IP6_NF_FILTER tristate "Packet filtering" @@ -220,12 +220,6 @@ config IP6_NF_TARGET_NFQUEUE To compile it as a module, choose M here. If unsure, say N. -# if [ "$CONFIG_IP6_NF_FILTER" != "n" ]; then -# dep_tristate ' REJECT target support' CONFIG_IP6_NF_TARGET_REJECT $CONFIG_IP6_NF_FILTER -# if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then -# dep_tristate ' MIRROR target support (EXPERIMENTAL)' CONFIG_IP6_NF_TARGET_MIRROR $CONFIG_IP6_NF_FILTER -# fi -# fi config IP6_NF_MANGLE tristate "Packet mangling" depends on IP6_NF_IPTABLES @@ -236,7 +230,6 @@ config IP6_NF_MANGLE To compile it as a module, choose M here. If unsure, say N. -# dep_tristate ' TOS target support' CONFIG_IP6_NF_TARGET_TOS $CONFIG_IP_NF_MANGLE config IP6_NF_TARGET_MARK tristate "MARK target support" depends on IP6_NF_MANGLE @@ -266,7 +259,6 @@ config IP6_NF_TARGET_HL To compile it as a module, choose M here. If unsure, say N. -#dep_tristate ' LOG target support' CONFIG_IP6_NF_TARGET_LOG $CONFIG_IP6_NF_IPTABLES config IP6_NF_RAW tristate 'raw table support (required for TRACE)' depends on IP6_NF_IPTABLES @@ -278,19 +270,5 @@ config IP6_NF_RAW If you want to compile it as a module, say M here and read <file:Documentation/modules.txt>. If unsure, say `N'. -config NF_CONNTRACK_IPV6 - tristate "IPv6 support for new connection tracking (EXPERIMENTAL)" - depends on EXPERIMENTAL && NF_CONNTRACK - ---help--- - Connection tracking keeps a record of what packets have passed - through your machine, in order to figure out how they are related - into connections. - - This is IPv6 support on Layer 3 independent connection tracking. - Layer 3 independent connection tracking is experimental scheme - which generalize ip_conntrack to support other layer 3 protocols. - - To compile it as a module, choose M here. If unsure, say N. - endmenu diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index e2c90b3a8074..753a3ae8502b 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -339,8 +339,8 @@ extern unsigned long nf_ct_icmpv6_timeout; /* From nf_conntrack_frag6.c */ extern unsigned long nf_ct_frag6_timeout; -extern unsigned long nf_ct_frag6_low_thresh; -extern unsigned long nf_ct_frag6_high_thresh; +extern unsigned int nf_ct_frag6_low_thresh; +extern unsigned int nf_ct_frag6_high_thresh; static struct ctl_table_header *nf_ct_ipv6_sysctl_header; @@ -367,7 +367,7 @@ static ctl_table nf_ct_sysctl_table[] = { .data = &nf_ct_frag6_low_thresh, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = &proc_dointvec, }, { .ctl_name = NET_NF_CONNTRACK_FRAG6_HIGH_THRESH, @@ -375,7 +375,7 @@ static ctl_table nf_ct_sysctl_table[] = { .data = &nf_ct_frag6_high_thresh, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = &proc_dointvec, }, { .ctl_name = 0 } }; diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 7640b9bb7694..c2c52af9e560 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -55,9 +55,9 @@ #define NF_CT_FRAG6_LOW_THRESH 196608 /* == 192*1024 */ #define NF_CT_FRAG6_TIMEOUT IPV6_FRAG_TIMEOUT -int nf_ct_frag6_high_thresh = 256*1024; -int nf_ct_frag6_low_thresh = 192*1024; -int nf_ct_frag6_timeout = IPV6_FRAG_TIMEOUT; +unsigned int nf_ct_frag6_high_thresh = 256*1024; +unsigned int nf_ct_frag6_low_thresh = 192*1024; +unsigned long nf_ct_frag6_timeout = IPV6_FRAG_TIMEOUT; struct nf_ct_frag6_skb_cb { @@ -190,8 +190,10 @@ static void nf_ct_frag6_secret_rebuild(unsigned long dummy) atomic_t nf_ct_frag6_mem = ATOMIC_INIT(0); /* Memory Tracking Functions. */ -static inline void frag_kfree_skb(struct sk_buff *skb) +static inline void frag_kfree_skb(struct sk_buff *skb, unsigned int *work) { + if (work) + *work -= skb->truesize; atomic_sub(skb->truesize, &nf_ct_frag6_mem); if (NFCT_FRAG6_CB(skb)->orig) kfree_skb(NFCT_FRAG6_CB(skb)->orig); @@ -199,8 +201,11 @@ static inline void frag_kfree_skb(struct sk_buff *skb) kfree_skb(skb); } -static inline void frag_free_queue(struct nf_ct_frag6_queue *fq) +static inline void frag_free_queue(struct nf_ct_frag6_queue *fq, + unsigned int *work) { + if (work) + *work -= sizeof(struct nf_ct_frag6_queue); atomic_sub(sizeof(struct nf_ct_frag6_queue), &nf_ct_frag6_mem); kfree(fq); } @@ -218,7 +223,8 @@ static inline struct nf_ct_frag6_queue *frag_alloc_queue(void) /* Destruction primitives. */ /* Complete destruction of fq. */ -static void nf_ct_frag6_destroy(struct nf_ct_frag6_queue *fq) +static void nf_ct_frag6_destroy(struct nf_ct_frag6_queue *fq, + unsigned int *work) { struct sk_buff *fp; @@ -230,17 +236,17 @@ static void nf_ct_frag6_destroy(struct nf_ct_frag6_queue *fq) while (fp) { struct sk_buff *xp = fp->next; - frag_kfree_skb(fp); + frag_kfree_skb(fp, work); fp = xp; } - frag_free_queue(fq); + frag_free_queue(fq, work); } -static __inline__ void fq_put(struct nf_ct_frag6_queue *fq) +static __inline__ void fq_put(struct nf_ct_frag6_queue *fq, unsigned int *work) { if (atomic_dec_and_test(&fq->refcnt)) - nf_ct_frag6_destroy(fq); + nf_ct_frag6_destroy(fq, work); } /* Kill fq entry. It is not destroyed immediately, @@ -262,16 +268,21 @@ static void nf_ct_frag6_evictor(void) { struct nf_ct_frag6_queue *fq; struct list_head *tmp; + unsigned int work; - for (;;) { - if (atomic_read(&nf_ct_frag6_mem) <= nf_ct_frag6_low_thresh) - return; + work = atomic_read(&nf_ct_frag6_mem); + if (work <= nf_ct_frag6_low_thresh) + return; + + work -= nf_ct_frag6_low_thresh; + while (work > 0) { read_lock(&nf_ct_frag6_lock); if (list_empty(&nf_ct_frag6_lru_list)) { read_unlock(&nf_ct_frag6_lock); return; } tmp = nf_ct_frag6_lru_list.next; + BUG_ON(tmp == NULL); fq = list_entry(tmp, struct nf_ct_frag6_queue, lru_list); atomic_inc(&fq->refcnt); read_unlock(&nf_ct_frag6_lock); @@ -281,7 +292,7 @@ static void nf_ct_frag6_evictor(void) fq_kill(fq); spin_unlock(&fq->lock); - fq_put(fq); + fq_put(fq, &work); } } @@ -298,7 +309,7 @@ static void nf_ct_frag6_expire(unsigned long data) out: spin_unlock(&fq->lock); - fq_put(fq); + fq_put(fq, NULL); } /* Creation primitives. */ @@ -318,7 +329,7 @@ static struct nf_ct_frag6_queue *nf_ct_frag6_intern(unsigned int hash, atomic_inc(&fq->refcnt); write_unlock(&nf_ct_frag6_lock); fq_in->last_in |= COMPLETE; - fq_put(fq_in); + fq_put(fq_in, NULL); return fq; } } @@ -535,7 +546,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, fq->fragments = next; fq->meat -= free_it->len; - frag_kfree_skb(free_it); + frag_kfree_skb(free_it, NULL); } } @@ -811,7 +822,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb) if (nf_ct_frag6_queue(fq, clone, fhdr, nhoff) < 0) { spin_unlock(&fq->lock); DEBUGP("Can't insert skb to queue\n"); - fq_put(fq); + fq_put(fq, NULL); goto ret_orig; } @@ -822,7 +833,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb) } spin_unlock(&fq->lock); - fq_put(fq); + fq_put(fq, NULL); return ret_skb; ret_orig: @@ -881,5 +892,6 @@ int nf_ct_frag6_init(void) void nf_ct_frag6_cleanup(void) { del_timer(&nf_ct_frag6_secret_timer); + nf_ct_frag6_low_thresh = 0; nf_ct_frag6_evictor(); } diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 156680ddb042..5a6fcf349bdf 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -970,6 +970,12 @@ static int tcp_packet(struct nf_conn *conntrack, conntrack->timeout.function((unsigned long) conntrack); return -NF_REPEAT; + } else { + write_unlock_bh(&tcp_lock); + if (LOG_INVALID(IPPROTO_TCP)) + nf_log_packet(pf, 0, skb, NULL, NULL, + NULL, "nf_ct_tcp: invalid SYN"); + return -NF_ACCEPT; } case TCP_CONNTRACK_CLOSE: if (index == TCP_RST_SET diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 83f4c53030fc..a60c59b97631 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -223,6 +223,12 @@ static inline int nfnetlink_rcv_msg(struct sk_buff *skb, NFNL_SUBSYS_ID(nlh->nlmsg_type), NFNL_MSG_TYPE(nlh->nlmsg_type)); + if (!cap_raised(NETLINK_CB(skb).eff_cap, CAP_NET_ADMIN)) { + DEBUGP("missing CAP_NET_ADMIN\n"); + *errp = -EPERM; + return -1; + } + /* Only requests are handled by kernel now. */ if (!(nlh->nlmsg_flags & NLM_F_REQUEST)) { DEBUGP("received non-request message\n"); @@ -240,15 +246,12 @@ static inline int nfnetlink_rcv_msg(struct sk_buff *skb, ss = nfnetlink_get_subsys(type); if (!ss) { #ifdef CONFIG_KMOD - if (cap_raised(NETLINK_CB(skb).eff_cap, CAP_NET_ADMIN)) { - /* don't call nfnl_shunlock, since it would reenter - * with further packet processing */ - up(&nfnl_sem); - request_module("nfnetlink-subsys-%d", - NFNL_SUBSYS_ID(type)); - nfnl_shlock(); - ss = nfnetlink_get_subsys(type); - } + /* don't call nfnl_shunlock, since it would reenter + * with further packet processing */ + up(&nfnl_sem); + request_module("nfnetlink-subsys-%d", NFNL_SUBSYS_ID(type)); + nfnl_shlock(); + ss = nfnetlink_get_subsys(type); if (!ss) #endif goto err_inval; @@ -260,13 +263,6 @@ static inline int nfnetlink_rcv_msg(struct sk_buff *skb, goto err_inval; } - if (nc->cap_required && - !cap_raised(NETLINK_CB(skb).eff_cap, nc->cap_required)) { - DEBUGP("permission denied for type %d\n", type); - *errp = -EPERM; - return -1; - } - { u_int16_t attr_count = ss->cb[NFNL_MSG_TYPE(nlh->nlmsg_type)].attr_count; diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index d194676f3655..cba63729313d 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -862,11 +862,9 @@ out_put: static struct nfnl_callback nfulnl_cb[NFULNL_MSG_MAX] = { [NFULNL_MSG_PACKET] = { .call = nfulnl_recv_unsupp, - .attr_count = NFULA_MAX, - .cap_required = CAP_NET_ADMIN, }, + .attr_count = NFULA_MAX, }, [NFULNL_MSG_CONFIG] = { .call = nfulnl_recv_config, - .attr_count = NFULA_CFG_MAX, - .cap_required = CAP_NET_ADMIN }, + .attr_count = NFULA_CFG_MAX, }, }; static struct nfnetlink_subsystem nfulnl_subsys = { diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index f065a6c94953..f28460b61e47 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -931,14 +931,11 @@ out_put: static struct nfnl_callback nfqnl_cb[NFQNL_MSG_MAX] = { [NFQNL_MSG_PACKET] = { .call = nfqnl_recv_unsupp, - .attr_count = NFQA_MAX, - .cap_required = CAP_NET_ADMIN }, + .attr_count = NFQA_MAX, }, [NFQNL_MSG_VERDICT] = { .call = nfqnl_recv_verdict, - .attr_count = NFQA_MAX, - .cap_required = CAP_NET_ADMIN }, + .attr_count = NFQA_MAX, }, [NFQNL_MSG_CONFIG] = { .call = nfqnl_recv_config, - .attr_count = NFQA_CFG_MAX, - .cap_required = CAP_NET_ADMIN }, + .attr_count = NFQA_CFG_MAX, }, }; static struct nfnetlink_subsystem nfqnl_subsys = { diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index e50e7cf43737..c6a51911e71e 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1178,6 +1178,7 @@ svc_recv(struct svc_serv *serv, struct svc_rqst *rqstp, long timeout) arg->tail[0].iov_len = 0; try_to_freeze(); + cond_resched(); if (signalled()) return -EINTR; |