diff options
Diffstat (limited to 'arch/x86')
217 files changed, 9850 insertions, 2606 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig new file mode 100644 index 000000000000..1eb59971af5d --- /dev/null +++ b/arch/x86/Kconfig @@ -0,0 +1,1620 @@ +# x86 configuration +mainmenu "Linux Kernel Configuration for x86" + +# Select 32 or 64 bit +config 64BIT + bool "64-bit kernel" + default n + help + Say yes to build a 64-bit kernel - formerly known as x86_64 + Say no to build a 32-bit kernel - formerly known as i386 + +config X86_32 + def_bool !64BIT + +config X86_64 + def_bool 64BIT + +### Arch settings +config X86 + bool + default y + +config GENERIC_TIME + bool + default y + +config GENERIC_CMOS_UPDATE + bool + default y + +config CLOCKSOURCE_WATCHDOG + bool + default y + +config GENERIC_CLOCKEVENTS + bool + default y + +config GENERIC_CLOCKEVENTS_BROADCAST + bool + default y + depends on X86_64 || (X86_32 && X86_LOCAL_APIC) + +config LOCKDEP_SUPPORT + bool + default y + +config STACKTRACE_SUPPORT + bool + default y + +config SEMAPHORE_SLEEPERS + bool + default y + +config MMU + bool + default y + +config ZONE_DMA + bool + default y + +config QUICKLIST + bool + default X86_32 + +config SBUS + bool + +config GENERIC_ISA_DMA + bool + default y + +config GENERIC_IOMAP + bool + default y + +config GENERIC_BUG + bool + default y + depends on BUG + +config GENERIC_HWEIGHT + bool + default y + +config ARCH_MAY_HAVE_PC_FDC + bool + default y + +config DMI + bool + default y + +config RWSEM_GENERIC_SPINLOCK + def_bool !X86_XADD + +config RWSEM_XCHGADD_ALGORITHM + def_bool X86_XADD + +config ARCH_HAS_ILOG2_U32 + def_bool n + +config ARCH_HAS_ILOG2_U64 + def_bool n + +config GENERIC_CALIBRATE_DELAY + def_bool y + +config GENERIC_TIME_VSYSCALL + bool + default X86_64 + + + + + +config ZONE_DMA32 + bool + default X86_64 + +config ARCH_POPULATES_NODE_MAP + def_bool y + +config AUDIT_ARCH + bool + default X86_64 + +# Use the generic interrupt handling code in kernel/irq/: +config GENERIC_HARDIRQS + bool + default y + +config GENERIC_IRQ_PROBE + bool + default y + +config GENERIC_PENDING_IRQ + bool + depends on GENERIC_HARDIRQS && SMP + default y + +config X86_SMP + bool + depends on X86_32 && SMP && !X86_VOYAGER + default y + +config X86_HT + bool + depends on SMP && !(X86_VISWS || X86_VOYAGER || MK8) + default y + +config X86_BIOS_REBOOT + bool + depends on X86_32 && !(X86_VISWS || X86_VOYAGER) + default y + +config X86_TRAMPOLINE + bool + depends on X86_SMP || (X86_VOYAGER && SMP) + default y + +config KTIME_SCALAR + def_bool X86_32 +source "init/Kconfig" + +menu "Processor type and features" + +source "kernel/time/Kconfig" + +config SMP + bool "Symmetric multi-processing support" + ---help--- + This enables support for systems with more than one CPU. If you have + a system with only one CPU, like most personal computers, say N. If + you have a system with more than one CPU, say Y. + + If you say N here, the kernel will run on single and multiprocessor + machines, but will use only one CPU of a multiprocessor machine. If + you say Y here, the kernel will run on many, but not all, + singleprocessor machines. On a singleprocessor machine, the kernel + will run faster if you say N here. + + Note that if you say Y here and choose architecture "586" or + "Pentium" under "Processor family", the kernel will not work on 486 + architectures. Similarly, multiprocessor kernels for the "PPro" + architecture may not work on all Pentium based boards. + + People using multiprocessor machines who say Y here should also say + Y to "Enhanced Real Time Clock Support", below. The "Advanced Power + Management" code will be disabled if you say Y here. + + See also the <file:Documentation/smp.txt>, + <file:Documentation/i386/IO-APIC.txt>, + <file:Documentation/nmi_watchdog.txt> and the SMP-HOWTO available at + <http://www.tldp.org/docs.html#howto>. + + If you don't know what to do here, say N. + +choice + prompt "Subarchitecture Type" + default X86_PC + +config X86_PC + bool "PC-compatible" + help + Choose this option if your computer is a standard PC or compatible. + +config X86_ELAN + bool "AMD Elan" + depends on X86_32 + help + Select this for an AMD Elan processor. + + Do not use this option for K6/Athlon/Opteron processors! + + If unsure, choose "PC-compatible" instead. + +config X86_VOYAGER + bool "Voyager (NCR)" + depends on X86_32 + select SMP if !BROKEN + help + Voyager is an MCA-based 32-way capable SMP architecture proprietary + to NCR Corp. Machine classes 345x/35xx/4100/51xx are Voyager-based. + + *** WARNING *** + + If you do not specifically know you have a Voyager based machine, + say N here, otherwise the kernel you build will not be bootable. + +config X86_NUMAQ + bool "NUMAQ (IBM/Sequent)" + select SMP + select NUMA + depends on X86_32 + help + This option is used for getting Linux to run on a (IBM/Sequent) NUMA + multiquad box. This changes the way that processors are bootstrapped, + and uses Clustered Logical APIC addressing mode instead of Flat Logical. + You will need a new lynxer.elf file to flash your firmware with - send + email to <Martin.Bligh@us.ibm.com>. + +config X86_SUMMIT + bool "Summit/EXA (IBM x440)" + depends on X86_32 && SMP + help + This option is needed for IBM systems that use the Summit/EXA chipset. + In particular, it is needed for the x440. + + If you don't have one of these computers, you should say N here. + If you want to build a NUMA kernel, you must select ACPI. + +config X86_BIGSMP + bool "Support for other sub-arch SMP systems with more than 8 CPUs" + depends on X86_32 && SMP + help + This option is needed for the systems that have more than 8 CPUs + and if the system is not of any sub-arch type above. + + If you don't have such a system, you should say N here. + +config X86_VISWS + bool "SGI 320/540 (Visual Workstation)" + depends on X86_32 + help + The SGI Visual Workstation series is an IA32-based workstation + based on SGI systems chips with some legacy PC hardware attached. + + Say Y here to create a kernel to run on the SGI 320 or 540. + + A kernel compiled for the Visual Workstation will not run on PCs + and vice versa. See <file:Documentation/sgi-visws.txt> for details. + +config X86_GENERICARCH + bool "Generic architecture (Summit, bigsmp, ES7000, default)" + depends on X86_32 + help + This option compiles in the Summit, bigsmp, ES7000, default subarchitectures. + It is intended for a generic binary kernel. + If you want a NUMA kernel, select ACPI. We need SRAT for NUMA. + +config X86_ES7000 + bool "Support for Unisys ES7000 IA32 series" + depends on X86_32 && SMP + help + Support for Unisys ES7000 systems. Say 'Y' here if this kernel is + supposed to run on an IA32-based Unisys ES7000 system. + Only choose this option if you have such a system, otherwise you + should say N here. + +config X86_VSMP + bool "Support for ScaleMP vSMP" + depends on X86_64 && PCI + help + Support for ScaleMP vSMP systems. Say 'Y' here if this kernel is + supposed to run on these EM64T-based machines. Only choose this option + if you have one of these machines. + +endchoice + +config SCHED_NO_NO_OMIT_FRAME_POINTER + bool "Single-depth WCHAN output" + default y + depends on X86_32 + help + Calculate simpler /proc/<PID>/wchan values. If this option + is disabled then wchan values will recurse back to the + caller function. This provides more accurate wchan values, + at the expense of slightly more scheduling overhead. + + If in doubt, say "Y". + +config PARAVIRT + bool + depends on X86_32 && !(X86_VISWS || X86_VOYAGER) + help + This changes the kernel so it can modify itself when it is run + under a hypervisor, potentially improving performance significantly + over full virtualization. However, when run without a hypervisor + the kernel is theoretically slower and slightly larger. + +menuconfig PARAVIRT_GUEST + bool "Paravirtualized guest support" + depends on X86_32 + help + Say Y here to get to see options related to running Linux under + various hypervisors. This option alone does not add any kernel code. + + If you say N, all options in this submenu will be skipped and disabled. + +if PARAVIRT_GUEST + +source "arch/x86/xen/Kconfig" + +config VMI + bool "VMI Guest support" + select PARAVIRT + depends on !(X86_VISWS || X86_VOYAGER) + help + VMI provides a paravirtualized interface to the VMware ESX server + (it could be used by other hypervisors in theory too, but is not + at the moment), by linking the kernel to a GPL-ed ROM module + provided by the hypervisor. + +source "arch/x86/lguest/Kconfig" + +endif + +config ACPI_SRAT + bool + default y + depends on X86_32 && ACPI && NUMA && (X86_SUMMIT || X86_GENERICARCH) + select ACPI_NUMA + +config HAVE_ARCH_PARSE_SRAT + bool + default y + depends on ACPI_SRAT + +config X86_SUMMIT_NUMA + bool + default y + depends on X86_32 && NUMA && (X86_SUMMIT || X86_GENERICARCH) + +config X86_CYCLONE_TIMER + bool + default y + depends on X86_32 && X86_SUMMIT || X86_GENERICARCH + +config ES7000_CLUSTERED_APIC + bool + default y + depends on SMP && X86_ES7000 && MPENTIUMIII + +source "arch/x86/Kconfig.cpu" + +config HPET_TIMER + bool + prompt "HPET Timer Support" if X86_32 + default X86_64 + help + Use the IA-PC HPET (High Precision Event Timer) to manage + time in preference to the PIT and RTC, if a HPET is + present. + HPET is the next generation timer replacing legacy 8254s. + The HPET provides a stable time base on SMP + systems, unlike the TSC, but it is more expensive to access, + as it is off-chip. You can find the HPET spec at + <http://www.intel.com/hardwaredesign/hpetspec.htm>. + + You can safely choose Y here. However, HPET will only be + activated if the platform and the BIOS support this feature. + Otherwise the 8254 will be used for timing services. + + Choose N to continue using the legacy 8254 timer. + +config HPET_EMULATE_RTC + bool + depends on HPET_TIMER && RTC=y + default y + +# Mark as embedded because too many people got it wrong. +# The code disables itself when not needed. +config GART_IOMMU + bool "GART IOMMU support" if EMBEDDED + default y + select SWIOTLB + select AGP + depends on X86_64 && PCI + help + Support for full DMA access of devices with 32bit memory access only + on systems with more than 3GB. This is usually needed for USB, + sound, many IDE/SATA chipsets and some other devices. + Provides a driver for the AMD Athlon64/Opteron/Turion/Sempron GART + based hardware IOMMU and a software bounce buffer based IOMMU used + on Intel systems and as fallback. + The code is only active when needed (enough memory and limited + device) unless CONFIG_IOMMU_DEBUG or iommu=force is specified + too. + +config CALGARY_IOMMU + bool "IBM Calgary IOMMU support" + select SWIOTLB + depends on X86_64 && PCI && EXPERIMENTAL + help + Support for hardware IOMMUs in IBM's xSeries x366 and x460 + systems. Needed to run systems with more than 3GB of memory + properly with 32-bit PCI devices that do not support DAC + (Double Address Cycle). Calgary also supports bus level + isolation, where all DMAs pass through the IOMMU. This + prevents them from going anywhere except their intended + destination. This catches hard-to-find kernel bugs and + mis-behaving drivers and devices that do not use the DMA-API + properly to set up their DMA buffers. The IOMMU can be + turned off at boot time with the iommu=off parameter. + Normally the kernel will make the right choice by itself. + If unsure, say Y. + +config CALGARY_IOMMU_ENABLED_BY_DEFAULT + bool "Should Calgary be enabled by default?" + default y + depends on CALGARY_IOMMU + help + Should Calgary be enabled by default? if you choose 'y', Calgary + will be used (if it exists). If you choose 'n', Calgary will not be + used even if it exists. If you choose 'n' and would like to use + Calgary anyway, pass 'iommu=calgary' on the kernel command line. + If unsure, say Y. + +# need this always selected by IOMMU for the VIA workaround +config SWIOTLB + bool + help + Support for software bounce buffers used on x86-64 systems + which don't have a hardware IOMMU (e.g. the current generation + of Intel's x86-64 CPUs). Using this PCI devices which can only + access 32-bits of memory can be used on systems with more than + 3 GB of memory. If unsure, say Y. + + +config NR_CPUS + int "Maximum number of CPUs (2-255)" + range 2 255 + depends on SMP + default "32" if X86_NUMAQ || X86_SUMMIT || X86_BIGSMP || X86_ES7000 + default "8" + help + This allows you to specify the maximum number of CPUs which this + kernel will support. The maximum supported value is 255 and the + minimum value which makes sense is 2. + + This is purely to save memory - each supported CPU adds + approximately eight kilobytes to the kernel image. + +config SCHED_SMT + bool "SMT (Hyperthreading) scheduler support" + depends on (X86_64 && SMP) || (X86_32 && X86_HT) + help + SMT scheduler support improves the CPU scheduler's decision making + when dealing with Intel Pentium 4 chips with HyperThreading at a + cost of slightly increased overhead in some places. If unsure say + N here. + +config SCHED_MC + bool "Multi-core scheduler support" + depends on (X86_64 && SMP) || (X86_32 && X86_HT) + default y + help + Multi-core scheduler support improves the CPU scheduler's decision + making when dealing with multi-core CPU chips at a cost of slightly + increased overhead in some places. If unsure say N here. + +source "kernel/Kconfig.preempt" + +config X86_UP_APIC + bool "Local APIC support on uniprocessors" + depends on X86_32 && !SMP && !(X86_VISWS || X86_VOYAGER || X86_GENERICARCH) + help + A local APIC (Advanced Programmable Interrupt Controller) is an + integrated interrupt controller in the CPU. If you have a single-CPU + system which has a processor with a local APIC, you can say Y here to + enable and use it. If you say Y here even though your machine doesn't + have a local APIC, then the kernel will still run with no slowdown at + all. The local APIC supports CPU-generated self-interrupts (timer, + performance counters), and the NMI watchdog which detects hard + lockups. + +config X86_UP_IOAPIC + bool "IO-APIC support on uniprocessors" + depends on X86_UP_APIC + help + An IO-APIC (I/O Advanced Programmable Interrupt Controller) is an + SMP-capable replacement for PC-style interrupt controllers. Most + SMP systems and many recent uniprocessor systems have one. + + If you have a single-CPU system with an IO-APIC, you can say Y here + to use it. If you say Y here even though your machine doesn't have + an IO-APIC, then the kernel will still run with no slowdown at all. + +config X86_LOCAL_APIC + bool + depends on X86_64 || (X86_32 && (X86_UP_APIC || ((X86_VISWS || SMP) && !X86_VOYAGER) || X86_GENERICARCH)) + default y + +config X86_IO_APIC + bool + depends on X86_64 || (X86_32 && (X86_UP_IOAPIC || (SMP && !(X86_VISWS || X86_VOYAGER)) || X86_GENERICARCH)) + default y + +config X86_VISWS_APIC + bool + depends on X86_32 && X86_VISWS + default y + +config X86_MCE + bool "Machine Check Exception" + depends on !X86_VOYAGER + ---help--- + Machine Check Exception support allows the processor to notify the + kernel if it detects a problem (e.g. overheating, component failure). + The action the kernel takes depends on the severity of the problem, + ranging from a warning message on the console, to halting the machine. + Your processor must be a Pentium or newer to support this - check the + flags in /proc/cpuinfo for mce. Note that some older Pentium systems + have a design flaw which leads to false MCE events - hence MCE is + disabled on all P5 processors, unless explicitly enabled with "mce" + as a boot argument. Similarly, if MCE is built in and creates a + problem on some new non-standard machine, you can boot with "nomce" + to disable it. MCE support simply ignores non-MCE processors like + the 386 and 486, so nearly everyone can say Y here. + +config X86_MCE_INTEL + bool "Intel MCE features" + depends on X86_64 && X86_MCE && X86_LOCAL_APIC + default y + help + Additional support for intel specific MCE features such as + the thermal monitor. + +config X86_MCE_AMD + bool "AMD MCE features" + depends on X86_64 && X86_MCE && X86_LOCAL_APIC + default y + help + Additional support for AMD specific MCE features such as + the DRAM Error Threshold. + +config X86_MCE_NONFATAL + tristate "Check for non-fatal errors on AMD Athlon/Duron / Intel Pentium 4" + depends on X86_32 && X86_MCE + help + Enabling this feature starts a timer that triggers every 5 seconds which + will look at the machine check registers to see if anything happened. + Non-fatal problems automatically get corrected (but still logged). + Disable this if you don't want to see these messages. + Seeing the messages this option prints out may be indicative of dying + or out-of-spec (ie, overclocked) hardware. + This option only does something on certain CPUs. + (AMD Athlon/Duron and Intel Pentium 4) + +config X86_MCE_P4THERMAL + bool "check for P4 thermal throttling interrupt." + depends on X86_32 && X86_MCE && (X86_UP_APIC || SMP) && !X86_VISWS + help + Enabling this feature will cause a message to be printed when the P4 + enters thermal throttling. + +config VM86 + bool "Enable VM86 support" if EMBEDDED + default y + depends on X86_32 + help + This option is required by programs like DOSEMU to run 16-bit legacy + code on X86 processors. It also may be needed by software like + XFree86 to initialize some video cards via BIOS. Disabling this + option saves about 6k. + +config TOSHIBA + tristate "Toshiba Laptop support" + depends on X86_32 + ---help--- + This adds a driver to safely access the System Management Mode of + the CPU on Toshiba portables with a genuine Toshiba BIOS. It does + not work on models with a Phoenix BIOS. The System Management Mode + is used to set the BIOS and power saving options on Toshiba portables. + + For information on utilities to make use of this driver see the + Toshiba Linux utilities web site at: + <http://www.buzzard.org.uk/toshiba/>. + + Say Y if you intend to run this kernel on a Toshiba portable. + Say N otherwise. + +config I8K + tristate "Dell laptop support" + depends on X86_32 + ---help--- + This adds a driver to safely access the System Management Mode + of the CPU on the Dell Inspiron 8000. The System Management Mode + is used to read cpu temperature and cooling fan status and to + control the fans on the I8K portables. + + This driver has been tested only on the Inspiron 8000 but it may + also work with other Dell laptops. You can force loading on other + models by passing the parameter `force=1' to the module. Use at + your own risk. + + For information on utilities to make use of this driver see the + I8K Linux utilities web site at: + <http://people.debian.org/~dz/i8k/> + + Say Y if you intend to run this kernel on a Dell Inspiron 8000. + Say N otherwise. + +config X86_REBOOTFIXUPS + bool "Enable X86 board specific fixups for reboot" + depends on X86_32 && X86 + default n + ---help--- + This enables chipset and/or board specific fixups to be done + in order to get reboot to work correctly. This is only needed on + some combinations of hardware and BIOS. The symptom, for which + this config is intended, is when reboot ends with a stalled/hung + system. + + Currently, the only fixup is for the Geode machines using + CS5530A and CS5536 chipsets. + + Say Y if you want to enable the fixup. Currently, it's safe to + enable this option even if you don't need it. + Say N otherwise. + +config MICROCODE + tristate "/dev/cpu/microcode - Intel IA32 CPU microcode support" + select FW_LOADER + ---help--- + If you say Y here, you will be able to update the microcode on + Intel processors in the IA32 family, e.g. Pentium Pro, Pentium II, + Pentium III, Pentium 4, Xeon etc. You will obviously need the + actual microcode binary data itself which is not shipped with the + Linux kernel. + + For latest news and information on obtaining all the required + ingredients for this driver, check: + <http://www.urbanmyth.org/microcode/>. + + To compile this driver as a module, choose M here: the + module will be called microcode. + +config MICROCODE_OLD_INTERFACE + bool + depends on MICROCODE + default y + +config X86_MSR + tristate "/dev/cpu/*/msr - Model-specific register support" + help + This device gives privileged processes access to the x86 + Model-Specific Registers (MSRs). It is a character device with + major 202 and minors 0 to 31 for /dev/cpu/0/msr to /dev/cpu/31/msr. + MSR accesses are directed to a specific CPU on multi-processor + systems. + +config X86_CPUID + tristate "/dev/cpu/*/cpuid - CPU information support" + help + This device gives processes access to the x86 CPUID instruction to + be executed on a specific processor. It is a character device + with major 203 and minors 0 to 31 for /dev/cpu/0/cpuid to + /dev/cpu/31/cpuid. + +choice + prompt "High Memory Support" + default HIGHMEM4G if !X86_NUMAQ + default HIGHMEM64G if X86_NUMAQ + depends on X86_32 + +config NOHIGHMEM + bool "off" + depends on !X86_NUMAQ + ---help--- + Linux can use up to 64 Gigabytes of physical memory on x86 systems. + However, the address space of 32-bit x86 processors is only 4 + Gigabytes large. That means that, if you have a large amount of + physical memory, not all of it can be "permanently mapped" by the + kernel. The physical memory that's not permanently mapped is called + "high memory". + + If you are compiling a kernel which will never run on a machine with + more than 1 Gigabyte total physical RAM, answer "off" here (default + choice and suitable for most users). This will result in a "3GB/1GB" + split: 3GB are mapped so that each process sees a 3GB virtual memory + space and the remaining part of the 4GB virtual memory space is used + by the kernel to permanently map as much physical memory as + possible. + + If the machine has between 1 and 4 Gigabytes physical RAM, then + answer "4GB" here. + + If more than 4 Gigabytes is used then answer "64GB" here. This + selection turns Intel PAE (Physical Address Extension) mode on. + PAE implements 3-level paging on IA32 processors. PAE is fully + supported by Linux, PAE mode is implemented on all recent Intel + processors (Pentium Pro and better). NOTE: If you say "64GB" here, + then the kernel will not boot on CPUs that don't support PAE! + + The actual amount of total physical memory will either be + auto detected or can be forced by using a kernel command line option + such as "mem=256M". (Try "man bootparam" or see the documentation of + your boot loader (lilo or loadlin) about how to pass options to the + kernel at boot time.) + + If unsure, say "off". + +config HIGHMEM4G + bool "4GB" + depends on !X86_NUMAQ + help + Select this if you have a 32-bit processor and between 1 and 4 + gigabytes of physical RAM. + +config HIGHMEM64G + bool "64GB" + depends on !M386 && !M486 + select X86_PAE + help + Select this if you have a 32-bit processor and more than 4 + gigabytes of physical RAM. + +endchoice + +choice + depends on EXPERIMENTAL + prompt "Memory split" if EMBEDDED + default VMSPLIT_3G + depends on X86_32 + help + Select the desired split between kernel and user memory. + + If the address range available to the kernel is less than the + physical memory installed, the remaining memory will be available + as "high memory". Accessing high memory is a little more costly + than low memory, as it needs to be mapped into the kernel first. + Note that increasing the kernel address space limits the range + available to user programs, making the address space there + tighter. Selecting anything other than the default 3G/1G split + will also likely make your kernel incompatible with binary-only + kernel modules. + + If you are not absolutely sure what you are doing, leave this + option alone! + + config VMSPLIT_3G + bool "3G/1G user/kernel split" + config VMSPLIT_3G_OPT + depends on !X86_PAE + bool "3G/1G user/kernel split (for full 1G low memory)" + config VMSPLIT_2G + bool "2G/2G user/kernel split" + config VMSPLIT_2G_OPT + depends on !X86_PAE + bool "2G/2G user/kernel split (for full 2G low memory)" + config VMSPLIT_1G + bool "1G/3G user/kernel split" +endchoice + +config PAGE_OFFSET + hex + default 0xB0000000 if VMSPLIT_3G_OPT + default 0x80000000 if VMSPLIT_2G + default 0x78000000 if VMSPLIT_2G_OPT + default 0x40000000 if VMSPLIT_1G + default 0xC0000000 + depends on X86_32 + +config HIGHMEM + bool + depends on X86_32 && (HIGHMEM64G || HIGHMEM4G) + default y + +config X86_PAE + bool "PAE (Physical Address Extension) Support" + default n + depends on X86_32 && !HIGHMEM4G + select RESOURCES_64BIT + help + PAE is required for NX support, and furthermore enables + larger swapspace support for non-overcommit purposes. It + has the cost of more pagetable lookup overhead, and also + consumes more pagetable space per process. + +# Common NUMA Features +config NUMA + bool "Numa Memory Allocation and Scheduler Support (EXPERIMENTAL)" + depends on SMP + depends on X86_64 || (X86_32 && HIGHMEM64G && (X86_NUMAQ || (X86_SUMMIT || X86_GENERICARCH) && ACPI) && EXPERIMENTAL) + default n if X86_PC + default y if (X86_NUMAQ || X86_SUMMIT) + help + Enable NUMA (Non Uniform Memory Access) support. + The kernel will try to allocate memory used by a CPU on the + local memory controller of the CPU and add some more + NUMA awareness to the kernel. + + For i386 this is currently highly experimental and should be only + used for kernel development. It might also cause boot failures. + For x86_64 this is recommended on all multiprocessor Opteron systems. + If the system is EM64T, you should say N unless your system is + EM64T NUMA. + +comment "NUMA (Summit) requires SMP, 64GB highmem support, ACPI" + depends on X86_32 && X86_SUMMIT && (!HIGHMEM64G || !ACPI) + +config K8_NUMA + bool "Old style AMD Opteron NUMA detection" + depends on X86_64 && NUMA && PCI + default y + help + Enable K8 NUMA node topology detection. You should say Y here if + you have a multi processor AMD K8 system. This uses an old + method to read the NUMA configuration directly from the builtin + Northbridge of Opteron. It is recommended to use X86_64_ACPI_NUMA + instead, which also takes priority if both are compiled in. + +config X86_64_ACPI_NUMA + bool "ACPI NUMA detection" + depends on X86_64 && NUMA && ACPI && PCI + select ACPI_NUMA + default y + help + Enable ACPI SRAT based node topology detection. + +config NUMA_EMU + bool "NUMA emulation" + depends on X86_64 && NUMA + help + Enable NUMA emulation. A flat machine will be split + into virtual nodes when booted with "numa=fake=N", where N is the + number of nodes. This is only useful for debugging. + +config NODES_SHIFT + int + default "6" if X86_64 + default "4" if X86_NUMAQ + default "3" + depends on NEED_MULTIPLE_NODES + +config HAVE_ARCH_BOOTMEM_NODE + bool + depends on X86_32 && NUMA + default y + +config ARCH_HAVE_MEMORY_PRESENT + bool + depends on X86_32 && DISCONTIGMEM + default y + +config NEED_NODE_MEMMAP_SIZE + bool + depends on X86_32 && (DISCONTIGMEM || SPARSEMEM) + default y + +config HAVE_ARCH_ALLOC_REMAP + bool + depends on X86_32 && NUMA + default y + +config ARCH_FLATMEM_ENABLE + def_bool y + depends on (X86_32 && ARCH_SELECT_MEMORY_MODEL && X86_PC) || (X86_64 && !NUMA) + +config ARCH_DISCONTIGMEM_ENABLE + def_bool y + depends on NUMA + +config ARCH_DISCONTIGMEM_DEFAULT + def_bool y + depends on NUMA + +config ARCH_SPARSEMEM_ENABLE + def_bool y + depends on NUMA || (EXPERIMENTAL && (X86_PC || X86_64)) + select SPARSEMEM_STATIC if X86_32 + select SPARSEMEM_VMEMMAP_ENABLE if X86_64 + +config ARCH_SELECT_MEMORY_MODEL + def_bool y + depends on X86_32 && ARCH_SPARSEMEM_ENABLE + +config ARCH_MEMORY_PROBE + def_bool X86_64 + depends on MEMORY_HOTPLUG + +source "mm/Kconfig" + +config HIGHPTE + bool "Allocate 3rd-level pagetables from highmem" + depends on X86_32 && (HIGHMEM4G || HIGHMEM64G) + help + The VM uses one page table entry for each page of physical memory. + For systems with a lot of RAM, this can be wasteful of precious + low memory. Setting this option will put user-space page table + entries in high memory. + +config MATH_EMULATION + bool + prompt "Math emulation" if X86_32 + ---help--- + Linux can emulate a math coprocessor (used for floating point + operations) if you don't have one. 486DX and Pentium processors have + a math coprocessor built in, 486SX and 386 do not, unless you added + a 487DX or 387, respectively. (The messages during boot time can + give you some hints here ["man dmesg"].) Everyone needs either a + coprocessor or this emulation. + + If you don't have a math coprocessor, you need to say Y here; if you + say Y here even though you have a coprocessor, the coprocessor will + be used nevertheless. (This behavior can be changed with the kernel + command line option "no387", which comes handy if your coprocessor + is broken. Try "man bootparam" or see the documentation of your boot + loader (lilo or loadlin) about how to pass options to the kernel at + boot time.) This means that it is a good idea to say Y here if you + intend to use this kernel on different machines. + + More information about the internals of the Linux math coprocessor + emulation can be found in <file:arch/x86/math-emu/README>. + + If you are not sure, say Y; apart from resulting in a 66 KB bigger + kernel, it won't hurt. + +config MTRR + bool "MTRR (Memory Type Range Register) support" + ---help--- + On Intel P6 family processors (Pentium Pro, Pentium II and later) + the Memory Type Range Registers (MTRRs) may be used to control + processor access to memory ranges. This is most useful if you have + a video (VGA) card on a PCI or AGP bus. Enabling write-combining + allows bus write transfers to be combined into a larger transfer + before bursting over the PCI/AGP bus. This can increase performance + of image write operations 2.5 times or more. Saying Y here creates a + /proc/mtrr file which may be used to manipulate your processor's + MTRRs. Typically the X server should use this. + + This code has a reasonably generic interface so that similar + control registers on other processors can be easily supported + as well: + + The Cyrix 6x86, 6x86MX and M II processors have Address Range + Registers (ARRs) which provide a similar functionality to MTRRs. For + these, the ARRs are used to emulate the MTRRs. + The AMD K6-2 (stepping 8 and above) and K6-3 processors have two + MTRRs. The Centaur C6 (WinChip) has 8 MCRs, allowing + write-combining. All of these processors are supported by this code + and it makes sense to say Y here if you have one of them. + + Saying Y here also fixes a problem with buggy SMP BIOSes which only + set the MTRRs for the boot CPU and not for the secondary CPUs. This + can lead to all sorts of problems, so it's good to say Y here. + + You can safely say Y even if your machine doesn't have MTRRs, you'll + just add about 9 KB to your kernel. + + See <file:Documentation/mtrr.txt> for more information. + +config EFI + bool "Boot from EFI support" + depends on X86_32 && ACPI + default n + ---help--- + This enables the kernel to boot on EFI platforms using + system configuration information passed to it from the firmware. + This also enables the kernel to use any EFI runtime services that are + available (such as the EFI variable services). + + This option is only useful on systems that have EFI firmware + and will result in a kernel image that is ~8k larger. In addition, + you must use the latest ELILO loader available at + <http://elilo.sourceforge.net> in order to take advantage of + kernel initialization using EFI information (neither GRUB nor LILO know + anything about EFI). However, even with this option, the resultant + kernel should continue to boot on existing non-EFI platforms. + +config IRQBALANCE + bool "Enable kernel irq balancing" + depends on X86_32 && SMP && X86_IO_APIC + default y + help + The default yes will allow the kernel to do irq load balancing. + Saying no will keep the kernel from doing irq load balancing. + +# turning this on wastes a bunch of space. +# Summit needs it only when NUMA is on +config BOOT_IOREMAP + bool + depends on X86_32 && (((X86_SUMMIT || X86_GENERICARCH) && NUMA) || (X86 && EFI)) + default y + +config SECCOMP + bool "Enable seccomp to safely compute untrusted bytecode" + depends on PROC_FS + default y + help + This kernel feature is useful for number crunching applications + that may need to compute untrusted bytecode during their + execution. By using pipes or other transports made available to + the process as file descriptors supporting the read/write + syscalls, it's possible to isolate those applications in + their own address space using seccomp. Once seccomp is + enabled via /proc/<pid>/seccomp, it cannot be disabled + and the task is only allowed to execute a few safe syscalls + defined by each seccomp mode. + + If unsure, say Y. Only embedded should say N here. + +config CC_STACKPROTECTOR + bool "Enable -fstack-protector buffer overflow detection (EXPERIMENTAL)" + depends on X86_64 && EXPERIMENTAL + help + This option turns on the -fstack-protector GCC feature. This + feature puts, at the beginning of critical functions, a canary + value on the stack just before the return address, and validates + the value just before actually returning. Stack based buffer + overflows (that need to overwrite this return address) now also + overwrite the canary, which gets detected and the attack is then + neutralized via a kernel panic. + + This feature requires gcc version 4.2 or above, or a distribution + gcc with the feature backported. Older versions are automatically + detected and for those versions, this configuration option is ignored. + +config CC_STACKPROTECTOR_ALL + bool "Use stack-protector for all functions" + depends on CC_STACKPROTECTOR + help + Normally, GCC only inserts the canary value protection for + functions that use large-ish on-stack buffers. By enabling + this option, GCC will be asked to do this for ALL functions. + +source kernel/Kconfig.hz + +config KEXEC + bool "kexec system call" + help + kexec is a system call that implements the ability to shutdown your + current kernel, and to start another kernel. It is like a reboot + but it is independent of the system firmware. And like a reboot + you can start any kernel with it, not just Linux. + + The name comes from the similarity to the exec system call. + + It is an ongoing process to be certain the hardware in a machine + is properly shutdown, so do not be surprised if this code does not + initially work for you. It may help to enable device hotplugging + support. As of this writing the exact hardware interface is + strongly in flux, so no good recommendation can be made. + +config CRASH_DUMP + bool "kernel crash dumps (EXPERIMENTAL)" + depends on EXPERIMENTAL + depends on X86_64 || (X86_32 && HIGHMEM) + help + Generate crash dump after being started by kexec. + This should be normally only set in special crash dump kernels + which are loaded in the main kernel with kexec-tools into + a specially reserved region and then later executed after + a crash by kdump/kexec. The crash dump kernel must be compiled + to a memory address not used by the main kernel or BIOS using + PHYSICAL_START, or it must be built as a relocatable image + (CONFIG_RELOCATABLE=y). + For more details see Documentation/kdump/kdump.txt + +config PHYSICAL_START + hex "Physical address where the kernel is loaded" if (EMBEDDED || CRASH_DUMP) + default "0x1000000" if X86_NUMAQ + default "0x200000" if X86_64 + default "0x100000" + help + This gives the physical address where the kernel is loaded. + + If kernel is a not relocatable (CONFIG_RELOCATABLE=n) then + bzImage will decompress itself to above physical address and + run from there. Otherwise, bzImage will run from the address where + it has been loaded by the boot loader and will ignore above physical + address. + + In normal kdump cases one does not have to set/change this option + as now bzImage can be compiled as a completely relocatable image + (CONFIG_RELOCATABLE=y) and be used to load and run from a different + address. This option is mainly useful for the folks who don't want + to use a bzImage for capturing the crash dump and want to use a + vmlinux instead. vmlinux is not relocatable hence a kernel needs + to be specifically compiled to run from a specific memory area + (normally a reserved region) and this option comes handy. + + So if you are using bzImage for capturing the crash dump, leave + the value here unchanged to 0x100000 and set CONFIG_RELOCATABLE=y. + Otherwise if you plan to use vmlinux for capturing the crash dump + change this value to start of the reserved region (Typically 16MB + 0x1000000). In other words, it can be set based on the "X" value as + specified in the "crashkernel=YM@XM" command line boot parameter + passed to the panic-ed kernel. Typically this parameter is set as + crashkernel=64M@16M. Please take a look at + Documentation/kdump/kdump.txt for more details about crash dumps. + + Usage of bzImage for capturing the crash dump is recommended as + one does not have to build two kernels. Same kernel can be used + as production kernel and capture kernel. Above option should have + gone away after relocatable bzImage support is introduced. But it + is present because there are users out there who continue to use + vmlinux for dump capture. This option should go away down the + line. + + Don't change this unless you know what you are doing. + +config RELOCATABLE + bool "Build a relocatable kernel (EXPERIMENTAL)" + depends on EXPERIMENTAL + help + This builds a kernel image that retains relocation information + so it can be loaded someplace besides the default 1MB. + The relocations tend to make the kernel binary about 10% larger, + but are discarded at runtime. + + One use is for the kexec on panic case where the recovery kernel + must live at a different physical address than the primary + kernel. + + Note: If CONFIG_RELOCATABLE=y, then the kernel runs from the address + it has been loaded at and the compile time physical address + (CONFIG_PHYSICAL_START) is ignored. + +config PHYSICAL_ALIGN + hex + prompt "Alignment value to which kernel should be aligned" if X86_32 + default "0x100000" if X86_32 + default "0x200000" if X86_64 + range 0x2000 0x400000 + help + This value puts the alignment restrictions on physical address + where kernel is loaded and run from. Kernel is compiled for an + address which meets above alignment restriction. + + If bootloader loads the kernel at a non-aligned address and + CONFIG_RELOCATABLE is set, kernel will move itself to nearest + address aligned to above value and run from there. + + If bootloader loads the kernel at a non-aligned address and + CONFIG_RELOCATABLE is not set, kernel will ignore the run time + load address and decompress itself to the address it has been + compiled for and run from there. The address for which kernel is + compiled already meets above alignment restrictions. Hence the + end result is that kernel runs from a physical address meeting + above alignment restrictions. + + Don't change this unless you know what you are doing. + +config HOTPLUG_CPU + bool "Support for suspend on SMP and hot-pluggable CPUs (EXPERIMENTAL)" + depends on SMP && HOTPLUG && EXPERIMENTAL && !X86_VOYAGER + ---help--- + Say Y here to experiment with turning CPUs off and on, and to + enable suspend on SMP systems. CPUs can be controlled through + /sys/devices/system/cpu. + Say N if you want to disable CPU hotplug and don't need to + suspend. + +config COMPAT_VDSO + bool "Compat VDSO support" + default y + depends on X86_32 + help + Map the VDSO to the predictable old-style address too. + ---help--- + Say N here if you are running a sufficiently recent glibc + version (2.3.3 or later), to remove the high-mapped + VDSO mapping and to exclusively use the randomized VDSO. + + If unsure, say Y. + +endmenu + +config ARCH_ENABLE_MEMORY_HOTPLUG + def_bool y + depends on X86_64 || (X86_32 && HIGHMEM) + +config MEMORY_HOTPLUG_RESERVE + def_bool X86_64 + depends on (MEMORY_HOTPLUG && DISCONTIGMEM) + +config HAVE_ARCH_EARLY_PFN_TO_NID + def_bool X86_64 + depends on NUMA + +config OUT_OF_LINE_PFN_TO_PAGE + def_bool X86_64 + depends on DISCONTIGMEM + +menu "Power management options" + depends on !X86_VOYAGER + +config ARCH_HIBERNATION_HEADER + bool + depends on X86_64 && HIBERNATION + default y + +source "kernel/power/Kconfig" + +source "drivers/acpi/Kconfig" + +menuconfig APM + tristate "APM (Advanced Power Management) BIOS support" + depends on X86_32 && PM_SLEEP && !X86_VISWS + ---help--- + APM is a BIOS specification for saving power using several different + techniques. This is mostly useful for battery powered laptops with + APM compliant BIOSes. If you say Y here, the system time will be + reset after a RESUME operation, the /proc/apm device will provide + battery status information, and user-space programs will receive + notification of APM "events" (e.g. battery status change). + + If you select "Y" here, you can disable actual use of the APM + BIOS by passing the "apm=off" option to the kernel at boot time. + + Note that the APM support is almost completely disabled for + machines with more than one CPU. + + In order to use APM, you will need supporting software. For location + and more information, read <file:Documentation/pm.txt> and the + Battery Powered Linux mini-HOWTO, available from + <http://www.tldp.org/docs.html#howto>. + + This driver does not spin down disk drives (see the hdparm(8) + manpage ("man 8 hdparm") for that), and it doesn't turn off + VESA-compliant "green" monitors. + + This driver does not support the TI 4000M TravelMate and the ACER + 486/DX4/75 because they don't have compliant BIOSes. Many "green" + desktop machines also don't have compliant BIOSes, and this driver + may cause those machines to panic during the boot phase. + + Generally, if you don't have a battery in your machine, there isn't + much point in using this driver and you should say N. If you get + random kernel OOPSes or reboots that don't seem to be related to + anything, try disabling/enabling this option (or disabling/enabling + APM in your BIOS). + + Some other things you should try when experiencing seemingly random, + "weird" problems: + + 1) make sure that you have enough swap space and that it is + enabled. + 2) pass the "no-hlt" option to the kernel + 3) switch on floating point emulation in the kernel and pass + the "no387" option to the kernel + 4) pass the "floppy=nodma" option to the kernel + 5) pass the "mem=4M" option to the kernel (thereby disabling + all but the first 4 MB of RAM) + 6) make sure that the CPU is not over clocked. + 7) read the sig11 FAQ at <http://www.bitwizard.nl/sig11/> + 8) disable the cache from your BIOS settings + 9) install a fan for the video card or exchange video RAM + 10) install a better fan for the CPU + 11) exchange RAM chips + 12) exchange the motherboard. + + To compile this driver as a module, choose M here: the + module will be called apm. + +if APM + +config APM_IGNORE_USER_SUSPEND + bool "Ignore USER SUSPEND" + help + This option will ignore USER SUSPEND requests. On machines with a + compliant APM BIOS, you want to say N. However, on the NEC Versa M + series notebooks, it is necessary to say Y because of a BIOS bug. + +config APM_DO_ENABLE + bool "Enable PM at boot time" + ---help--- + Enable APM features at boot time. From page 36 of the APM BIOS + specification: "When disabled, the APM BIOS does not automatically + power manage devices, enter the Standby State, enter the Suspend + State, or take power saving steps in response to CPU Idle calls." + This driver will make CPU Idle calls when Linux is idle (unless this + feature is turned off -- see "Do CPU IDLE calls", below). This + should always save battery power, but more complicated APM features + will be dependent on your BIOS implementation. You may need to turn + this option off if your computer hangs at boot time when using APM + support, or if it beeps continuously instead of suspending. Turn + this off if you have a NEC UltraLite Versa 33/C or a Toshiba + T400CDT. This is off by default since most machines do fine without + this feature. + +config APM_CPU_IDLE + bool "Make CPU Idle calls when idle" + help + Enable calls to APM CPU Idle/CPU Busy inside the kernel's idle loop. + On some machines, this can activate improved power savings, such as + a slowed CPU clock rate, when the machine is idle. These idle calls + are made after the idle loop has run for some length of time (e.g., + 333 mS). On some machines, this will cause a hang at boot time or + whenever the CPU becomes idle. (On machines with more than one CPU, + this option does nothing.) + +config APM_DISPLAY_BLANK + bool "Enable console blanking using APM" + help + Enable console blanking using the APM. Some laptops can use this to + turn off the LCD backlight when the screen blanker of the Linux + virtual console blanks the screen. Note that this is only used by + the virtual console screen blanker, and won't turn off the backlight + when using the X Window system. This also doesn't have anything to + do with your VESA-compliant power-saving monitor. Further, this + option doesn't work for all laptops -- it might not turn off your + backlight at all, or it might print a lot of errors to the console, + especially if you are using gpm. + +config APM_ALLOW_INTS + bool "Allow interrupts during APM BIOS calls" + help + Normally we disable external interrupts while we are making calls to + the APM BIOS as a measure to lessen the effects of a badly behaving + BIOS implementation. The BIOS should reenable interrupts if it + needs to. Unfortunately, some BIOSes do not -- especially those in + many of the newer IBM Thinkpads. If you experience hangs when you + suspend, try setting this to Y. Otherwise, say N. + +config APM_REAL_MODE_POWER_OFF + bool "Use real mode APM BIOS call to power off" + help + Use real mode APM BIOS calls to switch off the computer. This is + a work-around for a number of buggy BIOSes. Switch this option on if + your computer crashes instead of powering off properly. + +endif # APM + +source "arch/x86/kernel/cpu/cpufreq/Kconfig" + +source "drivers/cpuidle/Kconfig" + +endmenu + + +menu "Bus options (PCI etc.)" + +config PCI + bool "PCI support" if !X86_VISWS + depends on !X86_VOYAGER + default y if X86_VISWS + select ARCH_SUPPORTS_MSI if (X86_LOCAL_APIC && X86_IO_APIC) + help + Find out whether you have a PCI motherboard. PCI is the name of a + bus system, i.e. the way the CPU talks to the other stuff inside + your box. Other bus systems are ISA, EISA, MicroChannel (MCA) or + VESA. If you have PCI, say Y, otherwise N. + + The PCI-HOWTO, available from + <http://www.tldp.org/docs.html#howto>, contains valuable + information about which PCI hardware does work under Linux and which + doesn't. + +choice + prompt "PCI access mode" + depends on X86_32 && PCI && !X86_VISWS + default PCI_GOANY + ---help--- + On PCI systems, the BIOS can be used to detect the PCI devices and + determine their configuration. However, some old PCI motherboards + have BIOS bugs and may crash if this is done. Also, some embedded + PCI-based systems don't have any BIOS at all. Linux can also try to + detect the PCI hardware directly without using the BIOS. + + With this option, you can specify how Linux should detect the + PCI devices. If you choose "BIOS", the BIOS will be used, + if you choose "Direct", the BIOS won't be used, and if you + choose "MMConfig", then PCI Express MMCONFIG will be used. + If you choose "Any", the kernel will try MMCONFIG, then the + direct access method and falls back to the BIOS if that doesn't + work. If unsure, go with the default, which is "Any". + +config PCI_GOBIOS + bool "BIOS" + +config PCI_GOMMCONFIG + bool "MMConfig" + +config PCI_GODIRECT + bool "Direct" + +config PCI_GOANY + bool "Any" + +endchoice + +config PCI_BIOS + bool + depends on X86_32 && !X86_VISWS && PCI && (PCI_GOBIOS || PCI_GOANY) + default y + +# x86-64 doesn't support PCI BIOS access from long mode so always go direct. +config PCI_DIRECT + bool + depends on PCI && (X86_64 || (PCI_GODIRECT || PCI_GOANY) || X86_VISWS) + default y + +config PCI_MMCONFIG + bool + depends on X86_32 && PCI && ACPI && (PCI_GOMMCONFIG || PCI_GOANY) + default y + +config PCI_DOMAINS + bool + depends on PCI + default y + +config PCI_MMCONFIG + bool "Support mmconfig PCI config space access" + depends on X86_64 && PCI && ACPI + +config DMAR + bool "Support for DMA Remapping Devices (EXPERIMENTAL)" + depends on X86_64 && PCI_MSI && ACPI && EXPERIMENTAL + help + DMA remapping (DMAR) devices support enables independent address + translations for Direct Memory Access (DMA) from devices. + These DMA remapping devices are reported via ACPI tables + and include PCI device scope covered by these DMA + remapping devices. + +config DMAR_GFX_WA + bool "Support for Graphics workaround" + depends on DMAR + default y + help + Current Graphics drivers tend to use physical address + for DMA and avoid using DMA APIs. Setting this config + option permits the IOMMU driver to set a unity map for + all the OS-visible memory. Hence the driver can continue + to use physical addresses for DMA. + +config DMAR_FLOPPY_WA + bool + depends on DMAR + default y + help + Floppy disk drivers are know to bypass DMA API calls + thereby failing to work when IOMMU is enabled. This + workaround will setup a 1:1 mapping for the first + 16M to make floppy (an ISA device) work. + +source "drivers/pci/pcie/Kconfig" + +source "drivers/pci/Kconfig" + +# x86_64 have no ISA slots, but do have ISA-style DMA. +config ISA_DMA_API + bool + default y + +if X86_32 + +config ISA + bool "ISA support" + depends on !(X86_VOYAGER || X86_VISWS) + help + Find out whether you have ISA slots on your motherboard. ISA is the + name of a bus system, i.e. the way the CPU talks to the other stuff + inside your box. Other bus systems are PCI, EISA, MicroChannel + (MCA) or VESA. ISA is an older system, now being displaced by PCI; + newer boards don't support it. If you have ISA, say Y, otherwise N. + +config EISA + bool "EISA support" + depends on ISA + ---help--- + The Extended Industry Standard Architecture (EISA) bus was + developed as an open alternative to the IBM MicroChannel bus. + + The EISA bus provided some of the features of the IBM MicroChannel + bus while maintaining backward compatibility with cards made for + the older ISA bus. The EISA bus saw limited use between 1988 and + 1995 when it was made obsolete by the PCI bus. + + Say Y here if you are building a kernel for an EISA-based machine. + + Otherwise, say N. + +source "drivers/eisa/Kconfig" + +config MCA + bool "MCA support" if !(X86_VISWS || X86_VOYAGER) + default y if X86_VOYAGER + help + MicroChannel Architecture is found in some IBM PS/2 machines and + laptops. It is a bus system similar to PCI or ISA. See + <file:Documentation/mca.txt> (and especially the web page given + there) before attempting to build an MCA bus kernel. + +source "drivers/mca/Kconfig" + +config SCx200 + tristate "NatSemi SCx200 support" + depends on !X86_VOYAGER + help + This provides basic support for National Semiconductor's + (now AMD's) Geode processors. The driver probes for the + PCI-IDs of several on-chip devices, so its a good dependency + for other scx200_* drivers. + + If compiled as a module, the driver is named scx200. + +config SCx200HR_TIMER + tristate "NatSemi SCx200 27MHz High-Resolution Timer Support" + depends on SCx200 && GENERIC_TIME + default y + help + This driver provides a clocksource built upon the on-chip + 27MHz high-resolution timer. Its also a workaround for + NSC Geode SC-1100's buggy TSC, which loses time when the + processor goes idle (as is done by the scheduler). The + other workaround is idle=poll boot option. + +config GEODE_MFGPT_TIMER + bool "Geode Multi-Function General Purpose Timer (MFGPT) events" + depends on MGEODE_LX && GENERIC_TIME && GENERIC_CLOCKEVENTS + default y + help + This driver provides a clock event source based on the MFGPT + timer(s) in the CS5535 and CS5536 companion chip for the geode. + MFGPTs have a better resolution and max interval than the + generic PIT, and are suitable for use as high-res timers. + +endif # X86_32 + +config K8_NB + def_bool y + depends on AGP_AMD64 || (X86_64 && (GART_IOMMU || (PCI && NUMA))) + +source "drivers/pcmcia/Kconfig" + +source "drivers/pci/hotplug/Kconfig" + +endmenu + + +menu "Executable file formats / Emulations" + +source "fs/Kconfig.binfmt" + +config IA32_EMULATION + bool "IA32 Emulation" + depends on X86_64 + help + Include code to run 32-bit programs under a 64-bit kernel. You should + likely turn this on, unless you're 100% sure that you don't have any + 32-bit programs left. + +config IA32_AOUT + tristate "IA32 a.out support" + depends on IA32_EMULATION + help + Support old a.out binaries in the 32bit emulation. + +config COMPAT + bool + depends on IA32_EMULATION + default y + +config COMPAT_FOR_U64_ALIGNMENT + def_bool COMPAT + depends on X86_64 + +config SYSVIPC_COMPAT + bool + depends on X86_64 && COMPAT && SYSVIPC + default y + +endmenu + + +source "net/Kconfig" + +source "drivers/Kconfig" + +source "drivers/firmware/Kconfig" + +source "fs/Kconfig" + +source "kernel/Kconfig.instrumentation" + +source "arch/x86/Kconfig.debug" + +source "security/Kconfig" + +source "crypto/Kconfig" + +source "lib/Kconfig" diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu new file mode 100644 index 000000000000..c30162202dc4 --- /dev/null +++ b/arch/x86/Kconfig.cpu @@ -0,0 +1,401 @@ +# Put here option for CPU selection and depending optimization +if !X86_ELAN + +choice + prompt "Processor family" + default M686 if X86_32 + default GENERIC_CPU if X86_64 + +config M386 + bool "386" + depends on X86_32 && !UML + ---help--- + This is the processor type of your CPU. This information is used for + optimizing purposes. In order to compile a kernel that can run on + all x86 CPU types (albeit not optimally fast), you can specify + "386" here. + + The kernel will not necessarily run on earlier architectures than + the one you have chosen, e.g. a Pentium optimized kernel will run on + a PPro, but not necessarily on a i486. + + Here are the settings recommended for greatest speed: + - "386" for the AMD/Cyrix/Intel 386DX/DXL/SL/SLC/SX, Cyrix/TI + 486DLC/DLC2, UMC 486SX-S and NexGen Nx586. Only "386" kernels + will run on a 386 class machine. + - "486" for the AMD/Cyrix/IBM/Intel 486DX/DX2/DX4 or + SL/SLC/SLC2/SLC3/SX/SX2 and UMC U5D or U5S. + - "586" for generic Pentium CPUs lacking the TSC + (time stamp counter) register. + - "Pentium-Classic" for the Intel Pentium. + - "Pentium-MMX" for the Intel Pentium MMX. + - "Pentium-Pro" for the Intel Pentium Pro. + - "Pentium-II" for the Intel Pentium II or pre-Coppermine Celeron. + - "Pentium-III" for the Intel Pentium III or Coppermine Celeron. + - "Pentium-4" for the Intel Pentium 4 or P4-based Celeron. + - "K6" for the AMD K6, K6-II and K6-III (aka K6-3D). + - "Athlon" for the AMD K7 family (Athlon/Duron/Thunderbird). + - "Crusoe" for the Transmeta Crusoe series. + - "Efficeon" for the Transmeta Efficeon series. + - "Winchip-C6" for original IDT Winchip. + - "Winchip-2" for IDT Winchip 2. + - "Winchip-2A" for IDT Winchips with 3dNow! capabilities. + - "GeodeGX1" for Geode GX1 (Cyrix MediaGX). + - "Geode GX/LX" For AMD Geode GX and LX processors. + - "CyrixIII/VIA C3" for VIA Cyrix III or VIA C3. + - "VIA C3-2" for VIA C3-2 "Nehemiah" (model 9 and above). + - "VIA C7" for VIA C7. + + If you don't know what to do, choose "386". + +config M486 + bool "486" + depends on X86_32 + help + Select this for a 486 series processor, either Intel or one of the + compatible processors from AMD, Cyrix, IBM, or Intel. Includes DX, + DX2, and DX4 variants; also SL/SLC/SLC2/SLC3/SX/SX2 and UMC U5D or + U5S. + +config M586 + bool "586/K5/5x86/6x86/6x86MX" + depends on X86_32 + help + Select this for an 586 or 686 series processor such as the AMD K5, + the Cyrix 5x86, 6x86 and 6x86MX. This choice does not + assume the RDTSC (Read Time Stamp Counter) instruction. + +config M586TSC + bool "Pentium-Classic" + depends on X86_32 + help + Select this for a Pentium Classic processor with the RDTSC (Read + Time Stamp Counter) instruction for benchmarking. + +config M586MMX + bool "Pentium-MMX" + depends on X86_32 + help + Select this for a Pentium with the MMX graphics/multimedia + extended instructions. + +config M686 + bool "Pentium-Pro" + depends on X86_32 + help + Select this for Intel Pentium Pro chips. This enables the use of + Pentium Pro extended instructions, and disables the init-time guard + against the f00f bug found in earlier Pentiums. + +config MPENTIUMII + bool "Pentium-II/Celeron(pre-Coppermine)" + depends on X86_32 + help + Select this for Intel chips based on the Pentium-II and + pre-Coppermine Celeron core. This option enables an unaligned + copy optimization, compiles the kernel with optimization flags + tailored for the chip, and applies any applicable Pentium Pro + optimizations. + +config MPENTIUMIII + bool "Pentium-III/Celeron(Coppermine)/Pentium-III Xeon" + depends on X86_32 + help + Select this for Intel chips based on the Pentium-III and + Celeron-Coppermine core. This option enables use of some + extended prefetch instructions in addition to the Pentium II + extensions. + +config MPENTIUMM + bool "Pentium M" + depends on X86_32 + help + Select this for Intel Pentium M (not Pentium-4 M) + notebook chips. + +config MPENTIUM4 + bool "Pentium-4/Celeron(P4-based)/Pentium-4 M/older Xeon" + depends on X86_32 + help + Select this for Intel Pentium 4 chips. This includes the + Pentium 4, Pentium D, P4-based Celeron and Xeon, and + Pentium-4 M (not Pentium M) chips. This option enables compile + flags optimized for the chip, uses the correct cache line size, and + applies any applicable optimizations. + + CPUIDs: F[0-6][1-A] (in /proc/cpuinfo show = cpu family : 15 ) + + Select this for: + Pentiums (Pentium 4, Pentium D, Celeron, Celeron D) corename: + -Willamette + -Northwood + -Mobile Pentium 4 + -Mobile Pentium 4 M + -Extreme Edition (Gallatin) + -Prescott + -Prescott 2M + -Cedar Mill + -Presler + -Smithfiled + Xeons (Intel Xeon, Xeon MP, Xeon LV, Xeon MV) corename: + -Foster + -Prestonia + -Gallatin + -Nocona + -Irwindale + -Cranford + -Potomac + -Paxville + -Dempsey + + +config MK6 + bool "K6/K6-II/K6-III" + depends on X86_32 + help + Select this for an AMD K6-family processor. Enables use of + some extended instructions, and passes appropriate optimization + flags to GCC. + +config MK7 + bool "Athlon/Duron/K7" + depends on X86_32 + help + Select this for an AMD Athlon K7-family processor. Enables use of + some extended instructions, and passes appropriate optimization + flags to GCC. + +config MK8 + bool "Opteron/Athlon64/Hammer/K8" + help + Select this for an AMD Opteron or Athlon64 Hammer-family processor. Enables + use of some extended instructions, and passes appropriate optimization + flags to GCC. + +config MCRUSOE + bool "Crusoe" + depends on X86_32 + help + Select this for a Transmeta Crusoe processor. Treats the processor + like a 586 with TSC, and sets some GCC optimization flags (like a + Pentium Pro with no alignment requirements). + +config MEFFICEON + bool "Efficeon" + depends on X86_32 + help + Select this for a Transmeta Efficeon processor. + +config MWINCHIPC6 + bool "Winchip-C6" + depends on X86_32 + help + Select this for an IDT Winchip C6 chip. Linux and GCC + treat this chip as a 586TSC with some extended instructions + and alignment requirements. + +config MWINCHIP2 + bool "Winchip-2" + depends on X86_32 + help + Select this for an IDT Winchip-2. Linux and GCC + treat this chip as a 586TSC with some extended instructions + and alignment requirements. + +config MWINCHIP3D + bool "Winchip-2A/Winchip-3" + depends on X86_32 + help + Select this for an IDT Winchip-2A or 3. Linux and GCC + treat this chip as a 586TSC with some extended instructions + and alignment requirements. Also enable out of order memory + stores for this CPU, which can increase performance of some + operations. + +config MGEODEGX1 + bool "GeodeGX1" + depends on X86_32 + help + Select this for a Geode GX1 (Cyrix MediaGX) chip. + +config MGEODE_LX + bool "Geode GX/LX" + depends on X86_32 + help + Select this for AMD Geode GX and LX processors. + +config MCYRIXIII + bool "CyrixIII/VIA-C3" + depends on X86_32 + help + Select this for a Cyrix III or C3 chip. Presently Linux and GCC + treat this chip as a generic 586. Whilst the CPU is 686 class, + it lacks the cmov extension which gcc assumes is present when + generating 686 code. + Note that Nehemiah (Model 9) and above will not boot with this + kernel due to them lacking the 3DNow! instructions used in earlier + incarnations of the CPU. + +config MVIAC3_2 + bool "VIA C3-2 (Nehemiah)" + depends on X86_32 + help + Select this for a VIA C3 "Nehemiah". Selecting this enables usage + of SSE and tells gcc to treat the CPU as a 686. + Note, this kernel will not boot on older (pre model 9) C3s. + +config MVIAC7 + bool "VIA C7" + depends on X86_32 + help + Select this for a VIA C7. Selecting this uses the correct cache + shift and tells gcc to treat the CPU as a 686. + +config MPSC + bool "Intel P4 / older Netburst based Xeon" + depends on X86_64 + help + Optimize for Intel Pentium 4, Pentium D and older Nocona/Dempsey + Xeon CPUs with Intel 64bit which is compatible with x86-64. + Note that the latest Xeons (Xeon 51xx and 53xx) are not based on the + Netburst core and shouldn't use this option. You can distinguish them + using the cpu family field + in /proc/cpuinfo. Family 15 is an older Xeon, Family 6 a newer one. + +config MCORE2 + bool "Core 2/newer Xeon" + help + Select this for Intel Core 2 and newer Core 2 Xeons (Xeon 51xx and 53xx) + CPUs. You can distinguish newer from older Xeons by the CPU family + in /proc/cpuinfo. Newer ones have 6 and older ones 15 (not a typo) + +config GENERIC_CPU + bool "Generic-x86-64" + depends on X86_64 + help + Generic x86-64 CPU. + Run equally well on all x86-64 CPUs. + +endchoice + +config X86_GENERIC + bool "Generic x86 support" + depends on X86_32 + help + Instead of just including optimizations for the selected + x86 variant (e.g. PII, Crusoe or Athlon), include some more + generic optimizations as well. This will make the kernel + perform better on x86 CPUs other than that selected. + + This is really intended for distributors who need more + generic optimizations. + +endif + +# +# Define implied options from the CPU selection here +config X86_L1_CACHE_BYTES + int + default "128" if GENERIC_CPU || MPSC + default "64" if MK8 || MCORE2 + depends on X86_64 + +config X86_INTERNODE_CACHE_BYTES + int + default "4096" if X86_VSMP + default X86_L1_CACHE_BYTES if !X86_VSMP + depends on X86_64 + +config X86_CMPXCHG + def_bool X86_64 || (X86_32 && !M386) + +config X86_L1_CACHE_SHIFT + int + default "7" if MPENTIUM4 || X86_GENERIC || GENERIC_CPU || MPSC + default "4" if X86_ELAN || M486 || M386 || MGEODEGX1 + default "5" if MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX + default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MVIAC7 + +config X86_XADD + bool + depends on X86_32 && !M386 + default y + +config X86_PPRO_FENCE + bool + depends on M686 || M586MMX || M586TSC || M586 || M486 || M386 || MGEODEGX1 + default y + +config X86_F00F_BUG + bool + depends on M586MMX || M586TSC || M586 || M486 || M386 + default y + +config X86_WP_WORKS_OK + bool + depends on X86_32 && !M386 + default y + +config X86_INVLPG + bool + depends on X86_32 && !M386 + default y + +config X86_BSWAP + bool + depends on X86_32 && !M386 + default y + +config X86_POPAD_OK + bool + depends on X86_32 && !M386 + default y + +config X86_ALIGNMENT_16 + bool + depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || X86_ELAN || MK6 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2 || MGEODEGX1 + default y + +config X86_GOOD_APIC + bool + depends on MK7 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || MK8 || MEFFICEON || MCORE2 || MVIAC7 || X86_64 + default y + +config X86_INTEL_USERCOPY + bool + depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON || MCORE2 + default y + +config X86_USE_PPRO_CHECKSUM + bool + depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MEFFICEON || MGEODE_LX || MCORE2 + default y + +config X86_USE_3DNOW + bool + depends on (MCYRIXIII || MK7 || MGEODE_LX) && !UML + default y + +config X86_OOSTORE + bool + depends on (MWINCHIP3D || MWINCHIP2 || MWINCHIPC6) && MTRR + default y + +config X86_TSC + bool + depends on ((MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2) && !X86_NUMAQ) || X86_64 + default y + +# this should be set for all -march=.. options where the compiler +# generates cmov. +config X86_CMOV + bool + depends on (MK7 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7) + default y + +config X86_MINIMUM_CPU_FAMILY + int + default "64" if X86_64 + default "4" if X86_32 && (X86_XADD || X86_CMPXCHG || X86_BSWAP || X86_WP_WORKS_OK) + default "3" + diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug new file mode 100644 index 000000000000..761ca7b5f120 --- /dev/null +++ b/arch/x86/Kconfig.debug @@ -0,0 +1,115 @@ +menu "Kernel hacking" + +config TRACE_IRQFLAGS_SUPPORT + def_bool y + +source "lib/Kconfig.debug" + +config EARLY_PRINTK + bool "Early printk" if EMBEDDED && DEBUG_KERNEL && X86_32 + default y + help + Write kernel log output directly into the VGA buffer or to a serial + port. + + This is useful for kernel debugging when your machine crashes very + early before the console code is initialized. For normal operation + it is not recommended because it looks ugly and doesn't cooperate + with klogd/syslogd or the X server. You should normally N here, + unless you want to debug such a crash. + +config DEBUG_STACKOVERFLOW + bool "Check for stack overflows" + depends on DEBUG_KERNEL + help + This option will cause messages to be printed if free stack space + drops below a certain limit. + +config DEBUG_STACK_USAGE + bool "Stack utilization instrumentation" + depends on DEBUG_KERNEL + help + Enables the display of the minimum amount of free stack which each + task has ever had available in the sysrq-T and sysrq-P debug output. + + This option will slow down process creation somewhat. + +comment "Page alloc debug is incompatible with Software Suspend on i386" + depends on DEBUG_KERNEL && HIBERNATION + depends on X86_32 + +config DEBUG_PAGEALLOC + bool "Debug page memory allocations" + depends on DEBUG_KERNEL && !HIBERNATION && !HUGETLBFS + depends on X86_32 + help + Unmap pages from the kernel linear mapping after free_pages(). + This results in a large slowdown, but helps to find certain types + of memory corruptions. + +config DEBUG_RODATA + bool "Write protect kernel read-only data structures" + depends on DEBUG_KERNEL + help + Mark the kernel read-only data as write-protected in the pagetables, + in order to catch accidental (and incorrect) writes to such const + data. This option may have a slight performance impact because a + portion of the kernel code won't be covered by a 2MB TLB anymore. + If in doubt, say "N". + +config 4KSTACKS + bool "Use 4Kb for kernel stacks instead of 8Kb" + depends on DEBUG_KERNEL + depends on X86_32 + help + If you say Y here the kernel will use a 4Kb stacksize for the + kernel stack attached to each process/thread. This facilitates + running more threads on a system and also reduces the pressure + on the VM subsystem for higher order allocations. This option + will also use IRQ stacks to compensate for the reduced stackspace. + +config X86_FIND_SMP_CONFIG + def_bool y + depends on X86_LOCAL_APIC || X86_VOYAGER + depends on X86_32 + +config X86_MPPARSE + def_bool y + depends on X86_LOCAL_APIC && !X86_VISWS + depends on X86_32 + +config DOUBLEFAULT + default y + bool "Enable doublefault exception handler" if EMBEDDED + depends on X86_32 + help + This option allows trapping of rare doublefault exceptions that + would otherwise cause a system to silently reboot. Disabling this + option saves about 4k and might cause you much additional grey + hair. + +config IOMMU_DEBUG + bool "Enable IOMMU debugging" + depends on GART_IOMMU && DEBUG_KERNEL + depends on X86_64 + help + Force the IOMMU to on even when you have less than 4GB of + memory and add debugging code. On overflow always panic. And + allow to enable IOMMU leak tracing. Can be disabled at boot + time with iommu=noforce. This will also enable scatter gather + list merging. Currently not recommended for production + code. When you use it make sure you have a big enough + IOMMU/AGP aperture. Most of the options enabled by this can + be set more finegrained using the iommu= command line + options. See Documentation/x86_64/boot-options.txt for more + details. + +config IOMMU_LEAK + bool "IOMMU leak tracing" + depends on DEBUG_KERNEL + depends on IOMMU_DEBUG + help + Add a simple leak tracer to the IOMMU code. This is useful when you + are debugging a buggy device driver that leaks IOMMU mappings. + +endmenu diff --git a/arch/x86/Makefile b/arch/x86/Makefile new file mode 100644 index 000000000000..116b03a45636 --- /dev/null +++ b/arch/x86/Makefile @@ -0,0 +1,20 @@ +# Unified Makefile for i386 and x86_64 + +# select defconfig based on actual architecture +ifeq ($(ARCH),x86) + KBUILD_DEFCONFIG := i386_defconfig +else + KBUILD_DEFCONFIG := $(ARCH)_defconfig +endif + +# No need to remake these files +$(srctree)/arch/x86/Makefile%: ; + +ifeq ($(CONFIG_X86_32),y) + include $(srctree)/arch/x86/Makefile_32 +else + include $(srctree)/arch/x86/Makefile_64 +endif + + + diff --git a/arch/x86/Makefile_32 b/arch/x86/Makefile_32 new file mode 100644 index 000000000000..50394da2f6c1 --- /dev/null +++ b/arch/x86/Makefile_32 @@ -0,0 +1,175 @@ +# +# i386 Makefile +# +# This file is included by the global makefile so that you can add your own +# architecture-specific flags and dependencies. Remember to do have actions +# for "archclean" cleaning up for this architecture. +# +# This file is subject to the terms and conditions of the GNU General Public +# License. See the file "COPYING" in the main directory of this archive +# for more details. +# +# Copyright (C) 1994 by Linus Torvalds +# +# 19990713 Artur Skawina <skawina@geocities.com> +# Added '-march' and '-mpreferred-stack-boundary' support +# +# 20050320 Kianusch Sayah Karadji <kianusch@sk-tech.net> +# Added support for GEODE CPU + +# BITS is used as extension for files which are available in a 32 bit +# and a 64 bit version to simplify shared Makefiles. +# e.g.: obj-y += foo_$(BITS).o +BITS := 32 +export BITS + +HAS_BIARCH := $(call cc-option-yn, -m32) +ifeq ($(HAS_BIARCH),y) +AS := $(AS) --32 +LD := $(LD) -m elf_i386 +CC := $(CC) -m32 +endif + +LDFLAGS := -m elf_i386 +OBJCOPYFLAGS := -O binary -R .note -R .comment -S +ifdef CONFIG_RELOCATABLE +LDFLAGS_vmlinux := --emit-relocs +endif +CHECKFLAGS += -D__i386__ + +KBUILD_CFLAGS += -pipe -msoft-float -mregparm=3 -freg-struct-return + +# prevent gcc from keeping the stack 16 byte aligned +KBUILD_CFLAGS += $(call cc-option,-mpreferred-stack-boundary=2) + +# CPU-specific tuning. Anything which can be shared with UML should go here. +include $(srctree)/arch/x86/Makefile_32.cpu + +# temporary until string.h is fixed +cflags-y += -ffreestanding + +# this works around some issues with generating unwind tables in older gccs +# newer gccs do it by default +cflags-y += -maccumulate-outgoing-args + +# Disable unit-at-a-time mode on pre-gcc-4.0 compilers, it makes gcc use +# a lot more stack due to the lack of sharing of stacklots: +KBUILD_CFLAGS += $(shell if [ $(call cc-version) -lt 0400 ] ; then echo $(call cc-option,-fno-unit-at-a-time); fi ;) + +# do binutils support CFI? +cflags-y += $(call as-instr,.cfi_startproc\n.cfi_rel_offset esp${comma}0\n.cfi_endproc,-DCONFIG_AS_CFI=1,) +KBUILD_AFLAGS += $(call as-instr,.cfi_startproc\n.cfi_rel_offset esp${comma}0\n.cfi_endproc,-DCONFIG_AS_CFI=1,) + +# is .cfi_signal_frame supported too? +cflags-y += $(call as-instr,.cfi_startproc\n.cfi_signal_frame\n.cfi_endproc,-DCONFIG_AS_CFI_SIGNAL_FRAME=1,) +KBUILD_AFLAGS += $(call as-instr,.cfi_startproc\n.cfi_signal_frame\n.cfi_endproc,-DCONFIG_AS_CFI_SIGNAL_FRAME=1,) + +KBUILD_CFLAGS += $(cflags-y) + +# Default subarch .c files +mcore-y := arch/x86/mach-default + +# Voyager subarch support +mflags-$(CONFIG_X86_VOYAGER) := -Iinclude/asm-x86/mach-voyager +mcore-$(CONFIG_X86_VOYAGER) := arch/x86/mach-voyager + +# VISWS subarch support +mflags-$(CONFIG_X86_VISWS) := -Iinclude/asm-x86/mach-visws +mcore-$(CONFIG_X86_VISWS) := arch/x86/mach-visws + +# NUMAQ subarch support +mflags-$(CONFIG_X86_NUMAQ) := -Iinclude/asm-x86/mach-numaq +mcore-$(CONFIG_X86_NUMAQ) := arch/x86/mach-default + +# BIGSMP subarch support +mflags-$(CONFIG_X86_BIGSMP) := -Iinclude/asm-x86/mach-bigsmp +mcore-$(CONFIG_X86_BIGSMP) := arch/x86/mach-default + +#Summit subarch support +mflags-$(CONFIG_X86_SUMMIT) := -Iinclude/asm-x86/mach-summit +mcore-$(CONFIG_X86_SUMMIT) := arch/x86/mach-default + +# generic subarchitecture +mflags-$(CONFIG_X86_GENERICARCH) := -Iinclude/asm-x86/mach-generic +mcore-$(CONFIG_X86_GENERICARCH) := arch/x86/mach-default +core-$(CONFIG_X86_GENERICARCH) += arch/x86/mach-generic/ + +# ES7000 subarch support +mflags-$(CONFIG_X86_ES7000) := -Iinclude/asm-x86/mach-es7000 +mcore-$(CONFIG_X86_ES7000) := arch/x86/mach-default +core-$(CONFIG_X86_ES7000) := arch/x86/mach-es7000/ + +# Xen paravirtualization support +core-$(CONFIG_XEN) += arch/x86/xen/ + +# lguest paravirtualization support +core-$(CONFIG_LGUEST_GUEST) += arch/x86/lguest/ + +# default subarch .h files +mflags-y += -Iinclude/asm-x86/mach-default + +head-y := arch/x86/kernel/head_32.o arch/x86/kernel/init_task.o + +libs-y += arch/x86/lib/ +core-y += arch/x86/kernel/ \ + arch/x86/mm/ \ + $(mcore-y)/ \ + arch/x86/crypto/ +drivers-$(CONFIG_MATH_EMULATION) += arch/x86/math-emu/ +drivers-$(CONFIG_PCI) += arch/x86/pci/ +# must be linked after kernel/ +drivers-$(CONFIG_OPROFILE) += arch/x86/oprofile/ +drivers-$(CONFIG_PM) += arch/x86/power/ +drivers-$(CONFIG_FB) += arch/x86/video/ + +KBUILD_CFLAGS += $(mflags-y) +KBUILD_AFLAGS += $(mflags-y) + +boot := arch/x86/boot + +PHONY += zImage bzImage compressed zlilo bzlilo \ + zdisk bzdisk fdimage fdimage144 fdimage288 isoimage install + +all: bzImage + +# KBUILD_IMAGE specify target image being built + KBUILD_IMAGE := $(boot)/bzImage +zImage zlilo zdisk: KBUILD_IMAGE := arch/x86/boot/zImage + +zImage bzImage: vmlinux + $(Q)$(MAKE) $(build)=$(boot) $(KBUILD_IMAGE) + $(Q)mkdir -p $(objtree)/arch/i386/boot + $(Q)ln -fsn ../../x86/boot/bzImage $(objtree)/arch/i386/boot/bzImage + +compressed: zImage + +zlilo bzlilo: vmlinux + $(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) zlilo + +zdisk bzdisk: vmlinux + $(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) zdisk + +fdimage fdimage144 fdimage288 isoimage: vmlinux + $(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) $@ + +install: + $(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) install + +archclean: + $(Q)rm -rf $(objtree)/arch/i386/boot + $(Q)$(MAKE) $(clean)=arch/x86/boot + +define archhelp + echo '* bzImage - Compressed kernel image (arch/x86/boot/bzImage)' + echo ' install - Install kernel using' + echo ' (your) ~/bin/installkernel or' + echo ' (distribution) /sbin/installkernel or' + echo ' install to $$(INSTALL_PATH) and run lilo' + echo ' bzdisk - Create a boot floppy in /dev/fd0' + echo ' fdimage - Create a boot floppy image' + echo ' isoimage - Create a boot CD-ROM image' +endef + +CLEAN_FILES += arch/x86/boot/fdimage \ + arch/x86/boot/image.iso \ + arch/x86/boot/mtools.conf diff --git a/arch/x86/Makefile_32.cpu b/arch/x86/Makefile_32.cpu new file mode 100644 index 000000000000..e372b584e919 --- /dev/null +++ b/arch/x86/Makefile_32.cpu @@ -0,0 +1,47 @@ +# CPU tuning section - shared with UML. +# Must change only cflags-y (or [yn]), not CFLAGS! That makes a difference for UML. + +#-mtune exists since gcc 3.4 +HAS_MTUNE := $(call cc-option-yn, -mtune=i386) +ifeq ($(HAS_MTUNE),y) +tune = $(call cc-option,-mtune=$(1),$(2)) +else +tune = $(call cc-option,-mcpu=$(1),$(2)) +endif + +align := $(cc-option-align) +cflags-$(CONFIG_M386) += -march=i386 +cflags-$(CONFIG_M486) += -march=i486 +cflags-$(CONFIG_M586) += -march=i586 +cflags-$(CONFIG_M586TSC) += -march=i586 +cflags-$(CONFIG_M586MMX) += -march=pentium-mmx +cflags-$(CONFIG_M686) += -march=i686 +cflags-$(CONFIG_MPENTIUMII) += -march=i686 $(call tune,pentium2) +cflags-$(CONFIG_MPENTIUMIII) += -march=i686 $(call tune,pentium3) +cflags-$(CONFIG_MPENTIUMM) += -march=i686 $(call tune,pentium3) +cflags-$(CONFIG_MPENTIUM4) += -march=i686 $(call tune,pentium4) +cflags-$(CONFIG_MK6) += -march=k6 +# Please note, that patches that add -march=athlon-xp and friends are pointless. +# They make zero difference whatsosever to performance at this time. +cflags-$(CONFIG_MK7) += -march=athlon +cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8,-march=athlon) +cflags-$(CONFIG_MCRUSOE) += -march=i686 $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0 +cflags-$(CONFIG_MEFFICEON) += -march=i686 $(call tune,pentium3) $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0 +cflags-$(CONFIG_MWINCHIPC6) += $(call cc-option,-march=winchip-c6,-march=i586) +cflags-$(CONFIG_MWINCHIP2) += $(call cc-option,-march=winchip2,-march=i586) +cflags-$(CONFIG_MWINCHIP3D) += $(call cc-option,-march=winchip2,-march=i586) +cflags-$(CONFIG_MCYRIXIII) += $(call cc-option,-march=c3,-march=i486) $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0 +cflags-$(CONFIG_MVIAC3_2) += $(call cc-option,-march=c3-2,-march=i686) +cflags-$(CONFIG_MVIAC7) += -march=i686 +cflags-$(CONFIG_MCORE2) += -march=i686 $(call tune,core2) + +# AMD Elan support +cflags-$(CONFIG_X86_ELAN) += -march=i486 + +# Geode GX1 support +cflags-$(CONFIG_MGEODEGX1) += -march=pentium-mmx + +# add at the end to overwrite eventual tuning options from earlier +# cpu entries +cflags-$(CONFIG_X86_GENERIC) += $(call tune,generic,$(call tune,i686)) + diff --git a/arch/x86/Makefile_64 b/arch/x86/Makefile_64 new file mode 100644 index 000000000000..a804860022e6 --- /dev/null +++ b/arch/x86/Makefile_64 @@ -0,0 +1,144 @@ +# +# x86_64 Makefile +# +# This file is included by the global makefile so that you can add your own +# architecture-specific flags and dependencies. Remember to do have actions +# for "archclean" and "archdep" for cleaning up and making dependencies for +# this architecture +# +# This file is subject to the terms and conditions of the GNU General Public +# License. See the file "COPYING" in the main directory of this archive +# for more details. +# +# Copyright (C) 1994 by Linus Torvalds +# +# 19990713 Artur Skawina <skawina@geocities.com> +# Added '-march' and '-mpreferred-stack-boundary' support +# 20000913 Pavel Machek <pavel@suse.cz> +# Converted for x86_64 architecture +# 20010105 Andi Kleen, add IA32 compiler. +# ....and later removed it again.... +# +# $Id: Makefile,v 1.31 2002/03/22 15:56:07 ak Exp $ + +# BITS is used as extension for files which are available in a 32 bit +# and a 64 bit version to simplify shared Makefiles. +# e.g.: obj-y += foo_$(BITS).o +BITS := 64 +export BITS + +LDFLAGS := -m elf_x86_64 +OBJCOPYFLAGS := -O binary -R .note -R .comment -S +LDFLAGS_vmlinux := +CHECKFLAGS += -D__x86_64__ -m64 + +cflags-y := +cflags-kernel-y := +cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8) +cflags-$(CONFIG_MPSC) += $(call cc-option,-march=nocona) +# gcc doesn't support -march=core2 yet as of gcc 4.3, but I hope it +# will eventually. Use -mtune=generic as fallback +cflags-$(CONFIG_MCORE2) += \ + $(call cc-option,-march=core2,$(call cc-option,-mtune=generic)) +cflags-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=generic) + +cflags-y += -m64 +cflags-y += -mno-red-zone +cflags-y += -mcmodel=kernel +cflags-y += -pipe +cflags-y += -Wno-sign-compare +cflags-y += -fno-asynchronous-unwind-tables +ifneq ($(CONFIG_DEBUG_INFO),y) +# -fweb shrinks the kernel a bit, but the difference is very small +# it also messes up debugging, so don't use it for now. +#cflags-y += $(call cc-option,-fweb) +endif +# -funit-at-a-time shrinks the kernel .text considerably +# unfortunately it makes reading oopses harder. +cflags-y += $(call cc-option,-funit-at-a-time) +# prevent gcc from generating any FP code by mistake +cflags-y += $(call cc-option,-mno-sse -mno-mmx -mno-sse2 -mno-3dnow,) +# this works around some issues with generating unwind tables in older gccs +# newer gccs do it by default +cflags-y += -maccumulate-outgoing-args + +# do binutils support CFI? +cflags-y += $(call as-instr,.cfi_startproc\n.cfi_rel_offset rsp${comma}0\n.cfi_endproc,-DCONFIG_AS_CFI=1,) +KBUILD_AFLAGS += $(call as-instr,.cfi_startproc\n.cfi_rel_offset rsp${comma}0\n.cfi_endproc,-DCONFIG_AS_CFI=1,) + +# is .cfi_signal_frame supported too? +cflags-y += $(call as-instr,.cfi_startproc\n.cfi_signal_frame\n.cfi_endproc,-DCONFIG_AS_CFI_SIGNAL_FRAME=1,) +KBUILD_AFLAGS += $(call as-instr,.cfi_startproc\n.cfi_signal_frame\n.cfi_endproc,-DCONFIG_AS_CFI_SIGNAL_FRAME=1,) + +cflags-$(CONFIG_CC_STACKPROTECTOR) += $(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-x86_64-has-stack-protector.sh "$(CC)" -fstack-protector ) +cflags-$(CONFIG_CC_STACKPROTECTOR_ALL) += $(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-x86_64-has-stack-protector.sh "$(CC)" -fstack-protector-all ) + +KBUILD_CFLAGS += $(cflags-y) +CFLAGS_KERNEL += $(cflags-kernel-y) +KBUILD_AFLAGS += -m64 + +head-y := arch/x86/kernel/head_64.o arch/x86/kernel/head64.o arch/x86/kernel/init_task.o + +libs-y += arch/x86/lib/ +core-y += arch/x86/kernel/ \ + arch/x86/mm/ \ + arch/x86/crypto/ \ + arch/x86/vdso/ +core-$(CONFIG_IA32_EMULATION) += arch/x86/ia32/ +drivers-$(CONFIG_PCI) += arch/x86/pci/ +drivers-$(CONFIG_OPROFILE) += arch/x86/oprofile/ + +boot := arch/x86/boot + +PHONY += bzImage bzlilo install archmrproper \ + fdimage fdimage144 fdimage288 isoimage archclean + +#Default target when executing "make" +all: bzImage + +BOOTIMAGE := arch/x86/boot/bzImage +KBUILD_IMAGE := $(BOOTIMAGE) + +bzImage: vmlinux + $(Q)$(MAKE) $(build)=$(boot) $(BOOTIMAGE) + $(Q)mkdir -p $(objtree)/arch/x86_64/boot + $(Q)ln -fsn ../../x86/boot/bzImage $(objtree)/arch/x86_64/boot/bzImage + +bzlilo: vmlinux + $(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(BOOTIMAGE) zlilo + +bzdisk: vmlinux + $(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(BOOTIMAGE) zdisk + +fdimage fdimage144 fdimage288 isoimage: vmlinux + $(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(BOOTIMAGE) $@ + +install: vdso_install + $(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(BOOTIMAGE) $@ + +vdso_install: +ifeq ($(CONFIG_IA32_EMULATION),y) + $(Q)$(MAKE) $(build)=arch/x86/ia32 $@ +endif + $(Q)$(MAKE) $(build)=arch/x86/vdso $@ + +archclean: + $(Q)rm -rf $(objtree)/arch/x86_64/boot + $(Q)$(MAKE) $(clean)=$(boot) + +define archhelp + echo '* bzImage - Compressed kernel image (arch/x86/boot/bzImage)' + echo ' install - Install kernel using' + echo ' (your) ~/bin/installkernel or' + echo ' (distribution) /sbin/installkernel or' + echo ' install to $$(INSTALL_PATH) and run lilo' + echo ' bzdisk - Create a boot floppy in /dev/fd0' + echo ' fdimage - Create a boot floppy image' + echo ' isoimage - Create a boot CD-ROM image' +endef + +CLEAN_FILES += arch/x86/boot/fdimage \ + arch/x86/boot/image.iso \ + arch/x86/boot/mtools.conf + + diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile index cb1035f2b7e9..7a3116ccf387 100644 --- a/arch/x86/boot/Makefile +++ b/arch/x86/boot/Makefile @@ -39,6 +39,7 @@ setup-y += printf.o string.o tty.o video.o version.o voyager.o setup-y += video-vga.o setup-y += video-vesa.o setup-y += video-bios.o + targets += $(setup-y) hostprogs-y := tools/build @@ -48,10 +49,10 @@ HOSTCFLAGS_build.o := $(LINUXINCLUDE) # How to compile the 16-bit code. Note we always compile for -march=i386, # that way we can complain to the user if the CPU is insufficient. -cflags-i386 := -cflags-x86_64 := -m32 -CFLAGS := $(LINUXINCLUDE) -g -Os -D_SETUP -D__KERNEL__ \ - $(cflags-$(ARCH)) \ +cflags-$(CONFIG_X86_32) := +cflags-$(CONFIG_X86_64) := -m32 +KBUILD_CFLAGS := $(LINUXINCLUDE) -g -Os -D_SETUP -D__KERNEL__ \ + $(cflags-y) \ -Wall -Wstrict-prototypes \ -march=i386 -mregparm=3 \ -include $(srctree)/$(src)/code16gcc.h \ @@ -61,13 +62,13 @@ CFLAGS := $(LINUXINCLUDE) -g -Os -D_SETUP -D__KERNEL__ \ $(call cc-option, -fno-unit-at-a-time)) \ $(call cc-option, -fno-stack-protector) \ $(call cc-option, -mpreferred-stack-boundary=2) -AFLAGS := $(CFLAGS) -D__ASSEMBLY__ +KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__ $(obj)/zImage: IMAGE_OFFSET := 0x1000 -$(obj)/zImage: EXTRA_AFLAGS := $(SVGA_MODE) $(RAMDISK) +$(obj)/zImage: asflags-y := $(SVGA_MODE) $(RAMDISK) $(obj)/bzImage: IMAGE_OFFSET := 0x100000 -$(obj)/bzImage: EXTRA_CFLAGS := -D__BIG_KERNEL__ -$(obj)/bzImage: EXTRA_AFLAGS := $(SVGA_MODE) $(RAMDISK) -D__BIG_KERNEL__ +$(obj)/bzImage: ccflags-y := -D__BIG_KERNEL__ +$(obj)/bzImage: asflags-y := $(SVGA_MODE) $(RAMDISK) -D__BIG_KERNEL__ $(obj)/bzImage: BUILDFLAGS := -b quiet_cmd_image = BUILD $@ diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h index 20bab9431acb..d2b5adf46512 100644 --- a/arch/x86/boot/boot.h +++ b/arch/x86/boot/boot.h @@ -17,13 +17,15 @@ #ifndef BOOT_BOOT_H #define BOOT_BOOT_H +#define STACK_SIZE 512 /* Minimum number of bytes for stack */ + #ifndef __ASSEMBLY__ #include <stdarg.h> #include <linux/types.h> #include <linux/edd.h> #include <asm/boot.h> -#include <asm/bootparam.h> +#include <asm/setup.h> /* Useful macros */ #define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)])) @@ -198,8 +200,6 @@ static inline int isdigit(int ch) } /* Heap -- available for dynamic lists. */ -#define STACK_SIZE 512 /* Minimum number of bytes for stack */ - extern char _end[]; extern char *HEAP; extern char *heap_end; @@ -216,9 +216,9 @@ static inline char *__get_heap(size_t s, size_t a, size_t n) #define GET_HEAP(type, n) \ ((type *)__get_heap(sizeof(type),__alignof__(type),(n))) -static inline int heap_free(void) +static inline bool heap_free(size_t n) { - return heap_end-HEAP; + return (int)(heap_end-HEAP) >= (int)n; } /* copy.S */ diff --git a/arch/x86/boot/compressed/Makefile_32 b/arch/x86/boot/compressed/Makefile_32 index 22613c652d22..e43ff7c56e6e 100644 --- a/arch/x86/boot/compressed/Makefile_32 +++ b/arch/x86/boot/compressed/Makefile_32 @@ -11,7 +11,7 @@ EXTRA_AFLAGS := -traditional LDFLAGS_vmlinux := -T hostprogs-y := relocs -CFLAGS := -m32 -D__KERNEL__ $(LINUX_INCLUDE) -O2 \ +KBUILD_CFLAGS := -m32 -D__KERNEL__ $(LINUX_INCLUDE) -O2 \ -fno-strict-aliasing -fPIC \ $(call cc-option,-ffreestanding) \ $(call cc-option,-fno-stack-protector) diff --git a/arch/x86/boot/compressed/Makefile_64 b/arch/x86/boot/compressed/Makefile_64 index dc6b3380cc45..7801e8dd90b2 100644 --- a/arch/x86/boot/compressed/Makefile_64 +++ b/arch/x86/boot/compressed/Makefile_64 @@ -6,11 +6,11 @@ targets := vmlinux vmlinux.bin vmlinux.bin.gz head_64.o misc_64.o piggy.o -CFLAGS := -m64 -D__KERNEL__ $(LINUXINCLUDE) -O2 \ +KBUILD_CFLAGS := -m64 -D__KERNEL__ $(LINUXINCLUDE) -O2 \ -fno-strict-aliasing -fPIC -mcmodel=small \ $(call cc-option, -ffreestanding) \ $(call cc-option, -fno-stack-protector) -AFLAGS := $(CFLAGS) -D__ASSEMBLY__ +KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__ LDFLAGS := -m elf_x86_64 LDFLAGS_vmlinux := -T diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index f35ea2237522..036e635f18a3 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -27,12 +27,18 @@ #include <asm/segment.h> #include <asm/page.h> #include <asm/boot.h> +#include <asm/asm-offsets.h> .section ".text.head","ax",@progbits .globl startup_32 startup_32: cld + /* test KEEP_SEGMENTS flag to see if the bootloader is asking + * us to not reload segments */ + testb $(1<<6), BP_loadflags(%esi) + jnz 1f + cli movl $(__BOOT_DS),%eax movl %eax,%ds @@ -40,6 +46,7 @@ startup_32: movl %eax,%fs movl %eax,%gs movl %eax,%ss +1: /* Calculate the delta between where we were compiled to run * at and where we were actually loaded at. This can only be done diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 49467640751f..1ccb38a7f0d2 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -29,6 +29,7 @@ #include <asm/pgtable.h> #include <asm/page.h> #include <asm/msr.h> +#include <asm/asm-offsets.h> .section ".text.head" .code32 @@ -36,11 +37,17 @@ startup_32: cld + /* test KEEP_SEGMENTS flag to see if the bootloader is asking + * us to not reload segments */ + testb $(1<<6), BP_loadflags(%esi) + jnz 1f + cli movl $(__KERNEL_DS), %eax movl %eax, %ds movl %eax, %es movl %eax, %ss +1: /* Calculate the delta between where we were compiled to run * at and where we were actually loaded at. This can only be done diff --git a/arch/x86/boot/compressed/misc_32.c b/arch/x86/boot/compressed/misc_32.c index b28505c544c9..b74d60d1b2fa 100644 --- a/arch/x86/boot/compressed/misc_32.c +++ b/arch/x86/boot/compressed/misc_32.c @@ -25,7 +25,7 @@ /* * Getting to provable safe in place decompression is hard. - * Worst case behaviours need to be analized. + * Worst case behaviours need to be analyzed. * Background information: * * The file layout is: @@ -94,7 +94,7 @@ * Adding 32768 instead of 32767 just makes for round numbers. * Adding the decompressor_size is necessary as it musht live after all * of the data as well. Last I measured the decompressor is about 14K. - * 10K of actuall data and 4K of bss. + * 10K of actual data and 4K of bss. * */ @@ -247,6 +247,9 @@ static void putstr(const char *s) int x,y,pos; char c; + if (RM_SCREEN_INFO.orig_video_mode == 0 && lines == 0 && cols == 0) + return; + x = RM_SCREEN_INFO.orig_x; y = RM_SCREEN_INFO.orig_y; diff --git a/arch/x86/boot/compressed/misc_64.c b/arch/x86/boot/compressed/misc_64.c index f932b0e89096..6ea015aa65e4 100644 --- a/arch/x86/boot/compressed/misc_64.c +++ b/arch/x86/boot/compressed/misc_64.c @@ -25,7 +25,7 @@ /* * Getting to provable safe in place decompression is hard. - * Worst case behaviours need to be analized. + * Worst case behaviours need to be analyzed. * Background information: * * The file layout is: @@ -94,7 +94,7 @@ * Adding 32768 instead of 32767 just makes for round numbers. * Adding the decompressor_size is necessary as it musht live after all * of the data as well. Last I measured the decompressor is about 14K. - * 10K of actuall data and 4K of bss. + * 10K of actual data and 4K of bss. * */ diff --git a/arch/x86/boot/compressed/relocs.c b/arch/x86/boot/compressed/relocs.c index 2d77ee728f92..7a0d00b2cf28 100644 --- a/arch/x86/boot/compressed/relocs.c +++ b/arch/x86/boot/compressed/relocs.c @@ -38,11 +38,9 @@ static const char* safe_abs_relocs[] = { static int is_safe_abs_reloc(const char* sym_name) { - int i, array_size; - - array_size = sizeof(safe_abs_relocs)/sizeof(char*); + int i; - for(i = 0; i < array_size; i++) { + for(i = 0; i < ARRAY_SIZE(safe_abs_relocs); i++) { if (!strcmp(sym_name, safe_abs_relocs[i])) /* Match found */ return 1; diff --git a/arch/x86/boot/cpucheck.c b/arch/x86/boot/cpucheck.c index e655a89c5510..769065bd23d7 100644 --- a/arch/x86/boot/cpucheck.c +++ b/arch/x86/boot/cpucheck.c @@ -42,13 +42,7 @@ static struct cpu_features cpu; static u32 cpu_vendor[3]; static u32 err_flags[NCAPINTS]; -#ifdef CONFIG_X86_64 -static const int req_level = 64; -#elif defined(CONFIG_X86_MINIMUM_CPU_FAMILY) static const int req_level = CONFIG_X86_MINIMUM_CPU_FAMILY; -#else -static const int req_level = 3; -#endif static const u32 req_flags[NCAPINTS] = { diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index f3140e596d40..6ef5a060fa11 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -119,7 +119,7 @@ _start: # Part 2 of the header, from the old setup.S .ascii "HdrS" # header signature - .word 0x0206 # header version number (>= 0x0105) + .word 0x0207 # header version number (>= 0x0105) # or else old loadlin-1.5 will fail) .globl realmode_swtch realmode_swtch: .word 0, 0 # default_switch, SETUPSEG @@ -173,7 +173,8 @@ ramdisk_size: .long 0 # its size in bytes bootsect_kludge: .long 0 # obsolete -heap_end_ptr: .word _end+1024 # (Header version 0x0201 or later) +heap_end_ptr: .word _end+STACK_SIZE-512 + # (Header version 0x0201 or later) # space from here (exclusive) down to # end of setup code can be used by setup # for local heap purposes. @@ -214,6 +215,11 @@ cmdline_size: .long COMMAND_LINE_SIZE-1 #length of the command line, #added with boot protocol #version 2.06 +hardware_subarch: .long 0 # subarchitecture, added with 2.07 + # default to 0 for normal x86 PC + +hardware_subarch_data: .quad 0 + # End of setup header ##################################################### .section ".inittext", "ax" @@ -225,28 +231,53 @@ start_of_setup: int $0x13 #endif -# We will have entered with %cs = %ds+0x20, normalize %cs so -# it is on par with the other segments. - pushw %ds - pushw $setup2 - lretw - -setup2: # Force %es = %ds movw %ds, %ax movw %ax, %es cld -# Stack paranoia: align the stack and make sure it is good -# for both 16- and 32-bit references. In particular, if we -# were meant to have been using the full 16-bit segment, the -# caller might have set %sp to zero, which breaks %esp-based -# references. - andw $~3, %sp # dword align (might as well...) - jnz 1f - movw $0xfffc, %sp # Make sure we're not zero -1: movzwl %sp, %esp # Clear upper half of %esp - sti +# Apparently some ancient versions of LILO invoked the kernel +# with %ss != %ds, which happened to work by accident for the +# old code. If the CAN_USE_HEAP flag is set in loadflags, or +# %ss != %ds, then adjust the stack pointer. + + # Smallest possible stack we can tolerate + movw $(_end+STACK_SIZE), %cx + + movw heap_end_ptr, %dx + addw $512, %dx + jnc 1f + xorw %dx, %dx # Wraparound - whole segment available +1: testb $CAN_USE_HEAP, loadflags + jnz 2f + + # No CAN_USE_HEAP + movw %ss, %dx + cmpw %ax, %dx # %ds == %ss? + movw %sp, %dx + # If so, assume %sp is reasonably set, otherwise use + # the smallest possible stack. + jne 4f # -> Smallest possible stack... + + # Make sure the stack is at least minimum size. Take a value + # of zero to mean "full segment." +2: + andw $~3, %dx # dword align (might as well...) + jnz 3f + movw $0xfffc, %dx # Make sure we're not zero +3: cmpw %cx, %dx + jnb 5f +4: movw %cx, %dx # Minimum value we can possibly use +5: movw %ax, %ss + movzwl %dx, %esp # Clear upper half of %esp + sti # Now we should have a working stack + +# We will have entered with %cs = %ds+0x20, normalize %cs so +# it is on par with the other segments. + pushw %ds + pushw $6f + lretw +6: # Check signature at end of setup cmpl $0x5a5aaa55, setup_sig diff --git a/arch/x86/boot/main.c b/arch/x86/boot/main.c index 0eeef3989a17..1f95750ede28 100644 --- a/arch/x86/boot/main.c +++ b/arch/x86/boot/main.c @@ -26,8 +26,6 @@ char *heap_end = _end; /* Default end of heap = no heap */ * screws up the old-style command line protocol, adjust by * filling in the new-style command line pointer instead. */ -#define OLD_CL_MAGIC 0xA33F -#define OLD_CL_ADDRESS 0x20 static void copy_boot_params(void) { diff --git a/arch/x86/boot/pmjump.S b/arch/x86/boot/pmjump.S index 2e559233725a..fa6bed1fac14 100644 --- a/arch/x86/boot/pmjump.S +++ b/arch/x86/boot/pmjump.S @@ -28,17 +28,19 @@ * void protected_mode_jump(u32 entrypoint, u32 bootparams); */ protected_mode_jump: - xorl %ebx, %ebx # Flag to indicate this is a boot movl %edx, %esi # Pointer to boot_params table movl %eax, 2f # Patch ljmpl instruction - jmp 1f # Short jump to flush instruction q. -1: movw $__BOOT_DS, %cx + xorl %ebx, %ebx # Per the 32-bit boot protocol + xorl %ebp, %ebp # Per the 32-bit boot protocol + xorl %edi, %edi # Per the 32-bit boot protocol movl %cr0, %edx orb $1, %dl # Protected mode (PE) bit movl %edx, %cr0 + jmp 1f # Short jump to serialize on 386/486 +1: movw %cx, %ds movw %cx, %es diff --git a/arch/x86/boot/video-bios.c b/arch/x86/boot/video-bios.c index 68e65d95cdfd..ed0672a81870 100644 --- a/arch/x86/boot/video-bios.c +++ b/arch/x86/boot/video-bios.c @@ -79,7 +79,7 @@ static int bios_probe(void) video_bios.modes = GET_HEAP(struct mode_info, 0); for (mode = 0x14; mode <= 0x7f; mode++) { - if (heap_free() < sizeof(struct mode_info)) + if (!heap_free(sizeof(struct mode_info))) break; if (mode_defined(VIDEO_FIRST_BIOS+mode)) diff --git a/arch/x86/boot/video-vesa.c b/arch/x86/boot/video-vesa.c index 192190710710..4716b9a96357 100644 --- a/arch/x86/boot/video-vesa.c +++ b/arch/x86/boot/video-vesa.c @@ -57,7 +57,7 @@ static int vesa_probe(void) while ((mode = rdfs16(mode_ptr)) != 0xffff) { mode_ptr += 2; - if (heap_free() < sizeof(struct mode_info)) + if (!heap_free(sizeof(struct mode_info))) break; /* Heap full, can't save mode info */ if (mode & ~0x1ff) diff --git a/arch/x86/boot/video.c b/arch/x86/boot/video.c index e4ba897bf9a3..ad9712f01739 100644 --- a/arch/x86/boot/video.c +++ b/arch/x86/boot/video.c @@ -371,7 +371,7 @@ static void save_screen(void) saved.curx = boot_params.screen_info.orig_x; saved.cury = boot_params.screen_info.orig_y; - if (heap_free() < saved.x*saved.y*sizeof(u16)+512) + if (!heap_free(saved.x*saved.y*sizeof(u16)+512)) return; /* Not enough heap to save the screen */ saved.data = GET_HEAP(u16, saved.x*saved.y); diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig new file mode 100644 index 000000000000..54ee1764fdae --- /dev/null +++ b/arch/x86/configs/i386_defconfig @@ -0,0 +1,1466 @@ +# +# Automatically generated make config: don't edit +# Linux kernel version: 2.6.22-git14 +# Fri Jul 20 09:53:15 2007 +# +CONFIG_X86_32=y +CONFIG_GENERIC_TIME=y +CONFIG_CLOCKSOURCE_WATCHDOG=y +CONFIG_GENERIC_CLOCKEVENTS=y +CONFIG_GENERIC_CLOCKEVENTS_BROADCAST=y +CONFIG_LOCKDEP_SUPPORT=y +CONFIG_STACKTRACE_SUPPORT=y +CONFIG_SEMAPHORE_SLEEPERS=y +CONFIG_X86=y +CONFIG_MMU=y +CONFIG_ZONE_DMA=y +CONFIG_QUICKLIST=y +CONFIG_GENERIC_ISA_DMA=y +CONFIG_GENERIC_IOMAP=y +CONFIG_GENERIC_BUG=y +CONFIG_GENERIC_HWEIGHT=y +CONFIG_ARCH_MAY_HAVE_PC_FDC=y +CONFIG_DMI=y +CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" + +# +# Code maturity level options +# +CONFIG_EXPERIMENTAL=y +CONFIG_LOCK_KERNEL=y +CONFIG_INIT_ENV_ARG_LIMIT=32 + +# +# General setup +# +CONFIG_LOCALVERSION="" +CONFIG_LOCALVERSION_AUTO=y +CONFIG_SWAP=y +CONFIG_SYSVIPC=y +CONFIG_SYSVIPC_SYSCTL=y +CONFIG_POSIX_MQUEUE=y +# CONFIG_BSD_PROCESS_ACCT is not set +# CONFIG_TASKSTATS is not set +# CONFIG_USER_NS is not set +# CONFIG_AUDIT is not set +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_LOG_BUF_SHIFT=18 +# CONFIG_CPUSETS is not set +CONFIG_SYSFS_DEPRECATED=y +CONFIG_RELAY=y +CONFIG_BLK_DEV_INITRD=y +CONFIG_INITRAMFS_SOURCE="" +CONFIG_CC_OPTIMIZE_FOR_SIZE=y +CONFIG_SYSCTL=y +# CONFIG_EMBEDDED is not set +CONFIG_UID16=y +CONFIG_SYSCTL_SYSCALL=y +CONFIG_KALLSYMS=y +CONFIG_KALLSYMS_ALL=y +# CONFIG_KALLSYMS_EXTRA_PASS is not set +CONFIG_HOTPLUG=y +CONFIG_PRINTK=y +CONFIG_BUG=y +CONFIG_ELF_CORE=y +CONFIG_BASE_FULL=y +CONFIG_FUTEX=y +CONFIG_ANON_INODES=y +CONFIG_EPOLL=y +CONFIG_SIGNALFD=y +CONFIG_TIMERFD=y +CONFIG_EVENTFD=y +CONFIG_SHMEM=y +CONFIG_VM_EVENT_COUNTERS=y +CONFIG_SLUB_DEBUG=y +# CONFIG_SLAB is not set +CONFIG_SLUB=y +# CONFIG_SLOB is not set +CONFIG_RT_MUTEXES=y +# CONFIG_TINY_SHMEM is not set +CONFIG_BASE_SMALL=0 +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y +CONFIG_MODULE_FORCE_UNLOAD=y +# CONFIG_MODVERSIONS is not set +# CONFIG_MODULE_SRCVERSION_ALL is not set +# CONFIG_KMOD is not set +CONFIG_STOP_MACHINE=y +CONFIG_BLOCK=y +CONFIG_LBD=y +# CONFIG_BLK_DEV_IO_TRACE is not set +# CONFIG_LSF is not set +# CONFIG_BLK_DEV_BSG is not set + +# +# IO Schedulers +# +CONFIG_IOSCHED_NOOP=y +CONFIG_IOSCHED_AS=y +CONFIG_IOSCHED_DEADLINE=y +CONFIG_IOSCHED_CFQ=y +CONFIG_DEFAULT_AS=y +# CONFIG_DEFAULT_DEADLINE is not set +# CONFIG_DEFAULT_CFQ is not set +# CONFIG_DEFAULT_NOOP is not set +CONFIG_DEFAULT_IOSCHED="anticipatory" + +# +# Processor type and features +# +CONFIG_TICK_ONESHOT=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_SMP=y +# CONFIG_X86_PC is not set +# CONFIG_X86_ELAN is not set +# CONFIG_X86_VOYAGER is not set +# CONFIG_X86_NUMAQ is not set +# CONFIG_X86_SUMMIT is not set +# CONFIG_X86_BIGSMP is not set +# CONFIG_X86_VISWS is not set +CONFIG_X86_GENERICARCH=y +# CONFIG_X86_ES7000 is not set +# CONFIG_PARAVIRT is not set +CONFIG_X86_CYCLONE_TIMER=y +# CONFIG_M386 is not set +# CONFIG_M486 is not set +# CONFIG_M586 is not set +# CONFIG_M586TSC is not set +# CONFIG_M586MMX is not set +# CONFIG_M686 is not set +# CONFIG_MPENTIUMII is not set +CONFIG_MPENTIUMIII=y +# CONFIG_MPENTIUMM is not set +# CONFIG_MCORE2 is not set +# CONFIG_MPENTIUM4 is not set +# CONFIG_MK6 is not set +# CONFIG_MK7 is not set +# CONFIG_MK8 is not set +# CONFIG_MCRUSOE is not set +# CONFIG_MEFFICEON is not set +# CONFIG_MWINCHIPC6 is not set +# CONFIG_MWINCHIP2 is not set +# CONFIG_MWINCHIP3D is not set +# CONFIG_MGEODEGX1 is not set +# CONFIG_MGEODE_LX is not set +# CONFIG_MCYRIXIII is not set +# CONFIG_MVIAC3_2 is not set +# CONFIG_MVIAC7 is not set +CONFIG_X86_GENERIC=y +CONFIG_X86_CMPXCHG=y +CONFIG_X86_L1_CACHE_SHIFT=7 +CONFIG_X86_XADD=y +CONFIG_RWSEM_XCHGADD_ALGORITHM=y +# CONFIG_ARCH_HAS_ILOG2_U32 is not set +# CONFIG_ARCH_HAS_ILOG2_U64 is not set +CONFIG_GENERIC_CALIBRATE_DELAY=y +CONFIG_X86_WP_WORKS_OK=y +CONFIG_X86_INVLPG=y +CONFIG_X86_BSWAP=y +CONFIG_X86_POPAD_OK=y +CONFIG_X86_GOOD_APIC=y +CONFIG_X86_INTEL_USERCOPY=y +CONFIG_X86_USE_PPRO_CHECKSUM=y +CONFIG_X86_TSC=y +CONFIG_X86_CMOV=y +CONFIG_X86_MINIMUM_CPU_FAMILY=4 +CONFIG_HPET_TIMER=y +CONFIG_HPET_EMULATE_RTC=y +CONFIG_NR_CPUS=32 +CONFIG_SCHED_SMT=y +CONFIG_SCHED_MC=y +# CONFIG_PREEMPT_NONE is not set +CONFIG_PREEMPT_VOLUNTARY=y +# CONFIG_PREEMPT is not set +CONFIG_PREEMPT_BKL=y +CONFIG_X86_LOCAL_APIC=y +CONFIG_X86_IO_APIC=y +CONFIG_X86_MCE=y +CONFIG_X86_MCE_NONFATAL=y +CONFIG_X86_MCE_P4THERMAL=y +CONFIG_VM86=y +# CONFIG_TOSHIBA is not set +# CONFIG_I8K is not set +# CONFIG_X86_REBOOTFIXUPS is not set +CONFIG_MICROCODE=y +CONFIG_MICROCODE_OLD_INTERFACE=y +CONFIG_X86_MSR=y +CONFIG_X86_CPUID=y + +# +# Firmware Drivers +# +# CONFIG_EDD is not set +# CONFIG_DELL_RBU is not set +# CONFIG_DCDBAS is not set +CONFIG_DMIID=y +# CONFIG_NOHIGHMEM is not set +CONFIG_HIGHMEM4G=y +# CONFIG_HIGHMEM64G is not set +CONFIG_PAGE_OFFSET=0xC0000000 +CONFIG_HIGHMEM=y +CONFIG_ARCH_POPULATES_NODE_MAP=y +CONFIG_SELECT_MEMORY_MODEL=y +CONFIG_FLATMEM_MANUAL=y +# CONFIG_DISCONTIGMEM_MANUAL is not set +# CONFIG_SPARSEMEM_MANUAL is not set +CONFIG_FLATMEM=y +CONFIG_FLAT_NODE_MEM_MAP=y +# CONFIG_SPARSEMEM_STATIC is not set +CONFIG_SPLIT_PTLOCK_CPUS=4 +CONFIG_RESOURCES_64BIT=y +CONFIG_ZONE_DMA_FLAG=1 +CONFIG_BOUNCE=y +CONFIG_NR_QUICK=1 +CONFIG_VIRT_TO_BUS=y +# CONFIG_HIGHPTE is not set +# CONFIG_MATH_EMULATION is not set +CONFIG_MTRR=y +# CONFIG_EFI is not set +# CONFIG_IRQBALANCE is not set +CONFIG_SECCOMP=y +# CONFIG_HZ_100 is not set +CONFIG_HZ_250=y +# CONFIG_HZ_300 is not set +# CONFIG_HZ_1000 is not set +CONFIG_HZ=250 +# CONFIG_KEXEC is not set +# CONFIG_CRASH_DUMP is not set +CONFIG_PHYSICAL_START=0x100000 +# CONFIG_RELOCATABLE is not set +CONFIG_PHYSICAL_ALIGN=0x100000 +# CONFIG_HOTPLUG_CPU is not set +CONFIG_COMPAT_VDSO=y +CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y + +# +# Power management options (ACPI, APM) +# +CONFIG_PM=y +CONFIG_PM_LEGACY=y +# CONFIG_PM_DEBUG is not set + +# +# ACPI (Advanced Configuration and Power Interface) Support +# +CONFIG_ACPI=y +CONFIG_ACPI_PROCFS=y +CONFIG_ACPI_AC=y +CONFIG_ACPI_BATTERY=y +CONFIG_ACPI_BUTTON=y +CONFIG_ACPI_FAN=y +# CONFIG_ACPI_DOCK is not set +CONFIG_ACPI_PROCESSOR=y +CONFIG_ACPI_THERMAL=y +# CONFIG_ACPI_ASUS is not set +# CONFIG_ACPI_TOSHIBA is not set +CONFIG_ACPI_BLACKLIST_YEAR=2001 +CONFIG_ACPI_DEBUG=y +CONFIG_ACPI_EC=y +CONFIG_ACPI_POWER=y +CONFIG_ACPI_SYSTEM=y +CONFIG_X86_PM_TIMER=y +# CONFIG_ACPI_CONTAINER is not set +# CONFIG_ACPI_SBS is not set +# CONFIG_APM is not set + +# +# CPU Frequency scaling +# +CONFIG_CPU_FREQ=y +CONFIG_CPU_FREQ_TABLE=y +CONFIG_CPU_FREQ_DEBUG=y +CONFIG_CPU_FREQ_STAT=y +# CONFIG_CPU_FREQ_STAT_DETAILS is not set +CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE=y +# CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE is not set +CONFIG_CPU_FREQ_GOV_PERFORMANCE=y +# CONFIG_CPU_FREQ_GOV_POWERSAVE is not set +CONFIG_CPU_FREQ_GOV_USERSPACE=y +CONFIG_CPU_FREQ_GOV_ONDEMAND=y +CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y + +# +# CPUFreq processor drivers +# +CONFIG_X86_ACPI_CPUFREQ=y +# CONFIG_X86_POWERNOW_K6 is not set +# CONFIG_X86_POWERNOW_K7 is not set +CONFIG_X86_POWERNOW_K8=y +CONFIG_X86_POWERNOW_K8_ACPI=y +# CONFIG_X86_GX_SUSPMOD is not set +# CONFIG_X86_SPEEDSTEP_CENTRINO is not set +# CONFIG_X86_SPEEDSTEP_ICH is not set +# CONFIG_X86_SPEEDSTEP_SMI is not set +# CONFIG_X86_P4_CLOCKMOD is not set +# CONFIG_X86_CPUFREQ_NFORCE2 is not set +# CONFIG_X86_LONGRUN is not set +# CONFIG_X86_LONGHAUL is not set +# CONFIG_X86_E_POWERSAVER is not set + +# +# shared options +# +CONFIG_X86_ACPI_CPUFREQ_PROC_INTF=y +# CONFIG_X86_SPEEDSTEP_LIB is not set + +# +# Bus options (PCI, PCMCIA, EISA, MCA, ISA) +# +CONFIG_PCI=y +# CONFIG_PCI_GOBIOS is not set +# CONFIG_PCI_GOMMCONFIG is not set +# CONFIG_PCI_GODIRECT is not set +CONFIG_PCI_GOANY=y +CONFIG_PCI_BIOS=y +CONFIG_PCI_DIRECT=y +CONFIG_PCI_MMCONFIG=y +# CONFIG_PCIEPORTBUS is not set +CONFIG_ARCH_SUPPORTS_MSI=y +CONFIG_PCI_MSI=y +# CONFIG_PCI_DEBUG is not set +# CONFIG_HT_IRQ is not set +CONFIG_ISA_DMA_API=y +# CONFIG_ISA is not set +# CONFIG_MCA is not set +# CONFIG_SCx200 is not set +CONFIG_K8_NB=y + +# +# PCCARD (PCMCIA/CardBus) support +# +# CONFIG_PCCARD is not set +# CONFIG_HOTPLUG_PCI is not set + +# +# Executable file formats +# +CONFIG_BINFMT_ELF=y +# CONFIG_BINFMT_AOUT is not set +# CONFIG_BINFMT_MISC is not set + +# +# Networking +# +CONFIG_NET=y + +# +# Networking options +# +CONFIG_PACKET=y +# CONFIG_PACKET_MMAP is not set +CONFIG_UNIX=y +CONFIG_XFRM=y +# CONFIG_XFRM_USER is not set +# CONFIG_XFRM_SUB_POLICY is not set +# CONFIG_XFRM_MIGRATE is not set +# CONFIG_NET_KEY is not set +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +# CONFIG_IP_ADVANCED_ROUTER is not set +CONFIG_IP_FIB_HASH=y +CONFIG_IP_PNP=y +CONFIG_IP_PNP_DHCP=y +# CONFIG_IP_PNP_BOOTP is not set +# CONFIG_IP_PNP_RARP is not set +# CONFIG_NET_IPIP is not set +# CONFIG_NET_IPGRE is not set +# CONFIG_IP_MROUTE is not set +# CONFIG_ARPD is not set +# CONFIG_SYN_COOKIES is not set +# CONFIG_INET_AH is not set +# CONFIG_INET_ESP is not set +# CONFIG_INET_IPCOMP is not set +# CONFIG_INET_XFRM_TUNNEL is not set +CONFIG_INET_TUNNEL=y +CONFIG_INET_XFRM_MODE_TRANSPORT=y +CONFIG_INET_XFRM_MODE_TUNNEL=y +# CONFIG_INET_XFRM_MODE_BEET is not set +CONFIG_INET_DIAG=y +CONFIG_INET_TCP_DIAG=y +# CONFIG_TCP_CONG_ADVANCED is not set +CONFIG_TCP_CONG_CUBIC=y +CONFIG_DEFAULT_TCP_CONG="cubic" +# CONFIG_TCP_MD5SIG is not set +CONFIG_IPV6=y +# CONFIG_IPV6_PRIVACY is not set +# CONFIG_IPV6_ROUTER_PREF is not set +# CONFIG_IPV6_OPTIMISTIC_DAD is not set +# CONFIG_INET6_AH is not set +# CONFIG_INET6_ESP is not set +# CONFIG_INET6_IPCOMP is not set +# CONFIG_IPV6_MIP6 is not set +# CONFIG_INET6_XFRM_TUNNEL is not set +# CONFIG_INET6_TUNNEL is not set +CONFIG_INET6_XFRM_MODE_TRANSPORT=y +CONFIG_INET6_XFRM_MODE_TUNNEL=y +# CONFIG_INET6_XFRM_MODE_BEET is not set +# CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION is not set +CONFIG_IPV6_SIT=y +# CONFIG_IPV6_TUNNEL is not set +# CONFIG_IPV6_MULTIPLE_TABLES is not set +# CONFIG_NETWORK_SECMARK is not set +# CONFIG_NETFILTER is not set +# CONFIG_IP_DCCP is not set +# CONFIG_IP_SCTP is not set +# CONFIG_TIPC is not set +# CONFIG_ATM is not set +# CONFIG_BRIDGE is not set +# CONFIG_VLAN_8021Q is not set +# CONFIG_DECNET is not set +# CONFIG_LLC2 is not set +# CONFIG_IPX is not set +# CONFIG_ATALK is not set +# CONFIG_X25 is not set +# CONFIG_LAPB is not set +# CONFIG_ECONET is not set +# CONFIG_WAN_ROUTER is not set + +# +# QoS and/or fair queueing +# +# CONFIG_NET_SCHED is not set + +# +# Network testing +# +# CONFIG_NET_PKTGEN is not set +# CONFIG_NET_TCPPROBE is not set +# CONFIG_HAMRADIO is not set +# CONFIG_IRDA is not set +# CONFIG_BT is not set +# CONFIG_AF_RXRPC is not set + +# +# Wireless +# +# CONFIG_CFG80211 is not set +# CONFIG_WIRELESS_EXT is not set +# CONFIG_MAC80211 is not set +# CONFIG_IEEE80211 is not set +# CONFIG_RFKILL is not set +# CONFIG_NET_9P is not set + +# +# Device Drivers +# + +# +# Generic Driver Options +# +CONFIG_STANDALONE=y +CONFIG_PREVENT_FIRMWARE_BUILD=y +CONFIG_FW_LOADER=y +# CONFIG_DEBUG_DRIVER is not set +# CONFIG_DEBUG_DEVRES is not set +# CONFIG_SYS_HYPERVISOR is not set +# CONFIG_CONNECTOR is not set +# CONFIG_MTD is not set +# CONFIG_PARPORT is not set +CONFIG_PNP=y +# CONFIG_PNP_DEBUG is not set + +# +# Protocols +# +CONFIG_PNPACPI=y +CONFIG_BLK_DEV=y +CONFIG_BLK_DEV_FD=y +# CONFIG_BLK_CPQ_DA is not set +# CONFIG_BLK_CPQ_CISS_DA is not set +# CONFIG_BLK_DEV_DAC960 is not set +# CONFIG_BLK_DEV_UMEM is not set +# CONFIG_BLK_DEV_COW_COMMON is not set +CONFIG_BLK_DEV_LOOP=y +# CONFIG_BLK_DEV_CRYPTOLOOP is not set +# CONFIG_BLK_DEV_NBD is not set +# CONFIG_BLK_DEV_SX8 is not set +# CONFIG_BLK_DEV_UB is not set +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_COUNT=16 +CONFIG_BLK_DEV_RAM_SIZE=4096 +CONFIG_BLK_DEV_RAM_BLOCKSIZE=1024 +# CONFIG_CDROM_PKTCDVD is not set +# CONFIG_ATA_OVER_ETH is not set +CONFIG_MISC_DEVICES=y +# CONFIG_IBM_ASM is not set +# CONFIG_PHANTOM is not set +# CONFIG_EEPROM_93CX6 is not set +# CONFIG_SGI_IOC4 is not set +# CONFIG_TIFM_CORE is not set +# CONFIG_SONY_LAPTOP is not set +# CONFIG_THINKPAD_ACPI is not set +CONFIG_IDE=y +CONFIG_BLK_DEV_IDE=y + +# +# Please see Documentation/ide.txt for help/info on IDE drives +# +# CONFIG_BLK_DEV_IDE_SATA is not set +# CONFIG_BLK_DEV_HD_IDE is not set +CONFIG_BLK_DEV_IDEDISK=y +CONFIG_IDEDISK_MULTI_MODE=y +CONFIG_BLK_DEV_IDECD=y +# CONFIG_BLK_DEV_IDETAPE is not set +# CONFIG_BLK_DEV_IDEFLOPPY is not set +# CONFIG_BLK_DEV_IDESCSI is not set +CONFIG_BLK_DEV_IDEACPI=y +# CONFIG_IDE_TASK_IOCTL is not set +CONFIG_IDE_PROC_FS=y + +# +# IDE chipset support/bugfixes +# +CONFIG_IDE_GENERIC=y +# CONFIG_BLK_DEV_CMD640 is not set +# CONFIG_BLK_DEV_IDEPNP is not set +CONFIG_BLK_DEV_IDEPCI=y +# CONFIG_IDEPCI_SHARE_IRQ is not set +CONFIG_IDEPCI_PCIBUS_ORDER=y +# CONFIG_BLK_DEV_OFFBOARD is not set +# CONFIG_BLK_DEV_GENERIC is not set +# CONFIG_BLK_DEV_OPTI621 is not set +# CONFIG_BLK_DEV_RZ1000 is not set +CONFIG_BLK_DEV_IDEDMA_PCI=y +# CONFIG_BLK_DEV_IDEDMA_FORCED is not set +# CONFIG_IDEDMA_ONLYDISK is not set +# CONFIG_BLK_DEV_AEC62XX is not set +# CONFIG_BLK_DEV_ALI15X3 is not set +CONFIG_BLK_DEV_AMD74XX=y +# CONFIG_BLK_DEV_ATIIXP is not set +# CONFIG_BLK_DEV_CMD64X is not set +# CONFIG_BLK_DEV_TRIFLEX is not set +# CONFIG_BLK_DEV_CY82C693 is not set +# CONFIG_BLK_DEV_CS5520 is not set +# CONFIG_BLK_DEV_CS5530 is not set +# CONFIG_BLK_DEV_CS5535 is not set +# CONFIG_BLK_DEV_HPT34X is not set +# CONFIG_BLK_DEV_HPT366 is not set +# CONFIG_BLK_DEV_JMICRON is not set +# CONFIG_BLK_DEV_SC1200 is not set +CONFIG_BLK_DEV_PIIX=y +# CONFIG_BLK_DEV_IT8213 is not set +# CONFIG_BLK_DEV_IT821X is not set +# CONFIG_BLK_DEV_NS87415 is not set +# CONFIG_BLK_DEV_PDC202XX_OLD is not set +# CONFIG_BLK_DEV_PDC202XX_NEW is not set +# CONFIG_BLK_DEV_SVWKS is not set +# CONFIG_BLK_DEV_SIIMAGE is not set +# CONFIG_BLK_DEV_SIS5513 is not set +# CONFIG_BLK_DEV_SLC90E66 is not set +# CONFIG_BLK_DEV_TRM290 is not set +# CONFIG_BLK_DEV_VIA82CXXX is not set +# CONFIG_BLK_DEV_TC86C001 is not set +# CONFIG_IDE_ARM is not set +CONFIG_BLK_DEV_IDEDMA=y +# CONFIG_IDEDMA_IVB is not set +# CONFIG_BLK_DEV_HD is not set + +# +# SCSI device support +# +# CONFIG_RAID_ATTRS is not set +CONFIG_SCSI=y +CONFIG_SCSI_DMA=y +# CONFIG_SCSI_TGT is not set +CONFIG_SCSI_NETLINK=y +# CONFIG_SCSI_PROC_FS is not set + +# +# SCSI support type (disk, tape, CD-ROM) +# +CONFIG_BLK_DEV_SD=y +# CONFIG_CHR_DEV_ST is not set +# CONFIG_CHR_DEV_OSST is not set +CONFIG_BLK_DEV_SR=y +# CONFIG_BLK_DEV_SR_VENDOR is not set +CONFIG_CHR_DEV_SG=y +# CONFIG_CHR_DEV_SCH is not set + +# +# Some SCSI devices (e.g. CD jukebox) support multiple LUNs +# +# CONFIG_SCSI_MULTI_LUN is not set +# CONFIG_SCSI_CONSTANTS is not set +# CONFIG_SCSI_LOGGING is not set +# CONFIG_SCSI_SCAN_ASYNC is not set +CONFIG_SCSI_WAIT_SCAN=m + +# +# SCSI Transports +# +CONFIG_SCSI_SPI_ATTRS=y +CONFIG_SCSI_FC_ATTRS=y +# CONFIG_SCSI_ISCSI_ATTRS is not set +# CONFIG_SCSI_SAS_ATTRS is not set +# CONFIG_SCSI_SAS_LIBSAS is not set + +# +# SCSI low-level drivers +# +# CONFIG_ISCSI_TCP is not set +CONFIG_BLK_DEV_3W_XXXX_RAID=y +# CONFIG_SCSI_3W_9XXX is not set +# CONFIG_SCSI_ACARD is not set +# CONFIG_SCSI_AACRAID is not set +CONFIG_SCSI_AIC7XXX=y +CONFIG_AIC7XXX_CMDS_PER_DEVICE=32 +CONFIG_AIC7XXX_RESET_DELAY_MS=5000 +CONFIG_AIC7XXX_DEBUG_ENABLE=y +CONFIG_AIC7XXX_DEBUG_MASK=0 +CONFIG_AIC7XXX_REG_PRETTY_PRINT=y +# CONFIG_SCSI_AIC7XXX_OLD is not set +CONFIG_SCSI_AIC79XX=y +CONFIG_AIC79XX_CMDS_PER_DEVICE=32 +CONFIG_AIC79XX_RESET_DELAY_MS=4000 +# CONFIG_AIC79XX_DEBUG_ENABLE is not set +CONFIG_AIC79XX_DEBUG_MASK=0 +# CONFIG_AIC79XX_REG_PRETTY_PRINT is not set +# CONFIG_SCSI_AIC94XX is not set +# CONFIG_SCSI_DPT_I2O is not set +# CONFIG_SCSI_ADVANSYS is not set +# CONFIG_SCSI_ARCMSR is not set +# CONFIG_MEGARAID_NEWGEN is not set +# CONFIG_MEGARAID_LEGACY is not set +# CONFIG_MEGARAID_SAS is not set +# CONFIG_SCSI_HPTIOP is not set +# CONFIG_SCSI_BUSLOGIC is not set +# CONFIG_SCSI_DMX3191D is not set +# CONFIG_SCSI_EATA is not set +# CONFIG_SCSI_FUTURE_DOMAIN is not set +# CONFIG_SCSI_GDTH is not set +# CONFIG_SCSI_IPS is not set +# CONFIG_SCSI_INITIO is not set +# CONFIG_SCSI_INIA100 is not set +# CONFIG_SCSI_STEX is not set +# CONFIG_SCSI_SYM53C8XX_2 is not set +# CONFIG_SCSI_IPR is not set +# CONFIG_SCSI_QLOGIC_1280 is not set +# CONFIG_SCSI_QLA_FC is not set +# CONFIG_SCSI_QLA_ISCSI is not set +# CONFIG_SCSI_LPFC is not set +# CONFIG_SCSI_DC395x is not set +# CONFIG_SCSI_DC390T is not set +# CONFIG_SCSI_NSP32 is not set +# CONFIG_SCSI_DEBUG is not set +# CONFIG_SCSI_SRP is not set +CONFIG_ATA=y +# CONFIG_ATA_NONSTANDARD is not set +CONFIG_ATA_ACPI=y +CONFIG_SATA_AHCI=y +CONFIG_SATA_SVW=y +CONFIG_ATA_PIIX=y +# CONFIG_SATA_MV is not set +CONFIG_SATA_NV=y +# CONFIG_PDC_ADMA is not set +# CONFIG_SATA_QSTOR is not set +# CONFIG_SATA_PROMISE is not set +# CONFIG_SATA_SX4 is not set +CONFIG_SATA_SIL=y +# CONFIG_SATA_SIL24 is not set +# CONFIG_SATA_SIS is not set +# CONFIG_SATA_ULI is not set +CONFIG_SATA_VIA=y +# CONFIG_SATA_VITESSE is not set +# CONFIG_SATA_INIC162X is not set +# CONFIG_PATA_ALI is not set +# CONFIG_PATA_AMD is not set +# CONFIG_PATA_ARTOP is not set +# CONFIG_PATA_ATIIXP is not set +# CONFIG_PATA_CMD640_PCI is not set +# CONFIG_PATA_CMD64X is not set +# CONFIG_PATA_CS5520 is not set +# CONFIG_PATA_CS5530 is not set +# CONFIG_PATA_CS5535 is not set +# CONFIG_PATA_CYPRESS is not set +# CONFIG_PATA_EFAR is not set +# CONFIG_ATA_GENERIC is not set +# CONFIG_PATA_HPT366 is not set +# CONFIG_PATA_HPT37X is not set +# CONFIG_PATA_HPT3X2N is not set +# CONFIG_PATA_HPT3X3 is not set +# CONFIG_PATA_IT821X is not set +# CONFIG_PATA_IT8213 is not set +# CONFIG_PATA_JMICRON is not set +# CONFIG_PATA_TRIFLEX is not set +# CONFIG_PATA_MARVELL is not set +# CONFIG_PATA_MPIIX is not set +# CONFIG_PATA_OLDPIIX is not set +# CONFIG_PATA_NETCELL is not set +# CONFIG_PATA_NS87410 is not set +# CONFIG_PATA_OPTI is not set +# CONFIG_PATA_OPTIDMA is not set +# CONFIG_PATA_PDC_OLD is not set +# CONFIG_PATA_RADISYS is not set +# CONFIG_PATA_RZ1000 is not set +# CONFIG_PATA_SC1200 is not set +# CONFIG_PATA_SERVERWORKS is not set +# CONFIG_PATA_PDC2027X is not set +# CONFIG_PATA_SIL680 is not set +# CONFIG_PATA_SIS is not set +# CONFIG_PATA_VIA is not set +# CONFIG_PATA_WINBOND is not set +CONFIG_MD=y +# CONFIG_BLK_DEV_MD is not set +CONFIG_BLK_DEV_DM=y +# CONFIG_DM_DEBUG is not set +# CONFIG_DM_CRYPT is not set +# CONFIG_DM_SNAPSHOT is not set +# CONFIG_DM_MIRROR is not set +# CONFIG_DM_ZERO is not set +# CONFIG_DM_MULTIPATH is not set +# CONFIG_DM_DELAY is not set + +# +# Fusion MPT device support +# +CONFIG_FUSION=y +CONFIG_FUSION_SPI=y +# CONFIG_FUSION_FC is not set +# CONFIG_FUSION_SAS is not set +CONFIG_FUSION_MAX_SGE=128 +# CONFIG_FUSION_CTL is not set + +# +# IEEE 1394 (FireWire) support +# +# CONFIG_FIREWIRE is not set +CONFIG_IEEE1394=y + +# +# Subsystem Options +# +# CONFIG_IEEE1394_VERBOSEDEBUG is not set + +# +# Controllers +# + +# +# Texas Instruments PCILynx requires I2C +# +CONFIG_IEEE1394_OHCI1394=y + +# +# Protocols +# +# CONFIG_IEEE1394_VIDEO1394 is not set +# CONFIG_IEEE1394_SBP2 is not set +# CONFIG_IEEE1394_ETH1394_ROM_ENTRY is not set +# CONFIG_IEEE1394_ETH1394 is not set +# CONFIG_IEEE1394_DV1394 is not set +CONFIG_IEEE1394_RAWIO=y +# CONFIG_I2O is not set +CONFIG_MACINTOSH_DRIVERS=y +# CONFIG_MAC_EMUMOUSEBTN is not set +CONFIG_NETDEVICES=y +CONFIG_NETDEVICES_MULTIQUEUE=y +# CONFIG_DUMMY is not set +# CONFIG_BONDING is not set +# CONFIG_MACVLAN is not set +# CONFIG_EQUALIZER is not set +# CONFIG_TUN is not set +# CONFIG_NET_SB1000 is not set +# CONFIG_ARCNET is not set +# CONFIG_PHYLIB is not set +CONFIG_NET_ETHERNET=y +CONFIG_MII=y +# CONFIG_HAPPYMEAL is not set +# CONFIG_SUNGEM is not set +# CONFIG_CASSINI is not set +CONFIG_NET_VENDOR_3COM=y +CONFIG_VORTEX=y +# CONFIG_TYPHOON is not set +CONFIG_NET_TULIP=y +# CONFIG_DE2104X is not set +CONFIG_TULIP=y +# CONFIG_TULIP_MWI is not set +# CONFIG_TULIP_MMIO is not set +# CONFIG_TULIP_NAPI is not set +# CONFIG_DE4X5 is not set +# CONFIG_WINBOND_840 is not set +# CONFIG_DM9102 is not set +# CONFIG_ULI526X is not set +# CONFIG_HP100 is not set +CONFIG_NET_PCI=y +# CONFIG_PCNET32 is not set +# CONFIG_AMD8111_ETH is not set +# CONFIG_ADAPTEC_STARFIRE is not set +CONFIG_B44=y +CONFIG_FORCEDETH=y +# CONFIG_FORCEDETH_NAPI is not set +# CONFIG_DGRS is not set +# CONFIG_EEPRO100 is not set +CONFIG_E100=y +# CONFIG_FEALNX is not set +# CONFIG_NATSEMI is not set +# CONFIG_NE2K_PCI is not set +CONFIG_8139CP=y +CONFIG_8139TOO=y +# CONFIG_8139TOO_PIO is not set +# CONFIG_8139TOO_TUNE_TWISTER is not set +# CONFIG_8139TOO_8129 is not set +# CONFIG_8139_OLD_RX_RESET is not set +# CONFIG_SIS900 is not set +# CONFIG_EPIC100 is not set +# CONFIG_SUNDANCE is not set +# CONFIG_TLAN is not set +# CONFIG_VIA_RHINE is not set +# CONFIG_SC92031 is not set +CONFIG_NETDEV_1000=y +# CONFIG_ACENIC is not set +# CONFIG_DL2K is not set +CONFIG_E1000=y +# CONFIG_E1000_NAPI is not set +# CONFIG_E1000_DISABLE_PACKET_SPLIT is not set +# CONFIG_NS83820 is not set +# CONFIG_HAMACHI is not set +# CONFIG_YELLOWFIN is not set +CONFIG_R8169=y +# CONFIG_R8169_NAPI is not set +# CONFIG_SIS190 is not set +# CONFIG_SKGE is not set +CONFIG_SKY2=y +# CONFIG_VIA_VELOCITY is not set +CONFIG_TIGON3=y +CONFIG_BNX2=y +# CONFIG_QLA3XXX is not set +# CONFIG_ATL1 is not set +CONFIG_NETDEV_10000=y +# CONFIG_CHELSIO_T1 is not set +# CONFIG_CHELSIO_T3 is not set +# CONFIG_IXGB is not set +# CONFIG_S2IO is not set +# CONFIG_MYRI10GE is not set +# CONFIG_NETXEN_NIC is not set +# CONFIG_MLX4_CORE is not set +# CONFIG_TR is not set + +# +# Wireless LAN +# +# CONFIG_WLAN_PRE80211 is not set +# CONFIG_WLAN_80211 is not set + +# +# USB Network Adapters +# +# CONFIG_USB_CATC is not set +# CONFIG_USB_KAWETH is not set +# CONFIG_USB_PEGASUS is not set +# CONFIG_USB_RTL8150 is not set +# CONFIG_USB_USBNET_MII is not set +# CONFIG_USB_USBNET is not set +# CONFIG_WAN is not set +# CONFIG_FDDI is not set +# CONFIG_HIPPI is not set +# CONFIG_PPP is not set +# CONFIG_SLIP is not set +# CONFIG_NET_FC is not set +# CONFIG_SHAPER is not set +CONFIG_NETCONSOLE=y +CONFIG_NETPOLL=y +# CONFIG_NETPOLL_TRAP is not set +CONFIG_NET_POLL_CONTROLLER=y +# CONFIG_ISDN is not set +# CONFIG_PHONE is not set + +# +# Input device support +# +CONFIG_INPUT=y +# CONFIG_INPUT_FF_MEMLESS is not set +# CONFIG_INPUT_POLLDEV is not set + +# +# Userland interfaces +# +CONFIG_INPUT_MOUSEDEV=y +CONFIG_INPUT_MOUSEDEV_PSAUX=y +CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024 +CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 +# CONFIG_INPUT_JOYDEV is not set +# CONFIG_INPUT_TSDEV is not set +CONFIG_INPUT_EVDEV=y +# CONFIG_INPUT_EVBUG is not set + +# +# Input Device Drivers +# +CONFIG_INPUT_KEYBOARD=y +CONFIG_KEYBOARD_ATKBD=y +# CONFIG_KEYBOARD_SUNKBD is not set +# CONFIG_KEYBOARD_LKKBD is not set +# CONFIG_KEYBOARD_XTKBD is not set +# CONFIG_KEYBOARD_NEWTON is not set +# CONFIG_KEYBOARD_STOWAWAY is not set +CONFIG_INPUT_MOUSE=y +CONFIG_MOUSE_PS2=y +CONFIG_MOUSE_PS2_ALPS=y +CONFIG_MOUSE_PS2_LOGIPS2PP=y +CONFIG_MOUSE_PS2_SYNAPTICS=y +CONFIG_MOUSE_PS2_LIFEBOOK=y +CONFIG_MOUSE_PS2_TRACKPOINT=y +# CONFIG_MOUSE_PS2_TOUCHKIT is not set +# CONFIG_MOUSE_SERIAL is not set +# CONFIG_MOUSE_APPLETOUCH is not set +# CONFIG_MOUSE_VSXXXAA is not set +# CONFIG_INPUT_JOYSTICK is not set +# CONFIG_INPUT_TABLET is not set +# CONFIG_INPUT_TOUCHSCREEN is not set +# CONFIG_INPUT_MISC is not set + +# +# Hardware I/O ports +# +CONFIG_SERIO=y +CONFIG_SERIO_I8042=y +# CONFIG_SERIO_SERPORT is not set +# CONFIG_SERIO_CT82C710 is not set +# CONFIG_SERIO_PCIPS2 is not set +CONFIG_SERIO_LIBPS2=y +# CONFIG_SERIO_RAW is not set +# CONFIG_GAMEPORT is not set + +# +# Character devices +# +CONFIG_VT=y +CONFIG_VT_CONSOLE=y +CONFIG_HW_CONSOLE=y +# CONFIG_VT_HW_CONSOLE_BINDING is not set +# CONFIG_SERIAL_NONSTANDARD is not set + +# +# Serial drivers +# +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_FIX_EARLYCON_MEM=y +CONFIG_SERIAL_8250_PCI=y +CONFIG_SERIAL_8250_PNP=y +CONFIG_SERIAL_8250_NR_UARTS=4 +CONFIG_SERIAL_8250_RUNTIME_UARTS=4 +# CONFIG_SERIAL_8250_EXTENDED is not set + +# +# Non-8250 serial port support +# +CONFIG_SERIAL_CORE=y +CONFIG_SERIAL_CORE_CONSOLE=y +# CONFIG_SERIAL_JSM is not set +CONFIG_UNIX98_PTYS=y +CONFIG_LEGACY_PTYS=y +CONFIG_LEGACY_PTY_COUNT=256 +# CONFIG_IPMI_HANDLER is not set +# CONFIG_WATCHDOG is not set +CONFIG_HW_RANDOM=y +CONFIG_HW_RANDOM_INTEL=y +CONFIG_HW_RANDOM_AMD=y +CONFIG_HW_RANDOM_GEODE=y +CONFIG_HW_RANDOM_VIA=y +# CONFIG_NVRAM is not set +CONFIG_RTC=y +# CONFIG_R3964 is not set +# CONFIG_APPLICOM is not set +# CONFIG_SONYPI is not set +CONFIG_AGP=y +# CONFIG_AGP_ALI is not set +# CONFIG_AGP_ATI is not set +# CONFIG_AGP_AMD is not set +CONFIG_AGP_AMD64=y +CONFIG_AGP_INTEL=y +# CONFIG_AGP_NVIDIA is not set +# CONFIG_AGP_SIS is not set +# CONFIG_AGP_SWORKS is not set +# CONFIG_AGP_VIA is not set +# CONFIG_AGP_EFFICEON is not set +# CONFIG_DRM is not set +# CONFIG_MWAVE is not set +# CONFIG_PC8736x_GPIO is not set +# CONFIG_NSC_GPIO is not set +# CONFIG_CS5535_GPIO is not set +CONFIG_RAW_DRIVER=y +CONFIG_MAX_RAW_DEVS=256 +CONFIG_HPET=y +# CONFIG_HPET_RTC_IRQ is not set +CONFIG_HPET_MMAP=y +# CONFIG_HANGCHECK_TIMER is not set +# CONFIG_TCG_TPM is not set +# CONFIG_TELCLOCK is not set +CONFIG_DEVPORT=y +# CONFIG_I2C is not set + +# +# SPI support +# +# CONFIG_SPI is not set +# CONFIG_SPI_MASTER is not set +# CONFIG_W1 is not set +# CONFIG_POWER_SUPPLY is not set +# CONFIG_HWMON is not set + +# +# Multifunction device drivers +# +# CONFIG_MFD_SM501 is not set + +# +# Multimedia devices +# +# CONFIG_VIDEO_DEV is not set +# CONFIG_DVB_CORE is not set +CONFIG_DAB=y +# CONFIG_USB_DABUSB is not set + +# +# Graphics support +# +# CONFIG_BACKLIGHT_LCD_SUPPORT is not set + +# +# Display device support +# +# CONFIG_DISPLAY_SUPPORT is not set +# CONFIG_VGASTATE is not set +# CONFIG_FB is not set + +# +# Console display driver support +# +CONFIG_VGA_CONSOLE=y +CONFIG_VGACON_SOFT_SCROLLBACK=y +CONFIG_VGACON_SOFT_SCROLLBACK_SIZE=128 +CONFIG_VIDEO_SELECT=y +CONFIG_DUMMY_CONSOLE=y + +# +# Sound +# +CONFIG_SOUND=y + +# +# Advanced Linux Sound Architecture +# +# CONFIG_SND is not set + +# +# Open Sound System +# +CONFIG_SOUND_PRIME=y +# CONFIG_SOUND_TRIDENT is not set +# CONFIG_SOUND_MSNDCLAS is not set +# CONFIG_SOUND_MSNDPIN is not set +# CONFIG_SOUND_OSS is not set +CONFIG_HID_SUPPORT=y +CONFIG_HID=y +# CONFIG_HID_DEBUG is not set + +# +# USB Input Devices +# +CONFIG_USB_HID=y +# CONFIG_USB_HIDINPUT_POWERBOOK is not set +# CONFIG_HID_FF is not set +# CONFIG_USB_HIDDEV is not set +CONFIG_USB_SUPPORT=y +CONFIG_USB_ARCH_HAS_HCD=y +CONFIG_USB_ARCH_HAS_OHCI=y +CONFIG_USB_ARCH_HAS_EHCI=y +CONFIG_USB=y +# CONFIG_USB_DEBUG is not set + +# +# Miscellaneous USB options +# +CONFIG_USB_DEVICEFS=y +# CONFIG_USB_DEVICE_CLASS is not set +# CONFIG_USB_DYNAMIC_MINORS is not set +# CONFIG_USB_SUSPEND is not set +# CONFIG_USB_PERSIST is not set +# CONFIG_USB_OTG is not set + +# +# USB Host Controller Drivers +# +CONFIG_USB_EHCI_HCD=y +# CONFIG_USB_EHCI_SPLIT_ISO is not set +# CONFIG_USB_EHCI_ROOT_HUB_TT is not set +# CONFIG_USB_EHCI_TT_NEWSCHED is not set +# CONFIG_USB_ISP116X_HCD is not set +CONFIG_USB_OHCI_HCD=y +# CONFIG_USB_OHCI_BIG_ENDIAN_DESC is not set +# CONFIG_USB_OHCI_BIG_ENDIAN_MMIO is not set +CONFIG_USB_OHCI_LITTLE_ENDIAN=y +CONFIG_USB_UHCI_HCD=y +# CONFIG_USB_SL811_HCD is not set +# CONFIG_USB_R8A66597_HCD is not set + +# +# USB Device Class drivers +# +# CONFIG_USB_ACM is not set +CONFIG_USB_PRINTER=y + +# +# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' +# + +# +# may also be needed; see USB_STORAGE Help for more information +# +CONFIG_USB_STORAGE=y +# CONFIG_USB_STORAGE_DEBUG is not set +# CONFIG_USB_STORAGE_DATAFAB is not set +# CONFIG_USB_STORAGE_FREECOM is not set +# CONFIG_USB_STORAGE_ISD200 is not set +# CONFIG_USB_STORAGE_DPCM is not set +# CONFIG_USB_STORAGE_USBAT is not set +# CONFIG_USB_STORAGE_SDDR09 is not set +# CONFIG_USB_STORAGE_SDDR55 is not set +# CONFIG_USB_STORAGE_JUMPSHOT is not set +# CONFIG_USB_STORAGE_ALAUDA is not set +# CONFIG_USB_STORAGE_KARMA is not set +# CONFIG_USB_LIBUSUAL is not set + +# +# USB Imaging devices +# +# CONFIG_USB_MDC800 is not set +# CONFIG_USB_MICROTEK is not set +CONFIG_USB_MON=y + +# +# USB port drivers +# + +# +# USB Serial Converter support +# +# CONFIG_USB_SERIAL is not set + +# +# USB Miscellaneous drivers +# +# CONFIG_USB_EMI62 is not set +# CONFIG_USB_EMI26 is not set +# CONFIG_USB_ADUTUX is not set +# CONFIG_USB_AUERSWALD is not set +# CONFIG_USB_RIO500 is not set +# CONFIG_USB_LEGOTOWER is not set +# CONFIG_USB_LCD is not set +# CONFIG_USB_BERRY_CHARGE is not set +# CONFIG_USB_LED is not set +# CONFIG_USB_CYPRESS_CY7C63 is not set +# CONFIG_USB_CYTHERM is not set +# CONFIG_USB_PHIDGET is not set +# CONFIG_USB_IDMOUSE is not set +# CONFIG_USB_FTDI_ELAN is not set +# CONFIG_USB_APPLEDISPLAY is not set +# CONFIG_USB_SISUSBVGA is not set +# CONFIG_USB_LD is not set +# CONFIG_USB_TRANCEVIBRATOR is not set +# CONFIG_USB_IOWARRIOR is not set +# CONFIG_USB_TEST is not set + +# +# USB DSL modem support +# + +# +# USB Gadget Support +# +# CONFIG_USB_GADGET is not set +# CONFIG_MMC is not set + +# +# LED devices +# +# CONFIG_NEW_LEDS is not set + +# +# LED drivers +# + +# +# LED Triggers +# +# CONFIG_INFINIBAND is not set +# CONFIG_EDAC is not set + +# +# Real Time Clock +# +# CONFIG_RTC_CLASS is not set + +# +# DMA Engine support +# +# CONFIG_DMA_ENGINE is not set + +# +# DMA Clients +# + +# +# DMA Devices +# +CONFIG_VIRTUALIZATION=y +# CONFIG_KVM is not set + +# +# Userspace I/O +# +# CONFIG_UIO is not set + +# +# File systems +# +CONFIG_EXT2_FS=y +CONFIG_EXT2_FS_XATTR=y +CONFIG_EXT2_FS_POSIX_ACL=y +# CONFIG_EXT2_FS_SECURITY is not set +# CONFIG_EXT2_FS_XIP is not set +CONFIG_EXT3_FS=y +CONFIG_EXT3_FS_XATTR=y +CONFIG_EXT3_FS_POSIX_ACL=y +# CONFIG_EXT3_FS_SECURITY is not set +# CONFIG_EXT4DEV_FS is not set +CONFIG_JBD=y +# CONFIG_JBD_DEBUG is not set +CONFIG_FS_MBCACHE=y +CONFIG_REISERFS_FS=y +# CONFIG_REISERFS_CHECK is not set +# CONFIG_REISERFS_PROC_INFO is not set +CONFIG_REISERFS_FS_XATTR=y +CONFIG_REISERFS_FS_POSIX_ACL=y +# CONFIG_REISERFS_FS_SECURITY is not set +# CONFIG_JFS_FS is not set +CONFIG_FS_POSIX_ACL=y +# CONFIG_XFS_FS is not set +# CONFIG_GFS2_FS is not set +# CONFIG_OCFS2_FS is not set +# CONFIG_MINIX_FS is not set +# CONFIG_ROMFS_FS is not set +CONFIG_INOTIFY=y +CONFIG_INOTIFY_USER=y +# CONFIG_QUOTA is not set +CONFIG_DNOTIFY=y +# CONFIG_AUTOFS_FS is not set +CONFIG_AUTOFS4_FS=y +# CONFIG_FUSE_FS is not set +CONFIG_GENERIC_ACL=y + +# +# CD-ROM/DVD Filesystems +# +CONFIG_ISO9660_FS=y +# CONFIG_JOLIET is not set +# CONFIG_ZISOFS is not set +# CONFIG_UDF_FS is not set + +# +# DOS/FAT/NT Filesystems +# +CONFIG_FAT_FS=y +CONFIG_MSDOS_FS=y +CONFIG_VFAT_FS=y +CONFIG_FAT_DEFAULT_CODEPAGE=437 +CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1" +# CONFIG_NTFS_FS is not set + +# +# Pseudo filesystems +# +CONFIG_PROC_FS=y +CONFIG_PROC_KCORE=y +CONFIG_PROC_SYSCTL=y +CONFIG_SYSFS=y +CONFIG_TMPFS=y +CONFIG_TMPFS_POSIX_ACL=y +CONFIG_HUGETLBFS=y +CONFIG_HUGETLB_PAGE=y +CONFIG_RAMFS=y +# CONFIG_CONFIGFS_FS is not set + +# +# Miscellaneous filesystems +# +# CONFIG_ADFS_FS is not set +# CONFIG_AFFS_FS is not set +# CONFIG_HFS_FS is not set +# CONFIG_HFSPLUS_FS is not set +# CONFIG_BEFS_FS is not set +# CONFIG_BFS_FS is not set +# CONFIG_EFS_FS is not set +# CONFIG_CRAMFS is not set +# CONFIG_VXFS_FS is not set +# CONFIG_HPFS_FS is not set +# CONFIG_QNX4FS_FS is not set +# CONFIG_SYSV_FS is not set +# CONFIG_UFS_FS is not set + +# +# Network File Systems +# +CONFIG_NFS_FS=y +CONFIG_NFS_V3=y +# CONFIG_NFS_V3_ACL is not set +# CONFIG_NFS_V4 is not set +# CONFIG_NFS_DIRECTIO is not set +CONFIG_NFSD=y +CONFIG_NFSD_V3=y +# CONFIG_NFSD_V3_ACL is not set +# CONFIG_NFSD_V4 is not set +CONFIG_NFSD_TCP=y +CONFIG_ROOT_NFS=y +CONFIG_LOCKD=y +CONFIG_LOCKD_V4=y +CONFIG_EXPORTFS=y +CONFIG_NFS_COMMON=y +CONFIG_SUNRPC=y +# CONFIG_SUNRPC_BIND34 is not set +# CONFIG_RPCSEC_GSS_KRB5 is not set +# CONFIG_RPCSEC_GSS_SPKM3 is not set +# CONFIG_SMB_FS is not set +# CONFIG_CIFS is not set +# CONFIG_NCP_FS is not set +# CONFIG_CODA_FS is not set +# CONFIG_AFS_FS is not set + +# +# Partition Types +# +# CONFIG_PARTITION_ADVANCED is not set +CONFIG_MSDOS_PARTITION=y + +# +# Native Language Support +# +CONFIG_NLS=y +CONFIG_NLS_DEFAULT="iso8859-1" +CONFIG_NLS_CODEPAGE_437=y +# CONFIG_NLS_CODEPAGE_737 is not set +# CONFIG_NLS_CODEPAGE_775 is not set +# CONFIG_NLS_CODEPAGE_850 is not set +# CONFIG_NLS_CODEPAGE_852 is not set +# CONFIG_NLS_CODEPAGE_855 is not set +# CONFIG_NLS_CODEPAGE_857 is not set +# CONFIG_NLS_CODEPAGE_860 is not set +# CONFIG_NLS_CODEPAGE_861 is not set +# CONFIG_NLS_CODEPAGE_862 is not set +# CONFIG_NLS_CODEPAGE_863 is not set +# CONFIG_NLS_CODEPAGE_864 is not set +# CONFIG_NLS_CODEPAGE_865 is not set +# CONFIG_NLS_CODEPAGE_866 is not set +# CONFIG_NLS_CODEPAGE_869 is not set +# CONFIG_NLS_CODEPAGE_936 is not set +# CONFIG_NLS_CODEPAGE_950 is not set +# CONFIG_NLS_CODEPAGE_932 is not set +# CONFIG_NLS_CODEPAGE_949 is not set +# CONFIG_NLS_CODEPAGE_874 is not set +# CONFIG_NLS_ISO8859_8 is not set +# CONFIG_NLS_CODEPAGE_1250 is not set +# CONFIG_NLS_CODEPAGE_1251 is not set +CONFIG_NLS_ASCII=y +CONFIG_NLS_ISO8859_1=y +# CONFIG_NLS_ISO8859_2 is not set +# CONFIG_NLS_ISO8859_3 is not set +# CONFIG_NLS_ISO8859_4 is not set +# CONFIG_NLS_ISO8859_5 is not set +# CONFIG_NLS_ISO8859_6 is not set +# CONFIG_NLS_ISO8859_7 is not set +# CONFIG_NLS_ISO8859_9 is not set +# CONFIG_NLS_ISO8859_13 is not set +# CONFIG_NLS_ISO8859_14 is not set +CONFIG_NLS_ISO8859_15=y +# CONFIG_NLS_KOI8_R is not set +# CONFIG_NLS_KOI8_U is not set +CONFIG_NLS_UTF8=y + +# +# Distributed Lock Manager +# +# CONFIG_DLM is not set +CONFIG_INSTRUMENTATION=y +CONFIG_PROFILING=y +CONFIG_OPROFILE=y +CONFIG_KPROBES=y + +# +# Kernel hacking +# +CONFIG_TRACE_IRQFLAGS_SUPPORT=y +# CONFIG_PRINTK_TIME is not set +# CONFIG_ENABLE_MUST_CHECK is not set +CONFIG_MAGIC_SYSRQ=y +CONFIG_UNUSED_SYMBOLS=y +# CONFIG_DEBUG_FS is not set +# CONFIG_HEADERS_CHECK is not set +CONFIG_DEBUG_KERNEL=y +# CONFIG_DEBUG_SHIRQ is not set +CONFIG_DETECT_SOFTLOCKUP=y +# CONFIG_SCHED_DEBUG is not set +# CONFIG_SCHEDSTATS is not set +CONFIG_TIMER_STATS=y +# CONFIG_SLUB_DEBUG_ON is not set +# CONFIG_DEBUG_RT_MUTEXES is not set +# CONFIG_RT_MUTEX_TESTER is not set +# CONFIG_DEBUG_SPINLOCK is not set +# CONFIG_DEBUG_MUTEXES is not set +# CONFIG_DEBUG_LOCK_ALLOC is not set +# CONFIG_PROVE_LOCKING is not set +# CONFIG_LOCK_STAT is not set +# CONFIG_DEBUG_SPINLOCK_SLEEP is not set +# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set +# CONFIG_DEBUG_KOBJECT is not set +# CONFIG_DEBUG_HIGHMEM is not set +CONFIG_DEBUG_BUGVERBOSE=y +# CONFIG_DEBUG_INFO is not set +# CONFIG_DEBUG_VM is not set +# CONFIG_DEBUG_LIST is not set +# CONFIG_FRAME_POINTER is not set +# CONFIG_FORCED_INLINING is not set +# CONFIG_RCU_TORTURE_TEST is not set +# CONFIG_LKDTM is not set +# CONFIG_FAULT_INJECTION is not set +CONFIG_EARLY_PRINTK=y +CONFIG_DEBUG_STACKOVERFLOW=y +# CONFIG_DEBUG_STACK_USAGE is not set +# CONFIG_DEBUG_RODATA is not set +# CONFIG_4KSTACKS is not set +CONFIG_X86_FIND_SMP_CONFIG=y +CONFIG_X86_MPPARSE=y +CONFIG_DOUBLEFAULT=y + +# +# Security options +# +# CONFIG_KEYS is not set +# CONFIG_SECURITY is not set +# CONFIG_CRYPTO is not set + +# +# Library routines +# +CONFIG_BITREVERSE=y +# CONFIG_CRC_CCITT is not set +# CONFIG_CRC16 is not set +# CONFIG_CRC_ITU_T is not set +CONFIG_CRC32=y +# CONFIG_CRC7 is not set +# CONFIG_LIBCRC32C is not set +CONFIG_ZLIB_INFLATE=y +CONFIG_PLIST=y +CONFIG_HAS_IOMEM=y +CONFIG_HAS_IOPORT=y +CONFIG_HAS_DMA=y +CONFIG_GENERIC_HARDIRQS=y +CONFIG_GENERIC_IRQ_PROBE=y +CONFIG_GENERIC_PENDING_IRQ=y +CONFIG_X86_SMP=y +CONFIG_X86_HT=y +CONFIG_X86_BIOS_REBOOT=y +CONFIG_X86_TRAMPOLINE=y +CONFIG_KTIME_SCALAR=y diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig new file mode 100644 index 000000000000..38a83f9c966f --- /dev/null +++ b/arch/x86/configs/x86_64_defconfig @@ -0,0 +1,1388 @@ +# +# Automatically generated make config: don't edit +# Linux kernel version: 2.6.22-git14 +# Fri Jul 20 09:53:15 2007 +# +CONFIG_X86_64=y +CONFIG_64BIT=y +CONFIG_X86=y +CONFIG_GENERIC_TIME=y +CONFIG_GENERIC_TIME_VSYSCALL=y +CONFIG_GENERIC_CMOS_UPDATE=y +CONFIG_ZONE_DMA32=y +CONFIG_LOCKDEP_SUPPORT=y +CONFIG_STACKTRACE_SUPPORT=y +CONFIG_SEMAPHORE_SLEEPERS=y +CONFIG_MMU=y +CONFIG_ZONE_DMA=y +CONFIG_QUICKLIST=y +CONFIG_NR_QUICK=2 +CONFIG_RWSEM_GENERIC_SPINLOCK=y +CONFIG_GENERIC_HWEIGHT=y +CONFIG_GENERIC_CALIBRATE_DELAY=y +CONFIG_X86_CMPXCHG=y +CONFIG_EARLY_PRINTK=y +CONFIG_GENERIC_ISA_DMA=y +CONFIG_GENERIC_IOMAP=y +CONFIG_ARCH_MAY_HAVE_PC_FDC=y +CONFIG_ARCH_POPULATES_NODE_MAP=y +CONFIG_DMI=y +CONFIG_AUDIT_ARCH=y +CONFIG_GENERIC_BUG=y +# CONFIG_ARCH_HAS_ILOG2_U32 is not set +# CONFIG_ARCH_HAS_ILOG2_U64 is not set +CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" + +# +# Code maturity level options +# +CONFIG_EXPERIMENTAL=y +CONFIG_LOCK_KERNEL=y +CONFIG_INIT_ENV_ARG_LIMIT=32 + +# +# General setup +# +CONFIG_LOCALVERSION="" +CONFIG_LOCALVERSION_AUTO=y +CONFIG_SWAP=y +CONFIG_SYSVIPC=y +CONFIG_SYSVIPC_SYSCTL=y +CONFIG_POSIX_MQUEUE=y +# CONFIG_BSD_PROCESS_ACCT is not set +# CONFIG_TASKSTATS is not set +# CONFIG_USER_NS is not set +# CONFIG_AUDIT is not set +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_LOG_BUF_SHIFT=18 +# CONFIG_CPUSETS is not set +CONFIG_SYSFS_DEPRECATED=y +CONFIG_RELAY=y +CONFIG_BLK_DEV_INITRD=y +CONFIG_INITRAMFS_SOURCE="" +CONFIG_CC_OPTIMIZE_FOR_SIZE=y +CONFIG_SYSCTL=y +# CONFIG_EMBEDDED is not set +CONFIG_UID16=y +CONFIG_SYSCTL_SYSCALL=y +CONFIG_KALLSYMS=y +CONFIG_KALLSYMS_ALL=y +# CONFIG_KALLSYMS_EXTRA_PASS is not set +CONFIG_HOTPLUG=y +CONFIG_PRINTK=y +CONFIG_BUG=y +CONFIG_ELF_CORE=y +CONFIG_BASE_FULL=y +CONFIG_FUTEX=y +CONFIG_ANON_INODES=y +CONFIG_EPOLL=y +CONFIG_SIGNALFD=y +CONFIG_TIMERFD=y +CONFIG_EVENTFD=y +CONFIG_SHMEM=y +CONFIG_VM_EVENT_COUNTERS=y +CONFIG_SLAB=y +# CONFIG_SLUB is not set +# CONFIG_SLOB is not set +CONFIG_RT_MUTEXES=y +# CONFIG_TINY_SHMEM is not set +CONFIG_BASE_SMALL=0 +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y +CONFIG_MODULE_FORCE_UNLOAD=y +# CONFIG_MODVERSIONS is not set +# CONFIG_MODULE_SRCVERSION_ALL is not set +# CONFIG_KMOD is not set +CONFIG_STOP_MACHINE=y +CONFIG_BLOCK=y +# CONFIG_BLK_DEV_IO_TRACE is not set +# CONFIG_BLK_DEV_BSG is not set + +# +# IO Schedulers +# +CONFIG_IOSCHED_NOOP=y +# CONFIG_IOSCHED_AS is not set +CONFIG_IOSCHED_DEADLINE=y +CONFIG_IOSCHED_CFQ=y +# CONFIG_DEFAULT_AS is not set +# CONFIG_DEFAULT_DEADLINE is not set +CONFIG_DEFAULT_CFQ=y +# CONFIG_DEFAULT_NOOP is not set +CONFIG_DEFAULT_IOSCHED="cfq" + +# +# Processor type and features +# +CONFIG_X86_PC=y +# CONFIG_X86_VSMP is not set +# CONFIG_MK8 is not set +# CONFIG_MPSC is not set +# CONFIG_MCORE2 is not set +CONFIG_GENERIC_CPU=y +CONFIG_X86_L1_CACHE_BYTES=128 +CONFIG_X86_L1_CACHE_SHIFT=7 +CONFIG_X86_INTERNODE_CACHE_BYTES=128 +CONFIG_X86_TSC=y +CONFIG_X86_GOOD_APIC=y +# CONFIG_MICROCODE is not set +CONFIG_X86_MSR=y +CONFIG_X86_CPUID=y +CONFIG_X86_HT=y +CONFIG_X86_IO_APIC=y +CONFIG_X86_LOCAL_APIC=y +CONFIG_MTRR=y +CONFIG_SMP=y +CONFIG_SCHED_SMT=y +CONFIG_SCHED_MC=y +# CONFIG_PREEMPT_NONE is not set +CONFIG_PREEMPT_VOLUNTARY=y +# CONFIG_PREEMPT is not set +CONFIG_PREEMPT_BKL=y +CONFIG_NUMA=y +CONFIG_K8_NUMA=y +CONFIG_NODES_SHIFT=6 +CONFIG_X86_64_ACPI_NUMA=y +CONFIG_NUMA_EMU=y +CONFIG_ARCH_DISCONTIGMEM_ENABLE=y +CONFIG_ARCH_DISCONTIGMEM_DEFAULT=y +CONFIG_ARCH_SPARSEMEM_ENABLE=y +CONFIG_SELECT_MEMORY_MODEL=y +# CONFIG_FLATMEM_MANUAL is not set +CONFIG_DISCONTIGMEM_MANUAL=y +# CONFIG_SPARSEMEM_MANUAL is not set +CONFIG_DISCONTIGMEM=y +CONFIG_FLAT_NODE_MEM_MAP=y +CONFIG_NEED_MULTIPLE_NODES=y +# CONFIG_SPARSEMEM_STATIC is not set +CONFIG_SPLIT_PTLOCK_CPUS=4 +CONFIG_MIGRATION=y +CONFIG_RESOURCES_64BIT=y +CONFIG_ZONE_DMA_FLAG=1 +CONFIG_BOUNCE=y +CONFIG_VIRT_TO_BUS=y +CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID=y +CONFIG_OUT_OF_LINE_PFN_TO_PAGE=y +CONFIG_NR_CPUS=32 +CONFIG_PHYSICAL_ALIGN=0x200000 +CONFIG_HOTPLUG_CPU=y +CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y +CONFIG_HPET_TIMER=y +CONFIG_HPET_EMULATE_RTC=y +CONFIG_GART_IOMMU=y +# CONFIG_CALGARY_IOMMU is not set +CONFIG_SWIOTLB=y +CONFIG_X86_MCE=y +CONFIG_X86_MCE_INTEL=y +CONFIG_X86_MCE_AMD=y +# CONFIG_KEXEC is not set +# CONFIG_CRASH_DUMP is not set +# CONFIG_RELOCATABLE is not set +CONFIG_PHYSICAL_START=0x200000 +CONFIG_SECCOMP=y +# CONFIG_CC_STACKPROTECTOR is not set +# CONFIG_HZ_100 is not set +CONFIG_HZ_250=y +# CONFIG_HZ_300 is not set +# CONFIG_HZ_1000 is not set +CONFIG_HZ=250 +CONFIG_K8_NB=y +CONFIG_GENERIC_HARDIRQS=y +CONFIG_GENERIC_IRQ_PROBE=y +CONFIG_ISA_DMA_API=y +CONFIG_GENERIC_PENDING_IRQ=y + +# +# Power management options +# +CONFIG_PM=y +# CONFIG_PM_LEGACY is not set +# CONFIG_PM_DEBUG is not set +CONFIG_HIBERNATION=y +CONFIG_PM_STD_PARTITION="" + +# +# ACPI (Advanced Configuration and Power Interface) Support +# +CONFIG_ACPI=y +CONFIG_ACPI_SLEEP=y +CONFIG_ACPI_SLEEP_PROC_FS=y +CONFIG_ACPI_SLEEP_PROC_SLEEP=y +CONFIG_ACPI_PROCFS=y +CONFIG_ACPI_AC=y +CONFIG_ACPI_BATTERY=y +CONFIG_ACPI_BUTTON=y +CONFIG_ACPI_FAN=y +# CONFIG_ACPI_DOCK is not set +CONFIG_ACPI_PROCESSOR=y +CONFIG_ACPI_HOTPLUG_CPU=y +CONFIG_ACPI_THERMAL=y +CONFIG_ACPI_NUMA=y +# CONFIG_ACPI_ASUS is not set +# CONFIG_ACPI_TOSHIBA is not set +CONFIG_ACPI_BLACKLIST_YEAR=0 +# CONFIG_ACPI_DEBUG is not set +CONFIG_ACPI_EC=y +CONFIG_ACPI_POWER=y +CONFIG_ACPI_SYSTEM=y +CONFIG_X86_PM_TIMER=y +CONFIG_ACPI_CONTAINER=y +# CONFIG_ACPI_SBS is not set + +# +# CPU Frequency scaling +# +CONFIG_CPU_FREQ=y +CONFIG_CPU_FREQ_TABLE=y +CONFIG_CPU_FREQ_DEBUG=y +CONFIG_CPU_FREQ_STAT=y +# CONFIG_CPU_FREQ_STAT_DETAILS is not set +CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE=y +# CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE is not set +CONFIG_CPU_FREQ_GOV_PERFORMANCE=y +# CONFIG_CPU_FREQ_GOV_POWERSAVE is not set +CONFIG_CPU_FREQ_GOV_USERSPACE=y +CONFIG_CPU_FREQ_GOV_ONDEMAND=y +CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y + +# +# CPUFreq processor drivers +# +CONFIG_X86_POWERNOW_K8=y +CONFIG_X86_POWERNOW_K8_ACPI=y +# CONFIG_X86_SPEEDSTEP_CENTRINO is not set +CONFIG_X86_ACPI_CPUFREQ=y + +# +# shared options +# +CONFIG_X86_ACPI_CPUFREQ_PROC_INTF=y +# CONFIG_X86_SPEEDSTEP_LIB is not set + +# +# Bus options (PCI etc.) +# +CONFIG_PCI=y +CONFIG_PCI_DIRECT=y +CONFIG_PCI_MMCONFIG=y +CONFIG_PCIEPORTBUS=y +CONFIG_PCIEAER=y +CONFIG_ARCH_SUPPORTS_MSI=y +CONFIG_PCI_MSI=y +# CONFIG_PCI_DEBUG is not set +# CONFIG_HT_IRQ is not set + +# +# PCCARD (PCMCIA/CardBus) support +# +# CONFIG_PCCARD is not set +# CONFIG_HOTPLUG_PCI is not set + +# +# Executable file formats / Emulations +# +CONFIG_BINFMT_ELF=y +# CONFIG_BINFMT_MISC is not set +CONFIG_IA32_EMULATION=y +CONFIG_IA32_AOUT=y +CONFIG_COMPAT=y +CONFIG_SYSVIPC_COMPAT=y + +# +# Networking +# +CONFIG_NET=y + +# +# Networking options +# +CONFIG_PACKET=y +# CONFIG_PACKET_MMAP is not set +CONFIG_UNIX=y +# CONFIG_NET_KEY is not set +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +# CONFIG_IP_ADVANCED_ROUTER is not set +CONFIG_IP_FIB_HASH=y +CONFIG_IP_PNP=y +CONFIG_IP_PNP_DHCP=y +# CONFIG_IP_PNP_BOOTP is not set +# CONFIG_IP_PNP_RARP is not set +# CONFIG_NET_IPIP is not set +# CONFIG_NET_IPGRE is not set +# CONFIG_IP_MROUTE is not set +# CONFIG_ARPD is not set +# CONFIG_SYN_COOKIES is not set +# CONFIG_INET_AH is not set +# CONFIG_INET_ESP is not set +# CONFIG_INET_IPCOMP is not set +# CONFIG_INET_XFRM_TUNNEL is not set +CONFIG_INET_TUNNEL=y +# CONFIG_INET_XFRM_MODE_TRANSPORT is not set +# CONFIG_INET_XFRM_MODE_TUNNEL is not set +# CONFIG_INET_XFRM_MODE_BEET is not set +CONFIG_INET_DIAG=y +CONFIG_INET_TCP_DIAG=y +# CONFIG_TCP_CONG_ADVANCED is not set +CONFIG_TCP_CONG_CUBIC=y +CONFIG_DEFAULT_TCP_CONG="cubic" +# CONFIG_TCP_MD5SIG is not set +CONFIG_IPV6=y +# CONFIG_IPV6_PRIVACY is not set +# CONFIG_IPV6_ROUTER_PREF is not set +# CONFIG_IPV6_OPTIMISTIC_DAD is not set +# CONFIG_INET6_AH is not set +# CONFIG_INET6_ESP is not set +# CONFIG_INET6_IPCOMP is not set +# CONFIG_IPV6_MIP6 is not set +# CONFIG_INET6_XFRM_TUNNEL is not set +# CONFIG_INET6_TUNNEL is not set +# CONFIG_INET6_XFRM_MODE_TRANSPORT is not set +# CONFIG_INET6_XFRM_MODE_TUNNEL is not set +# CONFIG_INET6_XFRM_MODE_BEET is not set +# CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION is not set +CONFIG_IPV6_SIT=y +# CONFIG_IPV6_TUNNEL is not set +# CONFIG_IPV6_MULTIPLE_TABLES is not set +# CONFIG_NETWORK_SECMARK is not set +# CONFIG_NETFILTER is not set +# CONFIG_IP_DCCP is not set +# CONFIG_IP_SCTP is not set +# CONFIG_TIPC is not set +# CONFIG_ATM is not set +# CONFIG_BRIDGE is not set +# CONFIG_VLAN_8021Q is not set +# CONFIG_DECNET is not set +# CONFIG_LLC2 is not set +# CONFIG_IPX is not set +# CONFIG_ATALK is not set +# CONFIG_X25 is not set +# CONFIG_LAPB is not set +# CONFIG_ECONET is not set +# CONFIG_WAN_ROUTER is not set + +# +# QoS and/or fair queueing +# +# CONFIG_NET_SCHED is not set + +# +# Network testing +# +# CONFIG_NET_PKTGEN is not set +# CONFIG_NET_TCPPROBE is not set +# CONFIG_HAMRADIO is not set +# CONFIG_IRDA is not set +# CONFIG_BT is not set +# CONFIG_AF_RXRPC is not set + +# +# Wireless +# +# CONFIG_CFG80211 is not set +# CONFIG_WIRELESS_EXT is not set +# CONFIG_MAC80211 is not set +# CONFIG_IEEE80211 is not set +# CONFIG_RFKILL is not set +# CONFIG_NET_9P is not set + +# +# Device Drivers +# + +# +# Generic Driver Options +# +CONFIG_STANDALONE=y +CONFIG_PREVENT_FIRMWARE_BUILD=y +CONFIG_FW_LOADER=y +# CONFIG_DEBUG_DRIVER is not set +# CONFIG_DEBUG_DEVRES is not set +# CONFIG_SYS_HYPERVISOR is not set +# CONFIG_CONNECTOR is not set +# CONFIG_MTD is not set +# CONFIG_PARPORT is not set +CONFIG_PNP=y +# CONFIG_PNP_DEBUG is not set + +# +# Protocols +# +CONFIG_PNPACPI=y +CONFIG_BLK_DEV=y +CONFIG_BLK_DEV_FD=y +# CONFIG_BLK_CPQ_DA is not set +# CONFIG_BLK_CPQ_CISS_DA is not set +# CONFIG_BLK_DEV_DAC960 is not set +# CONFIG_BLK_DEV_UMEM is not set +# CONFIG_BLK_DEV_COW_COMMON is not set +CONFIG_BLK_DEV_LOOP=y +# CONFIG_BLK_DEV_CRYPTOLOOP is not set +# CONFIG_BLK_DEV_NBD is not set +# CONFIG_BLK_DEV_SX8 is not set +# CONFIG_BLK_DEV_UB is not set +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_COUNT=16 +CONFIG_BLK_DEV_RAM_SIZE=4096 +CONFIG_BLK_DEV_RAM_BLOCKSIZE=1024 +# CONFIG_CDROM_PKTCDVD is not set +# CONFIG_ATA_OVER_ETH is not set +CONFIG_MISC_DEVICES=y +# CONFIG_IBM_ASM is not set +# CONFIG_PHANTOM is not set +# CONFIG_EEPROM_93CX6 is not set +# CONFIG_SGI_IOC4 is not set +# CONFIG_TIFM_CORE is not set +# CONFIG_SONY_LAPTOP is not set +# CONFIG_THINKPAD_ACPI is not set +CONFIG_IDE=y +CONFIG_BLK_DEV_IDE=y + +# +# Please see Documentation/ide.txt for help/info on IDE drives +# +# CONFIG_BLK_DEV_IDE_SATA is not set +# CONFIG_BLK_DEV_HD_IDE is not set +CONFIG_BLK_DEV_IDEDISK=y +CONFIG_IDEDISK_MULTI_MODE=y +CONFIG_BLK_DEV_IDECD=y +# CONFIG_BLK_DEV_IDETAPE is not set +# CONFIG_BLK_DEV_IDEFLOPPY is not set +# CONFIG_BLK_DEV_IDESCSI is not set +CONFIG_BLK_DEV_IDEACPI=y +# CONFIG_IDE_TASK_IOCTL is not set +CONFIG_IDE_PROC_FS=y + +# +# IDE chipset support/bugfixes +# +CONFIG_IDE_GENERIC=y +# CONFIG_BLK_DEV_CMD640 is not set +# CONFIG_BLK_DEV_IDEPNP is not set +CONFIG_BLK_DEV_IDEPCI=y +# CONFIG_IDEPCI_SHARE_IRQ is not set +CONFIG_IDEPCI_PCIBUS_ORDER=y +# CONFIG_BLK_DEV_OFFBOARD is not set +# CONFIG_BLK_DEV_GENERIC is not set +# CONFIG_BLK_DEV_OPTI621 is not set +# CONFIG_BLK_DEV_RZ1000 is not set +CONFIG_BLK_DEV_IDEDMA_PCI=y +# CONFIG_BLK_DEV_IDEDMA_FORCED is not set +# CONFIG_IDEDMA_ONLYDISK is not set +# CONFIG_BLK_DEV_AEC62XX is not set +# CONFIG_BLK_DEV_ALI15X3 is not set +CONFIG_BLK_DEV_AMD74XX=y +CONFIG_BLK_DEV_ATIIXP=y +# CONFIG_BLK_DEV_CMD64X is not set +# CONFIG_BLK_DEV_TRIFLEX is not set +# CONFIG_BLK_DEV_CY82C693 is not set +# CONFIG_BLK_DEV_CS5520 is not set +# CONFIG_BLK_DEV_CS5530 is not set +# CONFIG_BLK_DEV_HPT34X is not set +# CONFIG_BLK_DEV_HPT366 is not set +# CONFIG_BLK_DEV_JMICRON is not set +# CONFIG_BLK_DEV_SC1200 is not set +CONFIG_BLK_DEV_PIIX=y +# CONFIG_BLK_DEV_IT8213 is not set +# CONFIG_BLK_DEV_IT821X is not set +# CONFIG_BLK_DEV_NS87415 is not set +# CONFIG_BLK_DEV_PDC202XX_OLD is not set +CONFIG_BLK_DEV_PDC202XX_NEW=y +# CONFIG_BLK_DEV_SVWKS is not set +# CONFIG_BLK_DEV_SIIMAGE is not set +# CONFIG_BLK_DEV_SIS5513 is not set +# CONFIG_BLK_DEV_SLC90E66 is not set +# CONFIG_BLK_DEV_TRM290 is not set +# CONFIG_BLK_DEV_VIA82CXXX is not set +# CONFIG_BLK_DEV_TC86C001 is not set +# CONFIG_IDE_ARM is not set +CONFIG_BLK_DEV_IDEDMA=y +# CONFIG_IDEDMA_IVB is not set +# CONFIG_BLK_DEV_HD is not set + +# +# SCSI device support +# +# CONFIG_RAID_ATTRS is not set +CONFIG_SCSI=y +CONFIG_SCSI_DMA=y +# CONFIG_SCSI_TGT is not set +CONFIG_SCSI_NETLINK=y +# CONFIG_SCSI_PROC_FS is not set + +# +# SCSI support type (disk, tape, CD-ROM) +# +CONFIG_BLK_DEV_SD=y +# CONFIG_CHR_DEV_ST is not set +# CONFIG_CHR_DEV_OSST is not set +CONFIG_BLK_DEV_SR=y +# CONFIG_BLK_DEV_SR_VENDOR is not set +CONFIG_CHR_DEV_SG=y +# CONFIG_CHR_DEV_SCH is not set + +# +# Some SCSI devices (e.g. CD jukebox) support multiple LUNs +# +# CONFIG_SCSI_MULTI_LUN is not set +CONFIG_SCSI_CONSTANTS=y +# CONFIG_SCSI_LOGGING is not set +# CONFIG_SCSI_SCAN_ASYNC is not set +CONFIG_SCSI_WAIT_SCAN=m + +# +# SCSI Transports +# +CONFIG_SCSI_SPI_ATTRS=y +CONFIG_SCSI_FC_ATTRS=y +# CONFIG_SCSI_ISCSI_ATTRS is not set +CONFIG_SCSI_SAS_ATTRS=y +# CONFIG_SCSI_SAS_LIBSAS is not set + +# +# SCSI low-level drivers +# +# CONFIG_ISCSI_TCP is not set +# CONFIG_BLK_DEV_3W_XXXX_RAID is not set +# CONFIG_SCSI_3W_9XXX is not set +# CONFIG_SCSI_ACARD is not set +# CONFIG_SCSI_AACRAID is not set +# CONFIG_SCSI_AIC7XXX is not set +# CONFIG_SCSI_AIC7XXX_OLD is not set +CONFIG_SCSI_AIC79XX=y +CONFIG_AIC79XX_CMDS_PER_DEVICE=32 +CONFIG_AIC79XX_RESET_DELAY_MS=4000 +# CONFIG_AIC79XX_DEBUG_ENABLE is not set +CONFIG_AIC79XX_DEBUG_MASK=0 +# CONFIG_AIC79XX_REG_PRETTY_PRINT is not set +# CONFIG_SCSI_AIC94XX is not set +# CONFIG_SCSI_ARCMSR is not set +# CONFIG_MEGARAID_NEWGEN is not set +# CONFIG_MEGARAID_LEGACY is not set +# CONFIG_MEGARAID_SAS is not set +# CONFIG_SCSI_HPTIOP is not set +# CONFIG_SCSI_BUSLOGIC is not set +# CONFIG_SCSI_DMX3191D is not set +# CONFIG_SCSI_EATA is not set +# CONFIG_SCSI_FUTURE_DOMAIN is not set +# CONFIG_SCSI_GDTH is not set +# CONFIG_SCSI_IPS is not set +# CONFIG_SCSI_INITIO is not set +# CONFIG_SCSI_INIA100 is not set +# CONFIG_SCSI_STEX is not set +# CONFIG_SCSI_SYM53C8XX_2 is not set +# CONFIG_SCSI_IPR is not set +# CONFIG_SCSI_QLOGIC_1280 is not set +# CONFIG_SCSI_QLA_FC is not set +# CONFIG_SCSI_QLA_ISCSI is not set +# CONFIG_SCSI_LPFC is not set +# CONFIG_SCSI_DC395x is not set +# CONFIG_SCSI_DC390T is not set +# CONFIG_SCSI_DEBUG is not set +# CONFIG_SCSI_SRP is not set +CONFIG_ATA=y +# CONFIG_ATA_NONSTANDARD is not set +CONFIG_ATA_ACPI=y +CONFIG_SATA_AHCI=y +CONFIG_SATA_SVW=y +CONFIG_ATA_PIIX=y +# CONFIG_SATA_MV is not set +CONFIG_SATA_NV=y +# CONFIG_PDC_ADMA is not set +# CONFIG_SATA_QSTOR is not set +# CONFIG_SATA_PROMISE is not set +# CONFIG_SATA_SX4 is not set +CONFIG_SATA_SIL=y +# CONFIG_SATA_SIL24 is not set +# CONFIG_SATA_SIS is not set +# CONFIG_SATA_ULI is not set +CONFIG_SATA_VIA=y +# CONFIG_SATA_VITESSE is not set +# CONFIG_SATA_INIC162X is not set +# CONFIG_PATA_ALI is not set +# CONFIG_PATA_AMD is not set +# CONFIG_PATA_ARTOP is not set +# CONFIG_PATA_ATIIXP is not set +# CONFIG_PATA_CMD640_PCI is not set +# CONFIG_PATA_CMD64X is not set +# CONFIG_PATA_CS5520 is not set +# CONFIG_PATA_CS5530 is not set +# CONFIG_PATA_CYPRESS is not set +# CONFIG_PATA_EFAR is not set +# CONFIG_ATA_GENERIC is not set +# CONFIG_PATA_HPT366 is not set +# CONFIG_PATA_HPT37X is not set +# CONFIG_PATA_HPT3X2N is not set +# CONFIG_PATA_HPT3X3 is not set +# CONFIG_PATA_IT821X is not set +# CONFIG_PATA_IT8213 is not set +# CONFIG_PATA_JMICRON is not set +# CONFIG_PATA_TRIFLEX is not set +# CONFIG_PATA_MARVELL is not set +# CONFIG_PATA_MPIIX is not set +# CONFIG_PATA_OLDPIIX is not set +# CONFIG_PATA_NETCELL is not set +# CONFIG_PATA_NS87410 is not set +# CONFIG_PATA_OPTI is not set +# CONFIG_PATA_OPTIDMA is not set +# CONFIG_PATA_PDC_OLD is not set +# CONFIG_PATA_RADISYS is not set +# CONFIG_PATA_RZ1000 is not set +# CONFIG_PATA_SC1200 is not set +# CONFIG_PATA_SERVERWORKS is not set +# CONFIG_PATA_PDC2027X is not set +# CONFIG_PATA_SIL680 is not set +# CONFIG_PATA_SIS is not set +# CONFIG_PATA_VIA is not set +# CONFIG_PATA_WINBOND is not set +CONFIG_MD=y +# CONFIG_BLK_DEV_MD is not set +CONFIG_BLK_DEV_DM=y +# CONFIG_DM_DEBUG is not set +# CONFIG_DM_CRYPT is not set +# CONFIG_DM_SNAPSHOT is not set +# CONFIG_DM_MIRROR is not set +# CONFIG_DM_ZERO is not set +# CONFIG_DM_MULTIPATH is not set +# CONFIG_DM_DELAY is not set + +# +# Fusion MPT device support +# +CONFIG_FUSION=y +CONFIG_FUSION_SPI=y +# CONFIG_FUSION_FC is not set +# CONFIG_FUSION_SAS is not set +CONFIG_FUSION_MAX_SGE=128 +# CONFIG_FUSION_CTL is not set + +# +# IEEE 1394 (FireWire) support +# +# CONFIG_FIREWIRE is not set +CONFIG_IEEE1394=y + +# +# Subsystem Options +# +# CONFIG_IEEE1394_VERBOSEDEBUG is not set + +# +# Controllers +# + +# +# Texas Instruments PCILynx requires I2C +# +CONFIG_IEEE1394_OHCI1394=y + +# +# Protocols +# +# CONFIG_IEEE1394_VIDEO1394 is not set +# CONFIG_IEEE1394_SBP2 is not set +# CONFIG_IEEE1394_ETH1394_ROM_ENTRY is not set +# CONFIG_IEEE1394_ETH1394 is not set +# CONFIG_IEEE1394_DV1394 is not set +CONFIG_IEEE1394_RAWIO=y +# CONFIG_I2O is not set +CONFIG_MACINTOSH_DRIVERS=y +# CONFIG_MAC_EMUMOUSEBTN is not set +CONFIG_NETDEVICES=y +CONFIG_NETDEVICES_MULTIQUEUE=y +# CONFIG_DUMMY is not set +# CONFIG_BONDING is not set +# CONFIG_MACVLAN is not set +# CONFIG_EQUALIZER is not set +CONFIG_TUN=y +# CONFIG_NET_SB1000 is not set +# CONFIG_ARCNET is not set +# CONFIG_PHYLIB is not set +CONFIG_NET_ETHERNET=y +CONFIG_MII=y +# CONFIG_HAPPYMEAL is not set +# CONFIG_SUNGEM is not set +# CONFIG_CASSINI is not set +CONFIG_NET_VENDOR_3COM=y +CONFIG_VORTEX=y +# CONFIG_TYPHOON is not set +CONFIG_NET_TULIP=y +# CONFIG_DE2104X is not set +CONFIG_TULIP=y +# CONFIG_TULIP_MWI is not set +# CONFIG_TULIP_MMIO is not set +# CONFIG_TULIP_NAPI is not set +# CONFIG_DE4X5 is not set +# CONFIG_WINBOND_840 is not set +# CONFIG_DM9102 is not set +# CONFIG_ULI526X is not set +# CONFIG_HP100 is not set +CONFIG_NET_PCI=y +# CONFIG_PCNET32 is not set +CONFIG_AMD8111_ETH=y +# CONFIG_AMD8111E_NAPI is not set +# CONFIG_ADAPTEC_STARFIRE is not set +CONFIG_B44=y +CONFIG_FORCEDETH=y +# CONFIG_FORCEDETH_NAPI is not set +# CONFIG_DGRS is not set +# CONFIG_EEPRO100 is not set +CONFIG_E100=y +# CONFIG_FEALNX is not set +# CONFIG_NATSEMI is not set +# CONFIG_NE2K_PCI is not set +CONFIG_8139CP=y +CONFIG_8139TOO=y +# CONFIG_8139TOO_PIO is not set +# CONFIG_8139TOO_TUNE_TWISTER is not set +# CONFIG_8139TOO_8129 is not set +# CONFIG_8139_OLD_RX_RESET is not set +# CONFIG_SIS900 is not set +# CONFIG_EPIC100 is not set +# CONFIG_SUNDANCE is not set +# CONFIG_VIA_RHINE is not set +# CONFIG_SC92031 is not set +CONFIG_NETDEV_1000=y +# CONFIG_ACENIC is not set +# CONFIG_DL2K is not set +CONFIG_E1000=y +# CONFIG_E1000_NAPI is not set +# CONFIG_E1000_DISABLE_PACKET_SPLIT is not set +# CONFIG_NS83820 is not set +# CONFIG_HAMACHI is not set +# CONFIG_YELLOWFIN is not set +# CONFIG_R8169 is not set +# CONFIG_SIS190 is not set +# CONFIG_SKGE is not set +# CONFIG_SKY2 is not set +# CONFIG_VIA_VELOCITY is not set +CONFIG_TIGON3=y +CONFIG_BNX2=y +# CONFIG_QLA3XXX is not set +# CONFIG_ATL1 is not set +CONFIG_NETDEV_10000=y +# CONFIG_CHELSIO_T1 is not set +# CONFIG_CHELSIO_T3 is not set +# CONFIG_IXGB is not set +CONFIG_S2IO=m +# CONFIG_S2IO_NAPI is not set +# CONFIG_MYRI10GE is not set +# CONFIG_NETXEN_NIC is not set +# CONFIG_MLX4_CORE is not set +# CONFIG_TR is not set + +# +# Wireless LAN +# +# CONFIG_WLAN_PRE80211 is not set +# CONFIG_WLAN_80211 is not set + +# +# USB Network Adapters +# +# CONFIG_USB_CATC is not set +# CONFIG_USB_KAWETH is not set +# CONFIG_USB_PEGASUS is not set +# CONFIG_USB_RTL8150 is not set +# CONFIG_USB_USBNET_MII is not set +# CONFIG_USB_USBNET is not set +# CONFIG_WAN is not set +# CONFIG_FDDI is not set +# CONFIG_HIPPI is not set +# CONFIG_PPP is not set +# CONFIG_SLIP is not set +# CONFIG_NET_FC is not set +# CONFIG_SHAPER is not set +CONFIG_NETCONSOLE=y +CONFIG_NETPOLL=y +# CONFIG_NETPOLL_TRAP is not set +CONFIG_NET_POLL_CONTROLLER=y +# CONFIG_ISDN is not set +# CONFIG_PHONE is not set + +# +# Input device support +# +CONFIG_INPUT=y +# CONFIG_INPUT_FF_MEMLESS is not set +# CONFIG_INPUT_POLLDEV is not set + +# +# Userland interfaces +# +CONFIG_INPUT_MOUSEDEV=y +CONFIG_INPUT_MOUSEDEV_PSAUX=y +CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024 +CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 +# CONFIG_INPUT_JOYDEV is not set +# CONFIG_INPUT_TSDEV is not set +CONFIG_INPUT_EVDEV=y +# CONFIG_INPUT_EVBUG is not set + +# +# Input Device Drivers +# +CONFIG_INPUT_KEYBOARD=y +CONFIG_KEYBOARD_ATKBD=y +# CONFIG_KEYBOARD_SUNKBD is not set +# CONFIG_KEYBOARD_LKKBD is not set +# CONFIG_KEYBOARD_XTKBD is not set +# CONFIG_KEYBOARD_NEWTON is not set +# CONFIG_KEYBOARD_STOWAWAY is not set +CONFIG_INPUT_MOUSE=y +CONFIG_MOUSE_PS2=y +CONFIG_MOUSE_PS2_ALPS=y +CONFIG_MOUSE_PS2_LOGIPS2PP=y +CONFIG_MOUSE_PS2_SYNAPTICS=y +CONFIG_MOUSE_PS2_LIFEBOOK=y +CONFIG_MOUSE_PS2_TRACKPOINT=y +# CONFIG_MOUSE_PS2_TOUCHKIT is not set +# CONFIG_MOUSE_SERIAL is not set +# CONFIG_MOUSE_APPLETOUCH is not set +# CONFIG_MOUSE_VSXXXAA is not set +# CONFIG_INPUT_JOYSTICK is not set +# CONFIG_INPUT_TABLET is not set +# CONFIG_INPUT_TOUCHSCREEN is not set +# CONFIG_INPUT_MISC is not set + +# +# Hardware I/O ports +# +CONFIG_SERIO=y +CONFIG_SERIO_I8042=y +# CONFIG_SERIO_SERPORT is not set +# CONFIG_SERIO_CT82C710 is not set +# CONFIG_SERIO_PCIPS2 is not set +CONFIG_SERIO_LIBPS2=y +# CONFIG_SERIO_RAW is not set +# CONFIG_GAMEPORT is not set + +# +# Character devices +# +CONFIG_VT=y +CONFIG_VT_CONSOLE=y +CONFIG_HW_CONSOLE=y +# CONFIG_VT_HW_CONSOLE_BINDING is not set +# CONFIG_SERIAL_NONSTANDARD is not set + +# +# Serial drivers +# +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_FIX_EARLYCON_MEM=y +CONFIG_SERIAL_8250_PCI=y +CONFIG_SERIAL_8250_PNP=y +CONFIG_SERIAL_8250_NR_UARTS=4 +CONFIG_SERIAL_8250_RUNTIME_UARTS=4 +# CONFIG_SERIAL_8250_EXTENDED is not set + +# +# Non-8250 serial port support +# +CONFIG_SERIAL_CORE=y +CONFIG_SERIAL_CORE_CONSOLE=y +# CONFIG_SERIAL_JSM is not set +CONFIG_UNIX98_PTYS=y +CONFIG_LEGACY_PTYS=y +CONFIG_LEGACY_PTY_COUNT=256 +# CONFIG_IPMI_HANDLER is not set +# CONFIG_WATCHDOG is not set +CONFIG_HW_RANDOM=y +CONFIG_HW_RANDOM_INTEL=y +CONFIG_HW_RANDOM_AMD=y +# CONFIG_NVRAM is not set +CONFIG_RTC=y +# CONFIG_R3964 is not set +# CONFIG_APPLICOM is not set +CONFIG_AGP=y +CONFIG_AGP_AMD64=y +CONFIG_AGP_INTEL=y +# CONFIG_AGP_SIS is not set +# CONFIG_AGP_VIA is not set +# CONFIG_DRM is not set +# CONFIG_MWAVE is not set +# CONFIG_PC8736x_GPIO is not set +CONFIG_RAW_DRIVER=y +CONFIG_MAX_RAW_DEVS=256 +CONFIG_HPET=y +# CONFIG_HPET_RTC_IRQ is not set +CONFIG_HPET_MMAP=y +# CONFIG_HANGCHECK_TIMER is not set +# CONFIG_TCG_TPM is not set +# CONFIG_TELCLOCK is not set +CONFIG_DEVPORT=y +# CONFIG_I2C is not set + +# +# SPI support +# +# CONFIG_SPI is not set +# CONFIG_SPI_MASTER is not set +# CONFIG_W1 is not set +# CONFIG_POWER_SUPPLY is not set +# CONFIG_HWMON is not set + +# +# Multifunction device drivers +# +# CONFIG_MFD_SM501 is not set + +# +# Multimedia devices +# +# CONFIG_VIDEO_DEV is not set +# CONFIG_DVB_CORE is not set +CONFIG_DAB=y +# CONFIG_USB_DABUSB is not set + +# +# Graphics support +# +# CONFIG_BACKLIGHT_LCD_SUPPORT is not set + +# +# Display device support +# +# CONFIG_DISPLAY_SUPPORT is not set +# CONFIG_VGASTATE is not set +# CONFIG_FB is not set + +# +# Console display driver support +# +CONFIG_VGA_CONSOLE=y +CONFIG_VGACON_SOFT_SCROLLBACK=y +CONFIG_VGACON_SOFT_SCROLLBACK_SIZE=256 +CONFIG_VIDEO_SELECT=y +CONFIG_DUMMY_CONSOLE=y + +# +# Sound +# +CONFIG_SOUND=y + +# +# Advanced Linux Sound Architecture +# +# CONFIG_SND is not set + +# +# Open Sound System +# +CONFIG_SOUND_PRIME=y +# CONFIG_SOUND_TRIDENT is not set +# CONFIG_SOUND_MSNDCLAS is not set +# CONFIG_SOUND_MSNDPIN is not set +# CONFIG_SOUND_OSS is not set +CONFIG_HID_SUPPORT=y +CONFIG_HID=y +# CONFIG_HID_DEBUG is not set + +# +# USB Input Devices +# +CONFIG_USB_HID=y +# CONFIG_USB_HIDINPUT_POWERBOOK is not set +# CONFIG_HID_FF is not set +# CONFIG_USB_HIDDEV is not set +CONFIG_USB_SUPPORT=y +CONFIG_USB_ARCH_HAS_HCD=y +CONFIG_USB_ARCH_HAS_OHCI=y +CONFIG_USB_ARCH_HAS_EHCI=y +CONFIG_USB=y +# CONFIG_USB_DEBUG is not set + +# +# Miscellaneous USB options +# +CONFIG_USB_DEVICEFS=y +# CONFIG_USB_DEVICE_CLASS is not set +# CONFIG_USB_DYNAMIC_MINORS is not set +# CONFIG_USB_SUSPEND is not set +# CONFIG_USB_PERSIST is not set +# CONFIG_USB_OTG is not set + +# +# USB Host Controller Drivers +# +CONFIG_USB_EHCI_HCD=y +# CONFIG_USB_EHCI_SPLIT_ISO is not set +# CONFIG_USB_EHCI_ROOT_HUB_TT is not set +# CONFIG_USB_EHCI_TT_NEWSCHED is not set +# CONFIG_USB_ISP116X_HCD is not set +CONFIG_USB_OHCI_HCD=y +# CONFIG_USB_OHCI_BIG_ENDIAN_DESC is not set +# CONFIG_USB_OHCI_BIG_ENDIAN_MMIO is not set +CONFIG_USB_OHCI_LITTLE_ENDIAN=y +CONFIG_USB_UHCI_HCD=y +# CONFIG_USB_SL811_HCD is not set +# CONFIG_USB_R8A66597_HCD is not set + +# +# USB Device Class drivers +# +# CONFIG_USB_ACM is not set +CONFIG_USB_PRINTER=y + +# +# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' +# + +# +# may also be needed; see USB_STORAGE Help for more information +# +CONFIG_USB_STORAGE=y +# CONFIG_USB_STORAGE_DEBUG is not set +# CONFIG_USB_STORAGE_DATAFAB is not set +# CONFIG_USB_STORAGE_FREECOM is not set +# CONFIG_USB_STORAGE_ISD200 is not set +# CONFIG_USB_STORAGE_DPCM is not set +# CONFIG_USB_STORAGE_USBAT is not set +# CONFIG_USB_STORAGE_SDDR09 is not set +# CONFIG_USB_STORAGE_SDDR55 is not set +# CONFIG_USB_STORAGE_JUMPSHOT is not set +# CONFIG_USB_STORAGE_ALAUDA is not set +# CONFIG_USB_STORAGE_KARMA is not set +# CONFIG_USB_LIBUSUAL is not set + +# +# USB Imaging devices +# +# CONFIG_USB_MDC800 is not set +# CONFIG_USB_MICROTEK is not set +CONFIG_USB_MON=y + +# +# USB port drivers +# + +# +# USB Serial Converter support +# +# CONFIG_USB_SERIAL is not set + +# +# USB Miscellaneous drivers +# +# CONFIG_USB_EMI62 is not set +# CONFIG_USB_EMI26 is not set +# CONFIG_USB_ADUTUX is not set +# CONFIG_USB_AUERSWALD is not set +# CONFIG_USB_RIO500 is not set +# CONFIG_USB_LEGOTOWER is not set +# CONFIG_USB_LCD is not set +# CONFIG_USB_BERRY_CHARGE is not set +# CONFIG_USB_LED is not set +# CONFIG_USB_CYPRESS_CY7C63 is not set +# CONFIG_USB_CYTHERM is not set +# CONFIG_USB_PHIDGET is not set +# CONFIG_USB_IDMOUSE is not set +# CONFIG_USB_FTDI_ELAN is not set +# CONFIG_USB_APPLEDISPLAY is not set +# CONFIG_USB_SISUSBVGA is not set +# CONFIG_USB_LD is not set +# CONFIG_USB_TRANCEVIBRATOR is not set +# CONFIG_USB_IOWARRIOR is not set +# CONFIG_USB_TEST is not set + +# +# USB DSL modem support +# + +# +# USB Gadget Support +# +# CONFIG_USB_GADGET is not set +# CONFIG_MMC is not set + +# +# LED devices +# +# CONFIG_NEW_LEDS is not set + +# +# LED drivers +# + +# +# LED Triggers +# +# CONFIG_INFINIBAND is not set +# CONFIG_EDAC is not set + +# +# Real Time Clock +# +# CONFIG_RTC_CLASS is not set + +# +# DMA Engine support +# +# CONFIG_DMA_ENGINE is not set + +# +# DMA Clients +# + +# +# DMA Devices +# +CONFIG_VIRTUALIZATION=y +# CONFIG_KVM is not set + +# +# Userspace I/O +# +# CONFIG_UIO is not set + +# +# Firmware Drivers +# +# CONFIG_EDD is not set +# CONFIG_DELL_RBU is not set +# CONFIG_DCDBAS is not set +CONFIG_DMIID=y + +# +# File systems +# +CONFIG_EXT2_FS=y +CONFIG_EXT2_FS_XATTR=y +CONFIG_EXT2_FS_POSIX_ACL=y +# CONFIG_EXT2_FS_SECURITY is not set +# CONFIG_EXT2_FS_XIP is not set +CONFIG_EXT3_FS=y +CONFIG_EXT3_FS_XATTR=y +CONFIG_EXT3_FS_POSIX_ACL=y +# CONFIG_EXT3_FS_SECURITY is not set +# CONFIG_EXT4DEV_FS is not set +CONFIG_JBD=y +# CONFIG_JBD_DEBUG is not set +CONFIG_FS_MBCACHE=y +CONFIG_REISERFS_FS=y +# CONFIG_REISERFS_CHECK is not set +# CONFIG_REISERFS_PROC_INFO is not set +CONFIG_REISERFS_FS_XATTR=y +CONFIG_REISERFS_FS_POSIX_ACL=y +# CONFIG_REISERFS_FS_SECURITY is not set +# CONFIG_JFS_FS is not set +CONFIG_FS_POSIX_ACL=y +# CONFIG_XFS_FS is not set +# CONFIG_GFS2_FS is not set +# CONFIG_OCFS2_FS is not set +# CONFIG_MINIX_FS is not set +# CONFIG_ROMFS_FS is not set +CONFIG_INOTIFY=y +CONFIG_INOTIFY_USER=y +# CONFIG_QUOTA is not set +CONFIG_DNOTIFY=y +# CONFIG_AUTOFS_FS is not set +CONFIG_AUTOFS4_FS=y +# CONFIG_FUSE_FS is not set +CONFIG_GENERIC_ACL=y + +# +# CD-ROM/DVD Filesystems +# +CONFIG_ISO9660_FS=y +CONFIG_JOLIET=y +# CONFIG_ZISOFS is not set +# CONFIG_UDF_FS is not set + +# +# DOS/FAT/NT Filesystems +# +CONFIG_FAT_FS=y +CONFIG_MSDOS_FS=y +CONFIG_VFAT_FS=y +CONFIG_FAT_DEFAULT_CODEPAGE=437 +CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1" +# CONFIG_NTFS_FS is not set + +# +# Pseudo filesystems +# +CONFIG_PROC_FS=y +CONFIG_PROC_KCORE=y +CONFIG_PROC_SYSCTL=y +CONFIG_SYSFS=y +CONFIG_TMPFS=y +CONFIG_TMPFS_POSIX_ACL=y +CONFIG_HUGETLBFS=y +CONFIG_HUGETLB_PAGE=y +CONFIG_RAMFS=y +# CONFIG_CONFIGFS_FS is not set + +# +# Miscellaneous filesystems +# +# CONFIG_ADFS_FS is not set +# CONFIG_AFFS_FS is not set +# CONFIG_HFS_FS is not set +# CONFIG_HFSPLUS_FS is not set +# CONFIG_BEFS_FS is not set +# CONFIG_BFS_FS is not set +# CONFIG_EFS_FS is not set +# CONFIG_CRAMFS is not set +# CONFIG_VXFS_FS is not set +# CONFIG_HPFS_FS is not set +# CONFIG_QNX4FS_FS is not set +# CONFIG_SYSV_FS is not set +# CONFIG_UFS_FS is not set + +# +# Network File Systems +# +CONFIG_NFS_FS=y +CONFIG_NFS_V3=y +# CONFIG_NFS_V3_ACL is not set +# CONFIG_NFS_V4 is not set +# CONFIG_NFS_DIRECTIO is not set +CONFIG_NFSD=y +CONFIG_NFSD_V3=y +# CONFIG_NFSD_V3_ACL is not set +# CONFIG_NFSD_V4 is not set +CONFIG_NFSD_TCP=y +CONFIG_ROOT_NFS=y +CONFIG_LOCKD=y +CONFIG_LOCKD_V4=y +CONFIG_EXPORTFS=y +CONFIG_NFS_COMMON=y +CONFIG_SUNRPC=y +# CONFIG_SUNRPC_BIND34 is not set +# CONFIG_RPCSEC_GSS_KRB5 is not set +# CONFIG_RPCSEC_GSS_SPKM3 is not set +# CONFIG_SMB_FS is not set +# CONFIG_CIFS is not set +# CONFIG_NCP_FS is not set +# CONFIG_CODA_FS is not set +# CONFIG_AFS_FS is not set + +# +# Partition Types +# +# CONFIG_PARTITION_ADVANCED is not set +CONFIG_MSDOS_PARTITION=y + +# +# Native Language Support +# +CONFIG_NLS=y +CONFIG_NLS_DEFAULT="iso8859-1" +CONFIG_NLS_CODEPAGE_437=y +# CONFIG_NLS_CODEPAGE_737 is not set +# CONFIG_NLS_CODEPAGE_775 is not set +# CONFIG_NLS_CODEPAGE_850 is not set +# CONFIG_NLS_CODEPAGE_852 is not set +# CONFIG_NLS_CODEPAGE_855 is not set +# CONFIG_NLS_CODEPAGE_857 is not set +# CONFIG_NLS_CODEPAGE_860 is not set +# CONFIG_NLS_CODEPAGE_861 is not set +# CONFIG_NLS_CODEPAGE_862 is not set +# CONFIG_NLS_CODEPAGE_863 is not set +# CONFIG_NLS_CODEPAGE_864 is not set +# CONFIG_NLS_CODEPAGE_865 is not set +# CONFIG_NLS_CODEPAGE_866 is not set +# CONFIG_NLS_CODEPAGE_869 is not set +# CONFIG_NLS_CODEPAGE_936 is not set +# CONFIG_NLS_CODEPAGE_950 is not set +# CONFIG_NLS_CODEPAGE_932 is not set +# CONFIG_NLS_CODEPAGE_949 is not set +# CONFIG_NLS_CODEPAGE_874 is not set +# CONFIG_NLS_ISO8859_8 is not set +# CONFIG_NLS_CODEPAGE_1250 is not set +# CONFIG_NLS_CODEPAGE_1251 is not set +CONFIG_NLS_ASCII=y +CONFIG_NLS_ISO8859_1=y +# CONFIG_NLS_ISO8859_2 is not set +# CONFIG_NLS_ISO8859_3 is not set +# CONFIG_NLS_ISO8859_4 is not set +# CONFIG_NLS_ISO8859_5 is not set +# CONFIG_NLS_ISO8859_6 is not set +# CONFIG_NLS_ISO8859_7 is not set +# CONFIG_NLS_ISO8859_9 is not set +# CONFIG_NLS_ISO8859_13 is not set +# CONFIG_NLS_ISO8859_14 is not set +CONFIG_NLS_ISO8859_15=y +# CONFIG_NLS_KOI8_R is not set +# CONFIG_NLS_KOI8_U is not set +CONFIG_NLS_UTF8=y + +# +# Distributed Lock Manager +# +# CONFIG_DLM is not set + +# +# Instrumentation Support +# +CONFIG_PROFILING=y +CONFIG_OPROFILE=y +CONFIG_KPROBES=y + +# +# Kernel hacking +# +CONFIG_TRACE_IRQFLAGS_SUPPORT=y +# CONFIG_PRINTK_TIME is not set +# CONFIG_ENABLE_MUST_CHECK is not set +CONFIG_MAGIC_SYSRQ=y +CONFIG_UNUSED_SYMBOLS=y +CONFIG_DEBUG_FS=y +# CONFIG_HEADERS_CHECK is not set +CONFIG_DEBUG_KERNEL=y +# CONFIG_DEBUG_SHIRQ is not set +CONFIG_DETECT_SOFTLOCKUP=y +# CONFIG_SCHED_DEBUG is not set +# CONFIG_SCHEDSTATS is not set +CONFIG_TIMER_STATS=y +# CONFIG_DEBUG_SLAB is not set +# CONFIG_DEBUG_RT_MUTEXES is not set +# CONFIG_RT_MUTEX_TESTER is not set +# CONFIG_DEBUG_SPINLOCK is not set +# CONFIG_DEBUG_MUTEXES is not set +# CONFIG_DEBUG_LOCK_ALLOC is not set +# CONFIG_PROVE_LOCKING is not set +# CONFIG_LOCK_STAT is not set +# CONFIG_DEBUG_SPINLOCK_SLEEP is not set +# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set +# CONFIG_DEBUG_KOBJECT is not set +CONFIG_DEBUG_BUGVERBOSE=y +# CONFIG_DEBUG_INFO is not set +# CONFIG_DEBUG_VM is not set +# CONFIG_DEBUG_LIST is not set +# CONFIG_FRAME_POINTER is not set +# CONFIG_FORCED_INLINING is not set +# CONFIG_RCU_TORTURE_TEST is not set +# CONFIG_LKDTM is not set +# CONFIG_FAULT_INJECTION is not set +# CONFIG_DEBUG_RODATA is not set +# CONFIG_IOMMU_DEBUG is not set +CONFIG_DEBUG_STACKOVERFLOW=y +# CONFIG_DEBUG_STACK_USAGE is not set + +# +# Security options +# +# CONFIG_KEYS is not set +# CONFIG_SECURITY is not set +# CONFIG_CRYPTO is not set + +# +# Library routines +# +CONFIG_BITREVERSE=y +# CONFIG_CRC_CCITT is not set +# CONFIG_CRC16 is not set +# CONFIG_CRC_ITU_T is not set +CONFIG_CRC32=y +# CONFIG_CRC7 is not set +# CONFIG_LIBCRC32C is not set +CONFIG_ZLIB_INFLATE=y +CONFIG_PLIST=y +CONFIG_HAS_IOMEM=y +CONFIG_HAS_IOPORT=y +CONFIG_HAS_DMA=y diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index 18dcdc6fb7aa..46bb609e2444 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -1,5 +1,15 @@ -ifeq ($(CONFIG_X86_32),y) -include ${srctree}/arch/x86/crypto/Makefile_32 -else -include ${srctree}/arch/x86/crypto/Makefile_64 -endif +# +# Arch-specific CryptoAPI modules. +# + +obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o +obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o + +obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o +obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o + +aes-i586-y := aes-i586-asm_32.o aes_32.o +twofish-i586-y := twofish-i586-asm_32.o twofish_32.o + +aes-x86_64-y := aes-x86_64-asm_64.o aes_64.o +twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_64.o diff --git a/arch/x86/crypto/Makefile_32 b/arch/x86/crypto/Makefile_32 deleted file mode 100644 index 2d873a2388ed..000000000000 --- a/arch/x86/crypto/Makefile_32 +++ /dev/null @@ -1,12 +0,0 @@ -# -# x86/crypto/Makefile -# -# Arch-specific CryptoAPI modules. -# - -obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o -obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o - -aes-i586-y := aes-i586-asm_32.o aes_32.o -twofish-i586-y := twofish-i586-asm_32.o twofish_32.o - diff --git a/arch/x86/crypto/Makefile_64 b/arch/x86/crypto/Makefile_64 deleted file mode 100644 index b40896276e93..000000000000 --- a/arch/x86/crypto/Makefile_64 +++ /dev/null @@ -1,12 +0,0 @@ -# -# x86/crypto/Makefile -# -# Arch-specific CryptoAPI modules. -# - -obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o -obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o - -aes-x86_64-y := aes-x86_64-asm_64.o aes_64.o -twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_64.o - diff --git a/arch/x86/ia32/Makefile b/arch/x86/ia32/Makefile index cdae36435e21..e2edda255a84 100644 --- a/arch/x86/ia32/Makefile +++ b/arch/x86/ia32/Makefile @@ -18,18 +18,35 @@ $(obj)/syscall32_syscall.o: \ $(foreach F,sysenter syscall,$(obj)/vsyscall-$F.so) # Teach kbuild about targets -targets := $(foreach F,sysenter syscall,vsyscall-$F.o vsyscall-$F.so) +targets := $(foreach F,$(addprefix vsyscall-,sysenter syscall),\ + $F.o $F.so $F.so.dbg) # The DSO images are built using a special linker script quiet_cmd_syscall = SYSCALL $@ - cmd_syscall = $(CC) -m32 -nostdlib -shared -s \ + cmd_syscall = $(CC) -m32 -nostdlib -shared \ $(call ld-option, -Wl$(comma)--hash-style=sysv) \ -Wl,-soname=linux-gate.so.1 -o $@ \ -Wl,-T,$(filter-out FORCE,$^) -$(obj)/vsyscall-sysenter.so $(obj)/vsyscall-syscall.so: \ -$(obj)/vsyscall-%.so: $(src)/vsyscall.lds $(obj)/vsyscall-%.o FORCE +$(obj)/%.so: OBJCOPYFLAGS := -S +$(obj)/%.so: $(obj)/%.so.dbg FORCE + $(call if_changed,objcopy) + +$(obj)/vsyscall-sysenter.so.dbg $(obj)/vsyscall-syscall.so.dbg: \ +$(obj)/vsyscall-%.so.dbg: $(src)/vsyscall.lds $(obj)/vsyscall-%.o FORCE $(call if_changed,syscall) AFLAGS_vsyscall-sysenter.o = -m32 -Wa,-32 AFLAGS_vsyscall-syscall.o = -m32 -Wa,-32 + +vdsos := vdso32-sysenter.so vdso32-syscall.so + +quiet_cmd_vdso_install = INSTALL $@ + cmd_vdso_install = cp $(@:vdso32-%.so=$(obj)/vsyscall-%.so.dbg) \ + $(MODLIB)/vdso/$@ + +$(vdsos): + @mkdir -p $(MODLIB)/vdso + $(call cmd,vdso_install) + +vdso_install: $(vdsos) diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c index 08781370256d..f82e1a94fcb7 100644 --- a/arch/x86/ia32/ia32_aout.c +++ b/arch/x86/ia32/ia32_aout.c @@ -40,7 +40,7 @@ static int load_aout_binary(struct linux_binprm *, struct pt_regs * regs); static int load_aout_library(struct file*); #ifdef CORE_DUMP -static int aout_core_dump(long signr, struct pt_regs * regs, struct file *file); +static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit); /* * fill in the user structure for a core dump.. @@ -148,7 +148,7 @@ if (file->f_op->llseek) { \ * dumping of the process results in another error.. */ -static int aout_core_dump(long signr, struct pt_regs * regs, struct file *file) +static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit) { mm_segment_t fs; int has_dumped = 0; @@ -168,13 +168,11 @@ static int aout_core_dump(long signr, struct pt_regs * regs, struct file *file) /* If the size of the dump file exceeds the rlimit, then see what would happen if we wrote the stack, but not the data area. */ - if ((dump.u_dsize+dump.u_ssize+1) * PAGE_SIZE > - current->signal->rlim[RLIMIT_CORE].rlim_cur) + if ((dump.u_dsize + dump.u_ssize + 1) * PAGE_SIZE > limit) dump.u_dsize = 0; /* Make sure we have enough room to write the stack and data areas. */ - if ((dump.u_ssize+1) * PAGE_SIZE > - current->signal->rlim[RLIMIT_CORE].rlim_cur) + if ((dump.u_ssize + 1) * PAGE_SIZE > limit) dump.u_ssize = 0; /* make sure we actually have a data and stack area to dump */ @@ -422,6 +420,8 @@ beyond_if: (regs)->eflags = 0x200; (regs)->cs = __USER32_CS; (regs)->ss = __USER32_DS; + regs->r8 = regs->r9 = regs->r10 = regs->r11 = + regs->r12 = regs->r13 = regs->r14 = regs->r15 = 0; set_fs(USER_DS); if (unlikely(current->ptrace & PT_PTRACED)) { if (current->ptrace & PT_TRACE_EXEC) diff --git a/arch/x86/ia32/ia32_binfmt.c b/arch/x86/ia32/ia32_binfmt.c index dffd2ac72747..55822d2cf053 100644 --- a/arch/x86/ia32/ia32_binfmt.c +++ b/arch/x86/ia32/ia32_binfmt.c @@ -5,10 +5,6 @@ * This tricks binfmt_elf.c into loading 32bit binaries using lots * of ugly preprocessor tricks. Talk about very very poor man's inheritance. */ -#define __ASM_X86_64_ELF_H 1 - -#undef ELF_CLASS -#define ELF_CLASS ELFCLASS32 #include <linux/types.h> #include <linux/stddef.h> @@ -19,6 +15,7 @@ #include <linux/binfmts.h> #include <linux/mm.h> #include <linux/security.h> +#include <linux/elfcore-compat.h> #include <asm/segment.h> #include <asm/ptrace.h> @@ -31,6 +28,20 @@ #include <asm/ia32.h> #include <asm/vsyscall32.h> +#undef ELF_ARCH +#undef ELF_CLASS +#define ELF_CLASS ELFCLASS32 +#define ELF_ARCH EM_386 + +#undef elfhdr +#undef elf_phdr +#undef elf_note +#undef elf_addr_t +#define elfhdr elf32_hdr +#define elf_phdr elf32_phdr +#define elf_note elf32_note +#define elf_addr_t Elf32_Off + #define ELF_NAME "elf/i386" #define AT_SYSINFO 32 @@ -48,77 +59,20 @@ int sysctl_vsyscall32 = 1; } while(0) struct file; -struct elf_phdr; #define IA32_EMULATOR 1 -#define ELF_ET_DYN_BASE (TASK_UNMAPPED_BASE + 0x1000000) - -#undef ELF_ARCH -#define ELF_ARCH EM_386 - -#define ELF_DATA ELFDATA2LSB +#undef ELF_ET_DYN_BASE -#define USE_ELF_CORE_DUMP 1 - -/* Override elfcore.h */ -#define _LINUX_ELFCORE_H 1 -typedef unsigned int elf_greg_t; - -#define ELF_NGREG (sizeof (struct user_regs_struct32) / sizeof(elf_greg_t)) -typedef elf_greg_t elf_gregset_t[ELF_NGREG]; - -struct elf_siginfo -{ - int si_signo; /* signal number */ - int si_code; /* extra code */ - int si_errno; /* errno */ -}; +#define ELF_ET_DYN_BASE (TASK_UNMAPPED_BASE + 0x1000000) #define jiffies_to_timeval(a,b) do { (b)->tv_usec = 0; (b)->tv_sec = (a)/HZ; }while(0) -struct elf_prstatus -{ - struct elf_siginfo pr_info; /* Info associated with signal */ - short pr_cursig; /* Current signal */ - unsigned int pr_sigpend; /* Set of pending signals */ - unsigned int pr_sighold; /* Set of held signals */ - pid_t pr_pid; - pid_t pr_ppid; - pid_t pr_pgrp; - pid_t pr_sid; - struct compat_timeval pr_utime; /* User time */ - struct compat_timeval pr_stime; /* System time */ - struct compat_timeval pr_cutime; /* Cumulative user time */ - struct compat_timeval pr_cstime; /* Cumulative system time */ - elf_gregset_t pr_reg; /* GP registers */ - int pr_fpvalid; /* True if math co-processor being used. */ -}; - -#define ELF_PRARGSZ (80) /* Number of chars for args */ - -struct elf_prpsinfo -{ - char pr_state; /* numeric process state */ - char pr_sname; /* char for pr_state */ - char pr_zomb; /* zombie */ - char pr_nice; /* nice val */ - unsigned int pr_flag; /* flags */ - __u16 pr_uid; - __u16 pr_gid; - pid_t pr_pid, pr_ppid, pr_pgrp, pr_sid; - /* Lots missing */ - char pr_fname[16]; /* filename of executable */ - char pr_psargs[ELF_PRARGSZ]; /* initial part of arg list */ -}; - -#define __STR(x) #x -#define STR(x) __STR(x) - #define _GET_SEG(x) \ - ({ __u32 seg; asm("movl %%" STR(x) ",%0" : "=r"(seg)); seg; }) + ({ __u32 seg; asm("movl %%" __stringify(x) ",%0" : "=r"(seg)); seg; }) /* Assumes current==process to be dumped */ +#undef ELF_CORE_COPY_REGS #define ELF_CORE_COPY_REGS(pr_reg, regs) \ pr_reg[0] = regs->rbx; \ pr_reg[1] = regs->rcx; \ @@ -138,36 +92,41 @@ struct elf_prpsinfo pr_reg[15] = regs->rsp; \ pr_reg[16] = regs->ss; -#define user user32 + +#define elf_prstatus compat_elf_prstatus +#define elf_prpsinfo compat_elf_prpsinfo +#define elf_fpregset_t struct user_i387_ia32_struct +#define elf_fpxregset_t struct user32_fxsr_struct +#define user user32 #undef elf_read_implies_exec #define elf_read_implies_exec(ex, executable_stack) (executable_stack != EXSTACK_DISABLE_X) -//#include <asm/ia32.h> -#include <linux/elf.h> - -typedef struct user_i387_ia32_struct elf_fpregset_t; -typedef struct user32_fxsr_struct elf_fpxregset_t; - -static inline void elf_core_copy_regs(elf_gregset_t *elfregs, struct pt_regs *regs) +#define elf_core_copy_regs elf32_core_copy_regs +static inline void elf32_core_copy_regs(compat_elf_gregset_t *elfregs, + struct pt_regs *regs) { - ELF_CORE_COPY_REGS((*elfregs), regs) + ELF_CORE_COPY_REGS((&elfregs->ebx), regs) } -static inline int elf_core_copy_task_regs(struct task_struct *t, elf_gregset_t* elfregs) +#define elf_core_copy_task_regs elf32_core_copy_task_regs +static inline int elf32_core_copy_task_regs(struct task_struct *t, + compat_elf_gregset_t* elfregs) { struct pt_regs *pp = task_pt_regs(t); - ELF_CORE_COPY_REGS((*elfregs), pp); + ELF_CORE_COPY_REGS((&elfregs->ebx), pp); /* fix wrong segments */ - (*elfregs)[7] = t->thread.ds; - (*elfregs)[9] = t->thread.fsindex; - (*elfregs)[10] = t->thread.gsindex; - (*elfregs)[8] = t->thread.es; + elfregs->ds = t->thread.ds; + elfregs->fs = t->thread.fsindex; + elfregs->gs = t->thread.gsindex; + elfregs->es = t->thread.es; return 1; } +#define elf_core_copy_task_fpregs elf32_core_copy_task_fpregs static inline int -elf_core_copy_task_fpregs(struct task_struct *tsk, struct pt_regs *regs, elf_fpregset_t *fpu) +elf32_core_copy_task_fpregs(struct task_struct *tsk, struct pt_regs *regs, + elf_fpregset_t *fpu) { struct _fpstate_ia32 *fpstate = (void*)fpu; mm_segment_t oldfs = get_fs(); @@ -188,8 +147,10 @@ elf_core_copy_task_fpregs(struct task_struct *tsk, struct pt_regs *regs, elf_fpr } #define ELF_CORE_COPY_XFPREGS 1 +#define ELF_CORE_XFPREG_TYPE NT_PRXFPREG +#define elf_core_copy_task_xfpregs elf32_core_copy_task_xfpregs static inline int -elf_core_copy_task_xfpregs(struct task_struct *t, elf_fpxregset_t *xfpu) +elf32_core_copy_task_xfpregs(struct task_struct *t, elf_fpxregset_t *xfpu) { struct pt_regs *regs = task_pt_regs(t); if (!tsk_used_math(t)) @@ -208,6 +169,10 @@ elf_core_copy_task_xfpregs(struct task_struct *t, elf_fpxregset_t *xfpu) extern int force_personality32; +#undef ELF_EXEC_PAGESIZE +#undef ELF_HWCAP +#undef ELF_PLATFORM +#undef SET_PERSONALITY #define ELF_EXEC_PAGESIZE PAGE_SIZE #define ELF_HWCAP (boot_cpu_data.x86_capability[0]) #define ELF_PLATFORM ("i686") @@ -233,6 +198,7 @@ do { \ #define load_elf_binary load_elf32_binary +#undef ELF_PLAT_INIT #define ELF_PLAT_INIT(r, load_addr) elf32_init(r) #undef start_thread @@ -291,7 +257,6 @@ static void elf32_init(struct pt_regs *regs) static ctl_table abi_table2[] = { { - .ctl_name = 99, .procname = "vsyscall32", .data = &sysctl_vsyscall32, .maxlen = sizeof(int), diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 18b231810908..df588f0f76e1 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -159,20 +159,16 @@ sysenter_do_call: sysenter_tracesys: CFI_RESTORE_STATE + xchgl %r9d,%ebp SAVE_REST CLEAR_RREGS + movq %r9,R9(%rsp) movq $-ENOSYS,RAX(%rsp) /* really needed? */ movq %rsp,%rdi /* &pt_regs -> arg1 */ call syscall_trace_enter LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */ RESTORE_REST - movl %ebp, %ebp - /* no need to do an access_ok check here because rbp has been - 32bit zero extended */ -1: movl (%rbp),%r9d - .section __ex_table,"a" - .quad 1b,ia32_badarg - .previous + xchgl %ebp,%r9d jmp sysenter_do_call CFI_ENDPROC ENDPROC(ia32_sysenter_target) @@ -262,20 +258,17 @@ cstar_do_call: cstar_tracesys: CFI_RESTORE_STATE + xchgl %r9d,%ebp SAVE_REST CLEAR_RREGS + movq %r9,R9(%rsp) movq $-ENOSYS,RAX(%rsp) /* really needed? */ movq %rsp,%rdi /* &pt_regs -> arg1 */ call syscall_trace_enter LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */ RESTORE_REST + xchgl %ebp,%r9d movl RSP-ARGOFFSET(%rsp), %r8d - /* no need to do an access_ok check here because r8 has been - 32bit zero extended */ -1: movl (%r8),%r9d - .section __ex_table,"a" - .quad 1b,ia32_badarg - .previous jmp cstar_do_call END(ia32_cstar_target) diff --git a/arch/x86/ia32/ipc32.c b/arch/x86/ia32/ipc32.c index 2e1869ec4db4..7b3342e5aab5 100644 --- a/arch/x86/ia32/ipc32.c +++ b/arch/x86/ia32/ipc32.c @@ -9,8 +9,6 @@ #include <linux/ipc.h> #include <linux/compat.h> -#include <asm/ipc.h> - asmlinkage long sys32_ipc(u32 call, int first, int second, int third, compat_uptr_t ptr, u32 fifth) diff --git a/arch/x86/kernel/.gitignore b/arch/x86/kernel/.gitignore index 40836ad9079c..4ea38a39aed4 100644 --- a/arch/x86/kernel/.gitignore +++ b/arch/x86/kernel/.gitignore @@ -1 +1,2 @@ vsyscall.lds +vsyscall_32.lds diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 45855c97923e..38573340b143 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -3,3 +3,7 @@ include ${srctree}/arch/x86/kernel/Makefile_32 else include ${srctree}/arch/x86/kernel/Makefile_64 endif + +# Workaround to delete .lds files with make clean +# The problem is that we do not enter Makefile_32 with make clean. +clean-files := vsyscall*.lds vsyscall*.so diff --git a/arch/x86/kernel/Makefile_32 b/arch/x86/kernel/Makefile_32 index 7ff02063b858..a7bc93c27662 100644 --- a/arch/x86/kernel/Makefile_32 +++ b/arch/x86/kernel/Makefile_32 @@ -2,7 +2,8 @@ # Makefile for the linux kernel. # -extra-y := head_32.o init_task_32.o vmlinux.lds +extra-y := head_32.o init_task.o vmlinux.lds +CPPFLAGS_vmlinux.lds += -Ui386 obj-y := process_32.o signal_32.o entry_32.o traps_32.o irq_32.o \ ptrace_32.o time_32.o ioport_32.o ldt_32.o setup_32.o i8259_32.o sys_i386_32.o \ @@ -17,6 +18,7 @@ obj-$(CONFIG_MCA) += mca_32.o obj-$(CONFIG_X86_MSR) += msr.o obj-$(CONFIG_X86_CPUID) += cpuid.o obj-$(CONFIG_MICROCODE) += microcode.o +obj-$(CONFIG_PCI) += early-quirks.o obj-$(CONFIG_APM) += apm_32.o obj-$(CONFIG_X86_SMP) += smp_32.o smpboot_32.o tsc_sync.o obj-$(CONFIG_SMP) += smpcommon_32.o @@ -25,7 +27,7 @@ obj-$(CONFIG_X86_MPPARSE) += mpparse_32.o obj-$(CONFIG_X86_LOCAL_APIC) += apic_32.o nmi_32.o obj-$(CONFIG_X86_IO_APIC) += io_apic_32.o obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o -obj-$(CONFIG_KEXEC) += machine_kexec_32.o relocate_kernel_32.o crash_32.o +obj-$(CONFIG_KEXEC) += machine_kexec_32.o relocate_kernel_32.o crash.o obj-$(CONFIG_CRASH_DUMP) += crash_dump_32.o obj-$(CONFIG_X86_NUMAQ) += numaq_32.o obj-$(CONFIG_X86_SUMMIT_NUMA) += summit_32.o @@ -51,7 +53,7 @@ obj-$(CONFIG_SCx200) += scx200_32.o # We must build both images before we can assemble it. # Note: kbuild does not track this dependency due to usage of .incbin $(obj)/vsyscall_32.o: $(obj)/vsyscall-int80_32.so $(obj)/vsyscall-sysenter_32.so -targets += $(foreach F,int80 sysenter,vsyscall-$F.o vsyscall-$F.so) +targets += $(foreach F,int80 sysenter,vsyscall-$F_32.o vsyscall-$F_32.so) targets += vsyscall-note_32.o vsyscall_32.lds # The DSO images are built using a special linker script. @@ -59,7 +61,7 @@ quiet_cmd_syscall = SYSCALL $@ cmd_syscall = $(CC) -m elf_i386 -nostdlib $(SYSCFLAGS_$(@F)) \ -Wl,-T,$(filter-out FORCE,$^) -o $@ -export CPPFLAGS_vsyscall_32.lds += -P -C -U$(ARCH) +export CPPFLAGS_vsyscall_32.lds += -P -C -Ui386 vsyscall-flags = -shared -s -Wl,-soname=linux-gate.so.1 \ $(call ld-option, -Wl$(comma)--hash-style=sysv) diff --git a/arch/x86/kernel/Makefile_64 b/arch/x86/kernel/Makefile_64 index 43da66213a47..5a88890d8ee9 100644 --- a/arch/x86/kernel/Makefile_64 +++ b/arch/x86/kernel/Makefile_64 @@ -2,34 +2,32 @@ # Makefile for the linux kernel. # -extra-y := head_64.o head64.o init_task_64.o vmlinux.lds +extra-y := head_64.o head64.o init_task.o vmlinux.lds +CPPFLAGS_vmlinux.lds += -Ux86_64 EXTRA_AFLAGS := -traditional + obj-y := process_64.o signal_64.o entry_64.o traps_64.o irq_64.o \ ptrace_64.o time_64.o ioport_64.o ldt_64.o setup_64.o i8259_64.o sys_x86_64.o \ x8664_ksyms_64.o i387_64.o syscall_64.o vsyscall_64.o \ setup64.o bootflag.o e820_64.o reboot_64.o quirks.o i8237.o \ pci-dma_64.o pci-nommu_64.o alternative.o hpet.o tsc_64.o bugs_64.o \ - perfctr-watchdog.o i8253.o + i8253.o obj-$(CONFIG_STACKTRACE) += stacktrace.o -obj-$(CONFIG_X86_MCE) += mce_64.o therm_throt.o -obj-$(CONFIG_X86_MCE_INTEL) += mce_intel_64.o -obj-$(CONFIG_X86_MCE_AMD) += mce_amd_64.o -obj-$(CONFIG_MTRR) += cpu/mtrr/ -obj-$(CONFIG_ACPI) += acpi/ +obj-y += cpu/ +obj-y += acpi/ obj-$(CONFIG_X86_MSR) += msr.o obj-$(CONFIG_MICROCODE) += microcode.o obj-$(CONFIG_X86_CPUID) += cpuid.o obj-$(CONFIG_SMP) += smp_64.o smpboot_64.o trampoline_64.o tsc_sync.o obj-y += apic_64.o nmi_64.o obj-y += io_apic_64.o mpparse_64.o genapic_64.o genapic_flat_64.o -obj-$(CONFIG_KEXEC) += machine_kexec_64.o relocate_kernel_64.o crash_64.o +obj-$(CONFIG_KEXEC) += machine_kexec_64.o relocate_kernel_64.o crash.o obj-$(CONFIG_CRASH_DUMP) += crash_dump_64.o obj-$(CONFIG_PM) += suspend_64.o obj-$(CONFIG_HIBERNATION) += suspend_asm_64.o -obj-$(CONFIG_CPU_FREQ) += cpu/cpufreq/ obj-$(CONFIG_EARLY_PRINTK) += early_printk.o -obj-$(CONFIG_IOMMU) += pci-gart_64.o aperture_64.o +obj-$(CONFIG_GART_IOMMU) += pci-gart_64.o aperture_64.o obj-$(CONFIG_CALGARY_IOMMU) += pci-calgary_64.o tce_64.o obj-$(CONFIG_SWIOTLB) += pci-swiotlb_64.o obj-$(CONFIG_KPROBES) += kprobes_64.o @@ -39,16 +37,9 @@ obj-$(CONFIG_K8_NB) += k8.o obj-$(CONFIG_AUDIT) += audit_64.o obj-$(CONFIG_MODULES) += module_64.o -obj-$(CONFIG_PCI) += early-quirks_64.o +obj-$(CONFIG_PCI) += early-quirks.o obj-y += topology.o -obj-y += intel_cacheinfo.o -obj-y += addon_cpuid_features.o obj-y += pcspeaker.o CFLAGS_vsyscall_64.o := $(PROFILING) -g0 - -therm_throt-y += cpu/mcheck/therm_throt.o -intel_cacheinfo-y += cpu/intel_cacheinfo.o -addon_cpuid_features-y += cpu/addon_cpuid_features.o -perfctr-watchdog-y += cpu/perfctr-watchdog.o diff --git a/arch/x86/kernel/acpi/Makefile b/arch/x86/kernel/acpi/Makefile index 3d5671939542..1351c3982ee4 100644 --- a/arch/x86/kernel/acpi/Makefile +++ b/arch/x86/kernel/acpi/Makefile @@ -1,5 +1,7 @@ -ifeq ($(CONFIG_X86_32),y) -include ${srctree}/arch/x86/kernel/acpi/Makefile_32 -else -include ${srctree}/arch/x86/kernel/acpi/Makefile_64 +obj-$(CONFIG_ACPI) += boot.o +obj-$(CONFIG_ACPI_SLEEP) += sleep_$(BITS).o wakeup_$(BITS).o + +ifneq ($(CONFIG_ACPI_PROCESSOR),) +obj-y += cstate.o processor.o endif + diff --git a/arch/x86/kernel/acpi/Makefile_32 b/arch/x86/kernel/acpi/Makefile_32 deleted file mode 100644 index a4852a2e9190..000000000000 --- a/arch/x86/kernel/acpi/Makefile_32 +++ /dev/null @@ -1,10 +0,0 @@ -obj-$(CONFIG_ACPI) += boot.o -ifneq ($(CONFIG_PCI),) -obj-$(CONFIG_X86_IO_APIC) += earlyquirk_32.o -endif -obj-$(CONFIG_ACPI_SLEEP) += sleep_32.o wakeup_32.o - -ifneq ($(CONFIG_ACPI_PROCESSOR),) -obj-y += cstate.o processor.o -endif - diff --git a/arch/x86/kernel/acpi/Makefile_64 b/arch/x86/kernel/acpi/Makefile_64 deleted file mode 100644 index 629425bc002d..000000000000 --- a/arch/x86/kernel/acpi/Makefile_64 +++ /dev/null @@ -1,7 +0,0 @@ -obj-y := boot.o -obj-$(CONFIG_ACPI_SLEEP) += sleep_64.o wakeup_64.o - -ifneq ($(CONFIG_ACPI_PROCESSOR),) -obj-y += processor.o cstate.o -endif - diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index afd2afe9102d..289247d974c6 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -99,7 +99,7 @@ static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE; /* * The default interrupt routing model is PIC (8259). This gets - * overriden if IOAPICs are enumerated (below). + * overridden if IOAPICs are enumerated (below). */ enum acpi_irq_model_id acpi_irq_model = ACPI_IRQ_MODEL_PIC; @@ -414,8 +414,8 @@ acpi_parse_nmi_src(struct acpi_subtable_header * header, const unsigned long end * * Port 0x4d0-4d1 are ECLR1 and ECLR2, the Edge/Level Control Registers * for the 8259 PIC. bit[n] = 1 means irq[n] is Level, otherwise Edge. - * ECLR1 is IRQ's 0-7 (IRQ 0, 1, 2 must be 0) - * ECLR2 is IRQ's 8-15 (IRQ 8, 13 must be 0) + * ECLR1 is IRQs 0-7 (IRQ 0, 1, 2 must be 0) + * ECLR2 is IRQs 8-15 (IRQ 8, 13 must be 0) */ void __init acpi_pic_sci_set_trigger(unsigned int irq, u16 trigger) @@ -427,7 +427,7 @@ void __init acpi_pic_sci_set_trigger(unsigned int irq, u16 trigger) old = inb(0x4d0) | (inb(0x4d1) << 8); /* - * If we use ACPI to set PCI irq's, then we should clear ELCR + * If we use ACPI to set PCI IRQs, then we should clear ELCR * since we will set it correctly as we enable the PCI irq * routing. */ @@ -555,7 +555,7 @@ EXPORT_SYMBOL(acpi_map_lsapic); int acpi_unmap_lsapic(int cpu) { - x86_cpu_to_apicid[cpu] = -1; + per_cpu(x86_cpu_to_apicid, cpu) = -1; cpu_clear(cpu, cpu_present_map); num_processors--; diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c index 2d39f55d29a8..10b67170b133 100644 --- a/arch/x86/kernel/acpi/cstate.c +++ b/arch/x86/kernel/acpi/cstate.c @@ -29,7 +29,7 @@ void acpi_processor_power_init_bm_check(struct acpi_processor_flags *flags, unsigned int cpu) { - struct cpuinfo_x86 *c = cpu_data + cpu; + struct cpuinfo_x86 *c = &cpu_data(cpu); flags->bm_check = 0; if (num_online_cpus() == 1) @@ -72,7 +72,7 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu, struct acpi_processor_cx *cx, struct acpi_power_register *reg) { struct cstate_entry *percpu_entry; - struct cpuinfo_x86 *c = cpu_data + cpu; + struct cpuinfo_x86 *c = &cpu_data(cpu); cpumask_t saved_mask; int retval; diff --git a/arch/x86/kernel/acpi/earlyquirk_32.c b/arch/x86/kernel/acpi/earlyquirk_32.c deleted file mode 100644 index 23f78efc577d..000000000000 --- a/arch/x86/kernel/acpi/earlyquirk_32.c +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Do early PCI probing for bug detection when the main PCI subsystem is - * not up yet. - */ -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/pci.h> -#include <linux/acpi.h> - -#include <asm/pci-direct.h> -#include <asm/acpi.h> -#include <asm/apic.h> - -#ifdef CONFIG_ACPI - -static int __init nvidia_hpet_check(struct acpi_table_header *header) -{ - return 0; -} -#endif - -static int __init check_bridge(int vendor, int device) -{ -#ifdef CONFIG_ACPI - static int warned; - /* According to Nvidia all timer overrides are bogus unless HPET - is enabled. */ - if (!acpi_use_timer_override && vendor == PCI_VENDOR_ID_NVIDIA) { - if (!warned && acpi_table_parse(ACPI_SIG_HPET, - nvidia_hpet_check)) { - warned = 1; - acpi_skip_timer_override = 1; - printk(KERN_INFO "Nvidia board " - "detected. Ignoring ACPI " - "timer override.\n"); - printk(KERN_INFO "If you got timer trouble " - "try acpi_use_timer_override\n"); - - } - } -#endif - if (vendor == PCI_VENDOR_ID_ATI && timer_over_8254 == 1) { - timer_over_8254 = 0; - printk(KERN_INFO "ATI board detected. Disabling timer routing " - "over 8254.\n"); - } - return 0; -} - -void __init check_acpi_pci(void) -{ - int num, slot, func; - - /* Assume the machine supports type 1. If not it will - always read ffffffff and should not have any side effect. - Actually a few buggy systems can machine check. Allow the user - to disable it by command line option at least -AK */ - if (!early_pci_allowed()) - return; - - /* Poor man's PCI discovery */ - for (num = 0; num < 32; num++) { - for (slot = 0; slot < 32; slot++) { - for (func = 0; func < 8; func++) { - u32 class; - u32 vendor; - class = read_pci_config(num, slot, func, - PCI_CLASS_REVISION); - if (class == 0xffffffff) - break; - - if ((class >> 16) != PCI_CLASS_BRIDGE_PCI) - continue; - - vendor = read_pci_config(num, slot, func, - PCI_VENDOR_ID); - - if (check_bridge(vendor & 0xffff, vendor >> 16)) - return; - } - - } - } -} diff --git a/arch/x86/kernel/acpi/processor.c b/arch/x86/kernel/acpi/processor.c index b54fded49834..f63e5ff0aca1 100644 --- a/arch/x86/kernel/acpi/processor.c +++ b/arch/x86/kernel/acpi/processor.c @@ -62,8 +62,7 @@ static void init_intel_pdc(struct acpi_processor *pr, struct cpuinfo_x86 *c) /* Initialize _PDC data based on the CPU vendor */ void arch_acpi_processor_init_pdc(struct acpi_processor *pr) { - unsigned int cpu = pr->id; - struct cpuinfo_x86 *c = cpu_data + cpu; + struct cpuinfo_x86 *c = &cpu_data(pr->id); pr->pdc = NULL; if (c->x86_vendor == X86_VENDOR_INTEL) diff --git a/arch/x86/kernel/acpi/wakeup_32.S b/arch/x86/kernel/acpi/wakeup_32.S index f22ba8534d26..a97313b1270e 100644 --- a/arch/x86/kernel/acpi/wakeup_32.S +++ b/arch/x86/kernel/acpi/wakeup_32.S @@ -11,7 +11,7 @@ # # If physical address of wakeup_code is 0x12345, BIOS should call us with # cs = 0x1234, eip = 0x05 -# +# #define BEEP \ inb $97, %al; \ @@ -52,7 +52,6 @@ wakeup_code: BEEP 1: mov $(wakeup_stack - wakeup_code), %sp # Private stack is needed for ASUS board - movw $0x0e00 + 'S', %fs:(0x12) pushl $0 # Kill any dangerous flags popfl @@ -90,9 +89,6 @@ wakeup_code: # make sure %cr4 is set correctly (features, etc) movl real_save_cr4 - wakeup_code, %eax movl %eax, %cr4 - movw $0xb800, %ax - movw %ax,%fs - movw $0x0e00 + 'i', %fs:(0x12) # need a gdt -- use lgdtl to force 32-bit operands, in case # the GDT is located past 16 megabytes. @@ -102,8 +98,6 @@ wakeup_code: movl %eax, %cr0 jmp 1f 1: - movw $0x0e00 + 'n', %fs:(0x14) - movl real_magic - wakeup_code, %eax cmpl $0x12345678, %eax jne bogus_real_magic @@ -122,13 +116,11 @@ real_save_cr4: .long 0 real_magic: .long 0 video_mode: .long 0 realmode_flags: .long 0 -beep_flags: .long 0 real_efer_save_restore: .long 0 real_save_efer_edx: .long 0 real_save_efer_eax: .long 0 bogus_real_magic: - movw $0x0e00 + 'B', %fs:(0x12) jmp bogus_real_magic /* This code uses an extended set of video mode numbers. These include: @@ -194,7 +186,6 @@ wakeup_pmode_return: movw %ax, %es movw %ax, %fs movw %ax, %gs - movw $0x0e00 + 'u', 0xb8016 # reload the gdt, as we need the full 32 bit address lgdt saved_gdt @@ -218,7 +209,6 @@ wakeup_pmode_return: jmp *%eax bogus_magic: - movw $0x0e00 + 'B', 0xb8018 jmp bogus_magic diff --git a/arch/x86/kernel/acpi/wakeup_64.S b/arch/x86/kernel/acpi/wakeup_64.S index 8b4357e1efe0..5ed3bc5c61d7 100644 --- a/arch/x86/kernel/acpi/wakeup_64.S +++ b/arch/x86/kernel/acpi/wakeup_64.S @@ -4,6 +4,7 @@ #include <asm/pgtable.h> #include <asm/page.h> #include <asm/msr.h> +#include <asm/asm-offsets.h> # Copyright 2003 Pavel Machek <pavel@suse.cz>, distribute under GPLv2 # @@ -41,7 +42,6 @@ wakeup_code: # Running in *copy* of this code, somewhere in low 1MB. - movb $0xa1, %al ; outb %al, $0x80 cli cld # setup data segment @@ -65,11 +65,6 @@ wakeup_code: cmpl $0x12345678, %eax jne bogus_real_magic - call verify_cpu # Verify the cpu supports long - # mode - testl %eax, %eax - jnz no_longmode - testl $1, realmode_flags - wakeup_code jz 1f lcall $0xc000,$3 @@ -84,12 +79,6 @@ wakeup_code: call mode_set 1: - movw $0xb800, %ax - movw %ax,%fs - movw $0x0e00 + 'L', %fs:(0x10) - - movb $0xa2, %al ; outb %al, $0x80 - mov %ds, %ax # Find 32bit wakeup_code addr movzx %ax, %esi # (Convert %ds:gdt to a liner ptr) shll $4, %esi @@ -117,14 +106,10 @@ wakeup_32_vector: .code32 wakeup_32: # Running in this code, but at low address; paging is not yet turned on. - movb $0xa5, %al ; outb %al, $0x80 movl $__KERNEL_DS, %eax movl %eax, %ds - movw $0x0e00 + 'i', %ds:(0xb8012) - movb $0xa8, %al ; outb %al, $0x80; - /* * Prepare for entering 64bits mode */ @@ -200,16 +185,11 @@ wakeup_long64: */ lgdt cpu_gdt_descr - movw $0x0e00 + 'n', %ds:(0xb8014) - movb $0xa9, %al ; outb %al, $0x80 - movq saved_magic, %rax movq $0x123456789abcdef0, %rdx cmpq %rdx, %rax jne bogus_64_magic - movw $0x0e00 + 'u', %ds:(0xb8016) - nop nop movw $__KERNEL_DS, %ax @@ -220,13 +200,11 @@ wakeup_long64: movw %ax, %gs movq saved_rsp, %rsp - movw $0x0e00 + 'x', %ds:(0xb8018) movq saved_rbx, %rbx movq saved_rdi, %rdi movq saved_rsi, %rsi movq saved_rbp, %rbp - movw $0x0e00 + '!', %ds:(0xb801a) movq saved_rip, %rax jmp *%rax @@ -256,21 +234,12 @@ realmode_flags: .quad 0 .code16 bogus_real_magic: - movb $0xba,%al ; outb %al,$0x80 jmp bogus_real_magic .code64 bogus_64_magic: - movb $0xb3,%al ; outb %al,$0x80 jmp bogus_64_magic -.code16 -no_longmode: - movb $0xbc,%al ; outb %al,$0x80 - jmp no_longmode - -#include "../verify_cpu_64.S" - /* This code uses an extended set of video mode numbers. These include: * Aliases for standard modes * NORMAL_VGA (-1) @@ -374,31 +343,32 @@ do_suspend_lowlevel: xorl %eax, %eax call save_processor_state - movq %rsp, saved_context_esp(%rip) - movq %rax, saved_context_eax(%rip) - movq %rbx, saved_context_ebx(%rip) - movq %rcx, saved_context_ecx(%rip) - movq %rdx, saved_context_edx(%rip) - movq %rbp, saved_context_ebp(%rip) - movq %rsi, saved_context_esi(%rip) - movq %rdi, saved_context_edi(%rip) - movq %r8, saved_context_r08(%rip) - movq %r9, saved_context_r09(%rip) - movq %r10, saved_context_r10(%rip) - movq %r11, saved_context_r11(%rip) - movq %r12, saved_context_r12(%rip) - movq %r13, saved_context_r13(%rip) - movq %r14, saved_context_r14(%rip) - movq %r15, saved_context_r15(%rip) - pushfq ; popq saved_context_eflags(%rip) + movq $saved_context, %rax + movq %rsp, pt_regs_rsp(%rax) + movq %rbp, pt_regs_rbp(%rax) + movq %rsi, pt_regs_rsi(%rax) + movq %rdi, pt_regs_rdi(%rax) + movq %rbx, pt_regs_rbx(%rax) + movq %rcx, pt_regs_rcx(%rax) + movq %rdx, pt_regs_rdx(%rax) + movq %r8, pt_regs_r8(%rax) + movq %r9, pt_regs_r9(%rax) + movq %r10, pt_regs_r10(%rax) + movq %r11, pt_regs_r11(%rax) + movq %r12, pt_regs_r12(%rax) + movq %r13, pt_regs_r13(%rax) + movq %r14, pt_regs_r14(%rax) + movq %r15, pt_regs_r15(%rax) + pushfq + popq pt_regs_eflags(%rax) movq $.L97, saved_rip(%rip) - movq %rsp,saved_rsp - movq %rbp,saved_rbp - movq %rbx,saved_rbx - movq %rdi,saved_rdi - movq %rsi,saved_rsi + movq %rsp, saved_rsp + movq %rbp, saved_rbp + movq %rbx, saved_rbx + movq %rdi, saved_rdi + movq %rsi, saved_rsi addq $8, %rsp movl $3, %edi @@ -409,32 +379,35 @@ do_suspend_lowlevel: .L99: .align 4 movl $24, %eax - movw %ax, %ds - movq saved_context+58(%rip), %rax - movq %rax, %cr4 - movq saved_context+50(%rip), %rax - movq %rax, %cr3 - movq saved_context+42(%rip), %rax - movq %rax, %cr2 - movq saved_context+34(%rip), %rax - movq %rax, %cr0 - pushq saved_context_eflags(%rip) ; popfq - movq saved_context_esp(%rip), %rsp - movq saved_context_ebp(%rip), %rbp - movq saved_context_eax(%rip), %rax - movq saved_context_ebx(%rip), %rbx - movq saved_context_ecx(%rip), %rcx - movq saved_context_edx(%rip), %rdx - movq saved_context_esi(%rip), %rsi - movq saved_context_edi(%rip), %rdi - movq saved_context_r08(%rip), %r8 - movq saved_context_r09(%rip), %r9 - movq saved_context_r10(%rip), %r10 - movq saved_context_r11(%rip), %r11 - movq saved_context_r12(%rip), %r12 - movq saved_context_r13(%rip), %r13 - movq saved_context_r14(%rip), %r14 - movq saved_context_r15(%rip), %r15 + movw %ax, %ds + + /* We don't restore %rax, it must be 0 anyway */ + movq $saved_context, %rax + movq saved_context_cr4(%rax), %rbx + movq %rbx, %cr4 + movq saved_context_cr3(%rax), %rbx + movq %rbx, %cr3 + movq saved_context_cr2(%rax), %rbx + movq %rbx, %cr2 + movq saved_context_cr0(%rax), %rbx + movq %rbx, %cr0 + pushq pt_regs_eflags(%rax) + popfq + movq pt_regs_rsp(%rax), %rsp + movq pt_regs_rbp(%rax), %rbp + movq pt_regs_rsi(%rax), %rsi + movq pt_regs_rdi(%rax), %rdi + movq pt_regs_rbx(%rax), %rbx + movq pt_regs_rcx(%rax), %rcx + movq pt_regs_rdx(%rax), %rdx + movq pt_regs_r8(%rax), %r8 + movq pt_regs_r9(%rax), %r9 + movq pt_regs_r10(%rax), %r10 + movq pt_regs_r11(%rax), %r11 + movq pt_regs_r12(%rax), %r12 + movq pt_regs_r13(%rax), %r13 + movq pt_regs_r14(%rax), %r14 + movq pt_regs_r15(%rax), %r15 xorl %eax, %eax addq $8, %rsp diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 11b03d3c6fda..d6405e0842b5 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -63,11 +63,11 @@ __setup("noreplace-paravirt", setup_noreplace_paravirt); /* Use inline assembly to define this because the nops are defined as inline assembly strings in the include files and we cannot get them easily into strings. */ -asm("\t.data\nintelnops: " +asm("\t.section .rodata, \"a\"\nintelnops: " GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6 GENERIC_NOP7 GENERIC_NOP8); -extern unsigned char intelnops[]; -static unsigned char *intel_nops[ASM_NOP_MAX+1] = { +extern const unsigned char intelnops[]; +static const unsigned char *const intel_nops[ASM_NOP_MAX+1] = { NULL, intelnops, intelnops + 1, @@ -81,11 +81,11 @@ static unsigned char *intel_nops[ASM_NOP_MAX+1] = { #endif #ifdef K8_NOP1 -asm("\t.data\nk8nops: " +asm("\t.section .rodata, \"a\"\nk8nops: " K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6 K8_NOP7 K8_NOP8); -extern unsigned char k8nops[]; -static unsigned char *k8_nops[ASM_NOP_MAX+1] = { +extern const unsigned char k8nops[]; +static const unsigned char *const k8_nops[ASM_NOP_MAX+1] = { NULL, k8nops, k8nops + 1, @@ -99,11 +99,11 @@ static unsigned char *k8_nops[ASM_NOP_MAX+1] = { #endif #ifdef K7_NOP1 -asm("\t.data\nk7nops: " +asm("\t.section .rodata, \"a\"\nk7nops: " K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6 K7_NOP7 K7_NOP8); -extern unsigned char k7nops[]; -static unsigned char *k7_nops[ASM_NOP_MAX+1] = { +extern const unsigned char k7nops[]; +static const unsigned char *const k7_nops[ASM_NOP_MAX+1] = { NULL, k7nops, k7nops + 1, @@ -116,28 +116,49 @@ static unsigned char *k7_nops[ASM_NOP_MAX+1] = { }; #endif +#ifdef P6_NOP1 +asm("\t.section .rodata, \"a\"\np6nops: " + P6_NOP1 P6_NOP2 P6_NOP3 P6_NOP4 P6_NOP5 P6_NOP6 + P6_NOP7 P6_NOP8); +extern const unsigned char p6nops[]; +static const unsigned char *const p6_nops[ASM_NOP_MAX+1] = { + NULL, + p6nops, + p6nops + 1, + p6nops + 1 + 2, + p6nops + 1 + 2 + 3, + p6nops + 1 + 2 + 3 + 4, + p6nops + 1 + 2 + 3 + 4 + 5, + p6nops + 1 + 2 + 3 + 4 + 5 + 6, + p6nops + 1 + 2 + 3 + 4 + 5 + 6 + 7, +}; +#endif + #ifdef CONFIG_X86_64 extern char __vsyscall_0; -static inline unsigned char** find_nop_table(void) +static inline const unsigned char*const * find_nop_table(void) { - return k8_nops; + return boot_cpu_data.x86_vendor != X86_VENDOR_INTEL || + boot_cpu_data.x86 < 6 ? k8_nops : p6_nops; } #else /* CONFIG_X86_64 */ -static struct nop { +static const struct nop { int cpuid; - unsigned char **noptable; + const unsigned char *const *noptable; } noptypes[] = { { X86_FEATURE_K8, k8_nops }, { X86_FEATURE_K7, k7_nops }, + { X86_FEATURE_P4, p6_nops }, + { X86_FEATURE_P3, p6_nops }, { -1, NULL } }; -static unsigned char** find_nop_table(void) +static const unsigned char*const * find_nop_table(void) { - unsigned char **noptable = intel_nops; + const unsigned char *const *noptable = intel_nops; int i; for (i = 0; noptypes[i].cpuid >= 0; i++) { @@ -154,7 +175,7 @@ static unsigned char** find_nop_table(void) /* Use this to add nops to a buffer, then text_poke the whole buffer. */ static void add_nops(void *insns, unsigned int len) { - unsigned char **noptable = find_nop_table(); + const unsigned char *const *noptable = find_nop_table(); while (len > 0) { unsigned int noplen = len; @@ -336,14 +357,14 @@ void alternatives_smp_switch(int smp) if (smp) { printk(KERN_INFO "SMP alternatives: switching to SMP code\n"); clear_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability); - clear_bit(X86_FEATURE_UP, cpu_data[0].x86_capability); + clear_bit(X86_FEATURE_UP, cpu_data(0).x86_capability); list_for_each_entry(mod, &smp_alt_modules, next) alternatives_smp_lock(mod->locks, mod->locks_end, mod->text, mod->text_end); } else { printk(KERN_INFO "SMP alternatives: switching to UP code\n"); set_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability); - set_bit(X86_FEATURE_UP, cpu_data[0].x86_capability); + set_bit(X86_FEATURE_UP, cpu_data(0).x86_capability); list_for_each_entry(mod, &smp_alt_modules, next) alternatives_smp_unlock(mod->locks, mod->locks_end, mod->text, mod->text_end); @@ -369,8 +390,8 @@ void apply_paravirt(struct paravirt_patch_site *start, BUG_ON(p->len > MAX_PATCH_LEN); /* prep the buffer with the original instructions */ memcpy(insnbuf, p->instr, p->len); - used = paravirt_ops.patch(p->instrtype, p->clobbers, insnbuf, - (unsigned long)p->instr, p->len); + used = pv_init_ops.patch(p->instrtype, p->clobbers, insnbuf, + (unsigned long)p->instr, p->len); BUG_ON(used > p->len); @@ -411,13 +432,10 @@ void __init alternative_instructions(void) if (1 == num_possible_cpus()) { printk(KERN_INFO "SMP alternatives: switching to UP code\n"); set_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability); - set_bit(X86_FEATURE_UP, cpu_data[0].x86_capability); + set_bit(X86_FEATURE_UP, cpu_data(0).x86_capability); alternatives_smp_unlock(__smp_locks, __smp_locks_end, _text, _etext); } - free_init_pages("SMP alternatives", - (unsigned long)__smp_locks, - (unsigned long)__smp_locks_end); } else { alternatives_smp_module_add(NULL, "core kernel", __smp_locks, __smp_locks_end, @@ -428,6 +446,11 @@ void __init alternative_instructions(void) apply_paravirt(__parainstructions, __parainstructions_end); local_irq_restore(flags); + if (smp_alt_once) + free_init_pages("SMP alternatives", + (unsigned long)__smp_locks, + (unsigned long)__smp_locks_end); + restart_nmi(); #ifdef CONFIG_X86_MCE restart_mce(); diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index 8f681cae7bf7..5b6992799c9d 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c @@ -20,14 +20,14 @@ #include <linux/ioport.h> #include <asm/e820.h> #include <asm/io.h> -#include <asm/iommu.h> +#include <asm/gart.h> #include <asm/pci-direct.h> #include <asm/dma.h> #include <asm/k8.h> -int iommu_aperture; -int iommu_aperture_disabled __initdata = 0; -int iommu_aperture_allowed __initdata = 0; +int gart_iommu_aperture; +int gart_iommu_aperture_disabled __initdata = 0; +int gart_iommu_aperture_allowed __initdata = 0; int fallback_aper_order __initdata = 1; /* 64MB */ int fallback_aper_force __initdata = 0; @@ -204,14 +204,15 @@ static __u32 __init search_agp_bridge(u32 *order, int *valid_agp) return 0; } -void __init iommu_hole_init(void) +void __init gart_iommu_hole_init(void) { int fix, num; u32 aper_size, aper_alloc = 0, aper_order = 0, last_aper_order = 0; u64 aper_base, last_aper_base = 0; int valid_agp = 0; - if (iommu_aperture_disabled || !fix_aperture || !early_pci_allowed()) + if (gart_iommu_aperture_disabled || !fix_aperture || + !early_pci_allowed()) return; printk(KERN_INFO "Checking aperture...\n"); @@ -222,7 +223,7 @@ void __init iommu_hole_init(void) continue; iommu_detected = 1; - iommu_aperture = 1; + gart_iommu_aperture = 1; aper_order = (read_pci_config(0, num, 3, 0x90) >> 1) & 7; aper_size = (32 * 1024 * 1024) << aper_order; diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 3d67ae18d762..08b07c176962 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -947,7 +947,7 @@ void __devinit setup_local_APIC(void) * Set up LVT0, LVT1: * * set up through-local-APIC on the BP's LINT0. This is not - * strictly necessery in pure symmetric-IO mode, but sometimes + * strictly necessary in pure symmetric-IO mode, but sometimes * we delegate interrupts to the 8259A. */ /* @@ -998,7 +998,7 @@ void __devinit setup_local_APIC(void) } else { if (esr_disable) /* - * Something untraceble is creating bad interrupts on + * Something untraceable is creating bad interrupts on * secondary quads ... for the moment, just leave the * ESR disabled - we can't do anything useful with the * errors anyway - mbligh @@ -1277,6 +1277,7 @@ void smp_spurious_interrupt(struct pt_regs *regs) /* see sw-dev-man vol 3, chapter 7.4.13.5 */ printk(KERN_INFO "spurious APIC interrupt on CPU#%d, " "should never happen.\n", smp_processor_id()); + __get_cpu_var(irq_stat).irq_spurious_count++; irq_exit(); } diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 09b82093bc75..f28ccb588fba 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -287,6 +287,20 @@ void disable_local_APIC(void) apic_write(APIC_SPIV, value); } +void lapic_shutdown(void) +{ + unsigned long flags; + + if (!cpu_has_apic) + return; + + local_irq_save(flags); + + disable_local_APIC(); + + local_irq_restore(flags); +} + /* * This is to verify that we're looking at a real local APIC. * Check these against your board if the CPUs aren't getting @@ -974,15 +988,12 @@ void __init setup_boot_APIC_clock (void) */ void __cpuinit check_boot_apic_timer_broadcast(void) { - struct clock_event_device *levt = &per_cpu(lapic_events, boot_cpu_id); - if (!disable_apic_timer || (lapic_clockevent.features & CLOCK_EVT_FEAT_DUMMY)) return; printk(KERN_INFO "AMD C1E detected late. Force timer broadcast.\n"); lapic_clockevent.features |= CLOCK_EVT_FEAT_DUMMY; - levt->features |= CLOCK_EVT_FEAT_DUMMY; local_irq_enable(); clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE, &boot_cpu_id); @@ -1143,6 +1154,7 @@ asmlinkage void smp_spurious_interrupt(void) if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f))) ack_APIC_irq(); + add_pda(irq_spurious_count, 1); irq_exit(); } diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 32f2365c26ed..17089a041028 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -57,7 +57,7 @@ * screen-blanking and gpm (Stephen Rothwell); Linux 1.99.4 * 1.2a:Simple change to stop mysterious bug reports with SMP also added * levels to the printk calls. APM is not defined for SMP machines. - * The new replacment for it is, but Linux doesn't yet support this. + * The new replacement for it is, but Linux doesn't yet support this. * Alan Cox Linux 2.1.55 * 1.3: Set up a valid data descriptor 0x40 for buggy BIOS's * 1.4: Upgraded to support APM 1.2. Integrated ThinkPad suspend patch by diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c index 8029742c0fc1..0e45981b2dd7 100644 --- a/arch/x86/kernel/asm-offsets_32.c +++ b/arch/x86/kernel/asm-offsets_32.c @@ -15,6 +15,7 @@ #include <asm/fixmap.h> #include <asm/processor.h> #include <asm/thread_info.h> +#include <asm/bootparam.h> #include <asm/elf.h> #include <xen/interface/xen.h> @@ -116,12 +117,14 @@ void foo(void) #ifdef CONFIG_PARAVIRT BLANK(); - OFFSET(PARAVIRT_enabled, paravirt_ops, paravirt_enabled); - OFFSET(PARAVIRT_irq_disable, paravirt_ops, irq_disable); - OFFSET(PARAVIRT_irq_enable, paravirt_ops, irq_enable); - OFFSET(PARAVIRT_irq_enable_sysexit, paravirt_ops, irq_enable_sysexit); - OFFSET(PARAVIRT_iret, paravirt_ops, iret); - OFFSET(PARAVIRT_read_cr0, paravirt_ops, read_cr0); + OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled); + OFFSET(PARAVIRT_PATCH_pv_cpu_ops, paravirt_patch_template, pv_cpu_ops); + OFFSET(PARAVIRT_PATCH_pv_irq_ops, paravirt_patch_template, pv_irq_ops); + OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable); + OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable); + OFFSET(PV_CPU_iret, pv_cpu_ops, iret); + OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit); + OFFSET(PV_CPU_read_cr0, pv_cpu_ops, read_cr0); #endif #ifdef CONFIG_XEN @@ -133,6 +136,7 @@ void foo(void) #ifdef CONFIG_LGUEST_GUEST BLANK(); OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled); + OFFSET(LGUEST_DATA_pgdir, lguest_data, pgdir); OFFSET(LGUEST_PAGES_host_gdt_desc, lguest_pages, state.host_gdt_desc); OFFSET(LGUEST_PAGES_host_idt_desc, lguest_pages, state.host_idt_desc); OFFSET(LGUEST_PAGES_host_cr3, lguest_pages, state.host_cr3); @@ -144,4 +148,10 @@ void foo(void) OFFSET(LGUEST_PAGES_regs_errcode, lguest_pages, regs.errcode); OFFSET(LGUEST_PAGES_regs, lguest_pages, regs); #endif + + BLANK(); + OFFSET(BP_scratch, boot_params, scratch); + OFFSET(BP_loadflags, boot_params, hdr.loadflags); + OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch); + OFFSET(BP_version, boot_params, hdr.version); } diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index 778953bc636c..d1b6ed98774e 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -15,12 +15,16 @@ #include <asm/segment.h> #include <asm/thread_info.h> #include <asm/ia32.h> +#include <asm/bootparam.h> #define DEFINE(sym, val) \ asm volatile("\n->" #sym " %0 " #val : : "i" (val)) #define BLANK() asm volatile("\n->" : : ) +#define OFFSET(sym, str, mem) \ + DEFINE(sym, offsetof(struct str, mem)) + #define __NO_STUBS 1 #undef __SYSCALL #undef _ASM_X86_64_UNISTD_H_ @@ -76,10 +80,44 @@ int main(void) DEFINE(pbe_orig_address, offsetof(struct pbe, orig_address)); DEFINE(pbe_next, offsetof(struct pbe, next)); BLANK(); +#define ENTRY(entry) DEFINE(pt_regs_ ## entry, offsetof(struct pt_regs, entry)) + ENTRY(rbx); + ENTRY(rbx); + ENTRY(rcx); + ENTRY(rdx); + ENTRY(rsp); + ENTRY(rbp); + ENTRY(rsi); + ENTRY(rdi); + ENTRY(r8); + ENTRY(r9); + ENTRY(r10); + ENTRY(r11); + ENTRY(r12); + ENTRY(r13); + ENTRY(r14); + ENTRY(r15); + ENTRY(eflags); + BLANK(); +#undef ENTRY +#define ENTRY(entry) DEFINE(saved_context_ ## entry, offsetof(struct saved_context, entry)) + ENTRY(cr0); + ENTRY(cr2); + ENTRY(cr3); + ENTRY(cr4); + ENTRY(cr8); + BLANK(); +#undef ENTRY DEFINE(TSS_ist, offsetof(struct tss_struct, ist)); BLANK(); DEFINE(crypto_tfm_ctx_offset, offsetof(struct crypto_tfm, __crt_ctx)); BLANK(); DEFINE(__NR_syscall_max, sizeof(syscalls) - 1); + + BLANK(); + OFFSET(BP_scratch, boot_params, scratch); + OFFSET(BP_loadflags, boot_params, hdr.loadflags); + OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch); + OFFSET(BP_version, boot_params, hdr.version); return 0; } diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 778396c78d65..cfdb2f3bd763 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -2,19 +2,19 @@ # Makefile for x86-compatible CPU details and quirks # -obj-y := common.o proc.o bugs.o +obj-y := intel_cacheinfo.o addon_cpuid_features.o -obj-y += amd.o -obj-y += cyrix.o -obj-y += centaur.o -obj-y += transmeta.o -obj-y += intel.o intel_cacheinfo.o addon_cpuid_features.o -obj-y += nexgen.o -obj-y += umc.o +obj-$(CONFIG_X86_32) += common.o proc.o bugs.o +obj-$(CONFIG_X86_32) += amd.o +obj-$(CONFIG_X86_32) += cyrix.o +obj-$(CONFIG_X86_32) += centaur.o +obj-$(CONFIG_X86_32) += transmeta.o +obj-$(CONFIG_X86_32) += intel.o +obj-$(CONFIG_X86_32) += nexgen.o +obj-$(CONFIG_X86_32) += umc.o -obj-$(CONFIG_X86_MCE) += mcheck/ - -obj-$(CONFIG_MTRR) += mtrr/ -obj-$(CONFIG_CPU_FREQ) += cpufreq/ +obj-$(CONFIG_X86_MCE) += mcheck/ +obj-$(CONFIG_MTRR) += mtrr/ +obj-$(CONFIG_CPU_FREQ) += cpufreq/ obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index dcf6bbb1c7c0..1ff88c7f45cf 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -4,6 +4,7 @@ #include <asm/io.h> #include <asm/processor.h> #include <asm/apic.h> +#include <asm/mach_apic.h> #include "cpu.h" @@ -45,13 +46,17 @@ static __cpuinit int amd_apic_timer_broken(void) case CPUID_XFAM_10H: case CPUID_XFAM_11H: rdmsr(MSR_K8_ENABLE_C1E, lo, hi); - if (lo & ENABLE_C1E_MASK) + if (lo & ENABLE_C1E_MASK) { + if (smp_processor_id() != boot_cpu_physical_apicid) + printk(KERN_INFO "AMD C1E detected late. " + " Force timer broadcast.\n"); return 1; - break; - default: - /* err on the side of caution */ + } + break; + default: + /* err on the side of caution */ return 1; - } + } return 0; } #endif @@ -261,7 +266,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) #ifdef CONFIG_X86_HT /* * On a AMD multi core setup the lower bits of the APIC id - * distingush the cores. + * distinguish the cores. */ if (c->x86_max_cores > 1) { int cpu = smp_processor_id(); diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index 473eac883c7b..9681fa15ddf0 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c @@ -53,7 +53,7 @@ static u32 __cpuinit ramtop(void) /* 16388 */ continue; /* * Don't MCR over reserved space. Ignore the ISA hole - * we frob around that catastrophy already + * we frob around that catastrophe already */ if (e820.map[i].type == E820_RESERVED) @@ -287,7 +287,7 @@ static void __cpuinit init_c3(struct cpuinfo_x86 *c) c->x86_capability[5] = cpuid_edx(0xC0000001); } - /* Cyrix III family needs CX8 & PGE explicity enabled. */ + /* Cyrix III family needs CX8 & PGE explicitly enabled. */ if (c->x86_model >=6 && c->x86_model <= 9) { rdmsr (MSR_VIA_FCR, lo, hi); lo |= (1<<1 | 1<<7); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index d506201d397c..e2fcf2051bdb 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -207,7 +207,7 @@ static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c, int early) static int __init x86_fxsr_setup(char * s) { - /* Tell all the other CPU's to not use it... */ + /* Tell all the other CPUs to not use it... */ disable_x86_fxsr = 1; /* diff --git a/arch/x86/kernel/cpu/cpufreq/Kconfig b/arch/x86/kernel/cpu/cpufreq/Kconfig index d8c6f132dc7a..151eda0a23fc 100644 --- a/arch/x86/kernel/cpu/cpufreq/Kconfig +++ b/arch/x86/kernel/cpu/cpufreq/Kconfig @@ -19,6 +19,9 @@ config X86_ACPI_CPUFREQ Processor Performance States. This driver also supports Intel Enhanced Speedstep. + To compile this driver as a module, choose M here: the + module will be called acpi-cpufreq. + For details, take a look at <file:Documentation/cpu-freq/>. If in doubt, say N. @@ -26,7 +29,7 @@ config X86_ACPI_CPUFREQ config ELAN_CPUFREQ tristate "AMD Elan SC400 and SC410" select CPU_FREQ_TABLE - depends on X86_ELAN + depends on X86_32 && X86_ELAN ---help--- This adds the CPUFreq driver for AMD Elan SC400 and SC410 processors. @@ -42,7 +45,7 @@ config ELAN_CPUFREQ config SC520_CPUFREQ tristate "AMD Elan SC520" select CPU_FREQ_TABLE - depends on X86_ELAN + depends on X86_32 && X86_ELAN ---help--- This adds the CPUFreq driver for AMD Elan SC520 processor. @@ -54,6 +57,7 @@ config SC520_CPUFREQ config X86_POWERNOW_K6 tristate "AMD Mobile K6-2/K6-3 PowerNow!" select CPU_FREQ_TABLE + depends on X86_32 help This adds the CPUFreq driver for mobile AMD K6-2+ and mobile AMD K6-3+ processors. @@ -65,6 +69,7 @@ config X86_POWERNOW_K6 config X86_POWERNOW_K7 tristate "AMD Mobile Athlon/Duron PowerNow!" select CPU_FREQ_TABLE + depends on X86_32 help This adds the CPUFreq driver for mobile AMD K7 mobile processors. @@ -76,23 +81,27 @@ config X86_POWERNOW_K7_ACPI bool depends on X86_POWERNOW_K7 && ACPI_PROCESSOR depends on !(X86_POWERNOW_K7 = y && ACPI_PROCESSOR = m) + depends on X86_32 default y config X86_POWERNOW_K8 tristate "AMD Opteron/Athlon64 PowerNow!" select CPU_FREQ_TABLE - depends on EXPERIMENTAL help This adds the CPUFreq driver for mobile AMD Opteron/Athlon64 processors. + To compile this driver as a module, choose M here: the + module will be called powernow-k8. + For details, take a look at <file:Documentation/cpu-freq/>. If in doubt, say N. config X86_POWERNOW_K8_ACPI - bool "ACPI Support" - select ACPI_PROCESSOR - depends on ACPI && X86_POWERNOW_K8 + bool + prompt "ACPI Support" if X86_32 + depends on ACPI && X86_POWERNOW_K8 && ACPI_PROCESSOR + depends on !(X86_POWERNOW_K8 = y && ACPI_PROCESSOR = m) default y help This provides access to the K8s Processor Performance States via ACPI. @@ -104,7 +113,7 @@ config X86_POWERNOW_K8_ACPI config X86_GX_SUSPMOD tristate "Cyrix MediaGX/NatSemi Geode Suspend Modulation" - depends on PCI + depends on X86_32 && PCI help This add the CPUFreq driver for NatSemi Geode processors which support suspend modulation. @@ -114,15 +123,20 @@ config X86_GX_SUSPMOD If in doubt, say N. config X86_SPEEDSTEP_CENTRINO - tristate "Intel Enhanced SpeedStep" + tristate "Intel Enhanced SpeedStep (deprecated)" select CPU_FREQ_TABLE - select X86_SPEEDSTEP_CENTRINO_TABLE + select X86_SPEEDSTEP_CENTRINO_TABLE if X86_32 + depends on X86_32 || (X86_64 && ACPI_PROCESSOR) help + This is deprecated and this functionality is now merged into + acpi_cpufreq (X86_ACPI_CPUFREQ). Use that driver instead of + speedstep_centrino. This adds the CPUFreq driver for Enhanced SpeedStep enabled - mobile CPUs. This means Intel Pentium M (Centrino) CPUs. However, - you also need to say Y to "Use ACPI tables to decode..." below - [which might imply enabling ACPI] if you want to use this driver - on non-Banias CPUs. + mobile CPUs. This means Intel Pentium M (Centrino) CPUs + or 64bit enabled Intel Xeons. + + To compile this driver as a module, choose M here: the + module will be called speedstep-centrino. For details, take a look at <file:Documentation/cpu-freq/>. @@ -130,7 +144,7 @@ config X86_SPEEDSTEP_CENTRINO config X86_SPEEDSTEP_CENTRINO_TABLE bool "Built-in tables for Banias CPUs" - depends on X86_SPEEDSTEP_CENTRINO + depends on X86_32 && X86_SPEEDSTEP_CENTRINO default y help Use built-in tables for Banias CPUs if ACPI encoding @@ -141,6 +155,7 @@ config X86_SPEEDSTEP_CENTRINO_TABLE config X86_SPEEDSTEP_ICH tristate "Intel Speedstep on ICH-M chipsets (ioport interface)" select CPU_FREQ_TABLE + depends on X86_32 help This adds the CPUFreq driver for certain mobile Intel Pentium III (Coppermine), all mobile Intel Pentium III-M (Tualatin) and all @@ -154,7 +169,7 @@ config X86_SPEEDSTEP_ICH config X86_SPEEDSTEP_SMI tristate "Intel SpeedStep on 440BX/ZX/MX chipsets (SMI interface)" select CPU_FREQ_TABLE - depends on EXPERIMENTAL + depends on X86_32 && EXPERIMENTAL help This adds the CPUFreq driver for certain mobile Intel Pentium III (Coppermine), all mobile Intel Pentium III-M (Tualatin) @@ -169,15 +184,24 @@ config X86_P4_CLOCKMOD select CPU_FREQ_TABLE help This adds the CPUFreq driver for Intel Pentium 4 / XEON - processors. + processors. When enabled it will lower CPU temperature by skipping + clocks. + + This driver should be only used in exceptional + circumstances when very low power is needed because it causes severe + slowdowns and noticeable latencies. Normally Speedstep should be used + instead. + + To compile this driver as a module, choose M here: the + module will be called p4-clockmod. For details, take a look at <file:Documentation/cpu-freq/>. - If in doubt, say N. + Unless you are absolutely sure say N. config X86_CPUFREQ_NFORCE2 tristate "nVidia nForce2 FSB changing" - depends on EXPERIMENTAL + depends on X86_32 && EXPERIMENTAL help This adds the CPUFreq driver for FSB changing on nVidia nForce2 platforms. @@ -188,6 +212,7 @@ config X86_CPUFREQ_NFORCE2 config X86_LONGRUN tristate "Transmeta LongRun" + depends on X86_32 help This adds the CPUFreq driver for Transmeta Crusoe and Efficeon processors which support LongRun. @@ -199,7 +224,7 @@ config X86_LONGRUN config X86_LONGHAUL tristate "VIA Cyrix III Longhaul" select CPU_FREQ_TABLE - depends on ACPI_PROCESSOR + depends on X86_32 && ACPI_PROCESSOR help This adds the CPUFreq driver for VIA Samuel/CyrixIII, VIA Cyrix Samuel/C3, VIA Cyrix Ezra and VIA Cyrix Ezra-T @@ -212,7 +237,7 @@ config X86_LONGHAUL config X86_E_POWERSAVER tristate "VIA C7 Enhanced PowerSaver (EXPERIMENTAL)" select CPU_FREQ_TABLE - depends on EXPERIMENTAL + depends on X86_32 && EXPERIMENTAL help This adds the CPUFreq driver for VIA C7 processors. @@ -233,11 +258,11 @@ config X86_ACPI_CPUFREQ_PROC_INTF config X86_SPEEDSTEP_LIB tristate - default X86_SPEEDSTEP_ICH || X86_SPEEDSTEP_SMI || X86_P4_CLOCKMOD + default (X86_SPEEDSTEP_ICH || X86_SPEEDSTEP_SMI || X86_P4_CLOCKMOD) config X86_SPEEDSTEP_RELAXED_CAP_CHECK bool "Relaxed speedstep capability checks" - depends on (X86_SPEEDSTEP_SMI || X86_SPEEDSTEP_ICH) + depends on X86_32 && (X86_SPEEDSTEP_SMI || X86_SPEEDSTEP_ICH) help Don't perform all checks for a speedstep capable system which would normally be done. Some ancient or strange systems, though speedstep diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index ffd01e5dcb52..fea0af0476b9 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -77,7 +77,7 @@ static unsigned int acpi_pstate_strict; static int check_est_cpu(unsigned int cpuid) { - struct cpuinfo_x86 *cpu = &cpu_data[cpuid]; + struct cpuinfo_x86 *cpu = &cpu_data(cpuid); if (cpu->x86_vendor != X86_VENDOR_INTEL || !cpu_has(cpu, X86_FEATURE_EST)) @@ -560,7 +560,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) unsigned int cpu = policy->cpu; struct acpi_cpufreq_data *data; unsigned int result = 0; - struct cpuinfo_x86 *c = &cpu_data[policy->cpu]; + struct cpuinfo_x86 *c = &cpu_data(policy->cpu); struct acpi_processor_performance *perf; dprintk("acpi_cpufreq_cpu_init\n"); @@ -595,7 +595,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) dmi_check_system(sw_any_bug_dmi_table); if (bios_with_sw_any_bug && cpus_weight(policy->cpus) == 1) { policy->shared_type = CPUFREQ_SHARED_TYPE_ALL; - policy->cpus = cpu_core_map[cpu]; + policy->cpus = per_cpu(cpu_core_map, cpu); } #endif diff --git a/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c b/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c index 32f0bda3fc95..f03e9153618e 100644 --- a/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c +++ b/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c @@ -260,7 +260,7 @@ static int nforce2_target(struct cpufreq_policy *policy, freqs.old = nforce2_get(policy->cpu); freqs.new = target_fsb * fid * 100; - freqs.cpu = 0; /* Only one CPU on nForce2 plattforms */ + freqs.cpu = 0; /* Only one CPU on nForce2 platforms */ if (freqs.old == freqs.new) return 0; diff --git a/arch/x86/kernel/cpu/cpufreq/e_powersaver.c b/arch/x86/kernel/cpu/cpufreq/e_powersaver.c index c11baaf9f2b4..326a4c81f684 100644 --- a/arch/x86/kernel/cpu/cpufreq/e_powersaver.c +++ b/arch/x86/kernel/cpu/cpufreq/e_powersaver.c @@ -305,7 +305,7 @@ static struct cpufreq_driver eps_driver = { static int __init eps_init(void) { - struct cpuinfo_x86 *c = cpu_data; + struct cpuinfo_x86 *c = &cpu_data(0); /* This driver will work only on Centaur C7 processors with * Enhanced SpeedStep/PowerSaver registers */ diff --git a/arch/x86/kernel/cpu/cpufreq/elanfreq.c b/arch/x86/kernel/cpu/cpufreq/elanfreq.c index 1e7ae7dafcf6..94619c22f563 100644 --- a/arch/x86/kernel/cpu/cpufreq/elanfreq.c +++ b/arch/x86/kernel/cpu/cpufreq/elanfreq.c @@ -199,7 +199,7 @@ static int elanfreq_target (struct cpufreq_policy *policy, static int elanfreq_cpu_init(struct cpufreq_policy *policy) { - struct cpuinfo_x86 *c = cpu_data; + struct cpuinfo_x86 *c = &cpu_data(0); unsigned int i; int result; @@ -280,7 +280,7 @@ static struct cpufreq_driver elanfreq_driver = { static int __init elanfreq_init(void) { - struct cpuinfo_x86 *c = cpu_data; + struct cpuinfo_x86 *c = &cpu_data(0); /* Test if we have the right hardware */ if ((c->x86_vendor != X86_VENDOR_AMD) || diff --git a/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c b/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c index ed2bda127c44..2ed7db2fd257 100644 --- a/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c +++ b/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c @@ -12,12 +12,12 @@ * of any nature resulting due to the use of this software. This * software is provided AS-IS with no warranties. * - * Theoritical note: + * Theoretical note: * * (see Geode(tm) CS5530 manual (rev.4.1) page.56) * * CPU frequency control on NatSemi Geode GX1/GXLV processor and CS55x0 - * are based on Suspend Moduration. + * are based on Suspend Modulation. * * Suspend Modulation works by asserting and de-asserting the SUSP# pin * to CPU(GX1/GXLV) for configurable durations. When asserting SUSP# @@ -101,11 +101,11 @@ /* SUSCFG bits */ #define SUSMOD (1<<0) /* enable/disable suspend modulation */ -/* the belows support only with cs5530 (after rev.1.2)/cs5530A */ +/* the below is supported only with cs5530 (after rev.1.2)/cs5530A */ #define SMISPDUP (1<<1) /* select how SMI re-enable suspend modulation: */ /* IRQTC timer or read SMI speedup disable reg.(F1BAR[08-09h]) */ #define SUSCFG (1<<2) /* enable powering down a GXLV processor. "Special 3Volt Suspend" mode */ -/* the belows support only with cs5530A */ +/* the below is supported only with cs5530A */ #define PWRSVE_ISA (1<<3) /* stop ISA clock */ #define PWRSVE (1<<4) /* active idle */ diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.c b/arch/x86/kernel/cpu/cpufreq/longhaul.c index 5045f5d583c8..749d00cb2ebd 100644 --- a/arch/x86/kernel/cpu/cpufreq/longhaul.c +++ b/arch/x86/kernel/cpu/cpufreq/longhaul.c @@ -780,7 +780,7 @@ static int longhaul_setup_southbridge(void) static int __init longhaul_cpu_init(struct cpufreq_policy *policy) { - struct cpuinfo_x86 *c = cpu_data; + struct cpuinfo_x86 *c = &cpu_data(0); char *cpuname=NULL; int ret; u32 lo, hi; @@ -959,7 +959,7 @@ static struct cpufreq_driver longhaul_driver = { static int __init longhaul_init(void) { - struct cpuinfo_x86 *c = cpu_data; + struct cpuinfo_x86 *c = &cpu_data(0); if (c->x86_vendor != X86_VENDOR_CENTAUR || c->x86 != 6) return -ENODEV; diff --git a/arch/x86/kernel/cpu/cpufreq/longrun.c b/arch/x86/kernel/cpu/cpufreq/longrun.c index b2689514295a..af4a867a097c 100644 --- a/arch/x86/kernel/cpu/cpufreq/longrun.c +++ b/arch/x86/kernel/cpu/cpufreq/longrun.c @@ -172,7 +172,7 @@ static unsigned int __init longrun_determine_freqs(unsigned int *low_freq, u32 save_lo, save_hi; u32 eax, ebx, ecx, edx; u32 try_hi; - struct cpuinfo_x86 *c = cpu_data; + struct cpuinfo_x86 *c = &cpu_data(0); if (!low_freq || !high_freq) return -EINVAL; @@ -298,7 +298,7 @@ static struct cpufreq_driver longrun_driver = { */ static int __init longrun_init(void) { - struct cpuinfo_x86 *c = cpu_data; + struct cpuinfo_x86 *c = &cpu_data(0); if (c->x86_vendor != X86_VENDOR_TRANSMETA || !cpu_has(c, X86_FEATURE_LONGRUN)) diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c index 8eb414b906d2..14791ec55cfd 100644 --- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c +++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c @@ -195,12 +195,12 @@ static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c) static int cpufreq_p4_cpu_init(struct cpufreq_policy *policy) { - struct cpuinfo_x86 *c = &cpu_data[policy->cpu]; + struct cpuinfo_x86 *c = &cpu_data(policy->cpu); int cpuid = 0; unsigned int i; #ifdef CONFIG_SMP - policy->cpus = cpu_sibling_map[policy->cpu]; + policy->cpus = per_cpu(cpu_sibling_map, policy->cpu); #endif /* Errata workaround */ @@ -279,7 +279,7 @@ static struct cpufreq_driver p4clockmod_driver = { static int __init cpufreq_p4_init(void) { - struct cpuinfo_x86 *c = cpu_data; + struct cpuinfo_x86 *c = &cpu_data(0); int ret; /* diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c index 6d0285339317..eb9b62b0830c 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c @@ -1,6 +1,6 @@ /* * This file was based upon code in Powertweak Linux (http://powertweak.sf.net) - * (C) 2000-2003 Dave Jones, Arjan van de Ven, Janne Pänkälä, Dominik Brodowski. + * (C) 2000-2003 Dave Jones, Arjan van de Ven, Janne Pänkälä, Dominik Brodowski. * * Licensed under the terms of the GNU GPL License version 2. * @@ -215,7 +215,7 @@ static struct cpufreq_driver powernow_k6_driver = { */ static int __init powernow_k6_init(void) { - struct cpuinfo_x86 *c = cpu_data; + struct cpuinfo_x86 *c = &cpu_data(0); if ((c->x86_vendor != X86_VENDOR_AMD) || (c->x86 != 5) || ((c->x86_model != 12) && (c->x86_model != 13))) diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c index 7decd6a50ffa..b5a9863d6cdc 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c @@ -114,7 +114,7 @@ static int check_fsb(unsigned int fsbspeed) static int check_powernow(void) { - struct cpuinfo_x86 *c = cpu_data; + struct cpuinfo_x86 *c = &cpu_data(0); unsigned int maxei, eax, ebx, ecx, edx; if ((c->x86_vendor != X86_VENDOR_AMD) || (c->x86 !=6)) { @@ -565,7 +565,7 @@ static unsigned int powernow_get(unsigned int cpu) } -static int __init acer_cpufreq_pst(struct dmi_system_id *d) +static int __init acer_cpufreq_pst(const struct dmi_system_id *d) { printk(KERN_WARNING "%s laptop with broken PST tables in BIOS detected.\n", d->ident); printk(KERN_WARNING "You need to downgrade to 3A21 (09/09/2002), or try a newer BIOS than 3A71 (01/20/2003)\n"); diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index f105ffd09960..99e1ef9939be 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -57,7 +57,7 @@ static struct powernow_k8_data *powernow_data[NR_CPUS]; static int cpu_family = CPU_OPTERON; #ifndef CONFIG_SMP -static cpumask_t cpu_core_map[1]; +DEFINE_PER_CPU(cpumask_t, cpu_core_map); #endif /* Return a frequency in MHz, given an input fid */ @@ -144,7 +144,7 @@ static void count_off_irt(struct powernow_k8_data *data) return; } -/* the voltage stabalization time */ +/* the voltage stabilization time */ static void count_off_vst(struct powernow_k8_data *data) { udelay(data->vstable * VST_UNITS_20US); @@ -643,7 +643,7 @@ static int fill_powernow_table(struct powernow_k8_data *data, struct pst_s *pst, dprintk("cfid 0x%x, cvid 0x%x\n", data->currfid, data->currvid); data->powernow_table = powernow_table; - if (first_cpu(cpu_core_map[data->cpu]) == data->cpu) + if (first_cpu(per_cpu(cpu_core_map, data->cpu)) == data->cpu) print_basics(data); for (j = 0; j < data->numps; j++) @@ -797,7 +797,7 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) /* fill in data */ data->numps = data->acpi_data.state_count; - if (first_cpu(cpu_core_map[data->cpu]) == data->cpu) + if (first_cpu(per_cpu(cpu_core_map, data->cpu)) == data->cpu) print_basics(data); powernow_k8_acpi_pst_values(data, 0); @@ -1171,7 +1171,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) if (cpu_family == CPU_HW_PSTATE) pol->cpus = cpumask_of_cpu(pol->cpu); else - pol->cpus = cpu_core_map[pol->cpu]; + pol->cpus = per_cpu(cpu_core_map, pol->cpu); data->available_cores = &(pol->cpus); /* Take a crude guess here. @@ -1237,7 +1237,7 @@ static unsigned int powernowk8_get (unsigned int cpu) cpumask_t oldmask = current->cpus_allowed; unsigned int khz = 0; - data = powernow_data[first_cpu(cpu_core_map[cpu])]; + data = powernow_data[first_cpu(per_cpu(cpu_core_map, cpu))]; if (!data) return -EINVAL; diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h index 8ae88f181286..afd2b520d35c 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h @@ -140,10 +140,10 @@ struct powernow_k8_data { #define PLL_LOCK_CONVERSION (1000/5) /* ms to ns, then divide by clock period */ #define MAXIMUM_VID_STEPS 1 /* Current cpus only allow a single step of 25mV */ -#define VST_UNITS_20US 20 /* Voltage Stabalization Time is in units of 20us */ +#define VST_UNITS_20US 20 /* Voltage Stabilization Time is in units of 20us */ /* - * Most values of interest are enocoded in a single field of the _PSS + * Most values of interest are encoded in a single field of the _PSS * entries: the "control" value. */ diff --git a/arch/x86/kernel/cpu/cpufreq/sc520_freq.c b/arch/x86/kernel/cpu/cpufreq/sc520_freq.c index d9f3e90a7ae0..42da9bd677d6 100644 --- a/arch/x86/kernel/cpu/cpufreq/sc520_freq.c +++ b/arch/x86/kernel/cpu/cpufreq/sc520_freq.c @@ -102,7 +102,7 @@ static int sc520_freq_target (struct cpufreq_policy *policy, static int sc520_freq_cpu_init(struct cpufreq_policy *policy) { - struct cpuinfo_x86 *c = cpu_data; + struct cpuinfo_x86 *c = &cpu_data(0); int result; /* capability check */ @@ -151,7 +151,7 @@ static struct cpufreq_driver sc520_freq_driver = { static int __init sc520_freq_init(void) { - struct cpuinfo_x86 *c = cpu_data; + struct cpuinfo_x86 *c = &cpu_data(0); int err; /* Test if we have the right hardware */ diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c index 811d47438546..3031f1196192 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c @@ -230,7 +230,7 @@ static struct cpu_model models[] = static int centrino_cpu_init_table(struct cpufreq_policy *policy) { - struct cpuinfo_x86 *cpu = &cpu_data[policy->cpu]; + struct cpuinfo_x86 *cpu = &cpu_data(policy->cpu); struct cpu_model *model; for(model = models; model->cpu_id != NULL; model++) @@ -340,7 +340,7 @@ static unsigned int get_cur_freq(unsigned int cpu) static int centrino_cpu_init(struct cpufreq_policy *policy) { - struct cpuinfo_x86 *cpu = &cpu_data[policy->cpu]; + struct cpuinfo_x86 *cpu = &cpu_data(policy->cpu); unsigned freq; unsigned l, h; int ret; @@ -612,7 +612,7 @@ static struct cpufreq_driver centrino_driver = { */ static int __init centrino_init(void) { - struct cpuinfo_x86 *cpu = cpu_data; + struct cpuinfo_x86 *cpu = &cpu_data(0); if (!cpu_has(cpu, X86_FEATURE_EST)) return -ENODEV; diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c index 36685e8f7be1..14d68aa301ee 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c @@ -322,7 +322,7 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy) /* only run on CPU to be set, or on its sibling */ #ifdef CONFIG_SMP - policy->cpus = cpu_sibling_map[policy->cpu]; + policy->cpus = per_cpu(cpu_sibling_map, policy->cpu); #endif cpus_allowed = current->cpus_allowed; diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c index b1acc8ce3167..76c3ab0da468 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c @@ -228,7 +228,7 @@ EXPORT_SYMBOL_GPL(speedstep_get_processor_frequency); unsigned int speedstep_detect_processor (void) { - struct cpuinfo_x86 *c = cpu_data; + struct cpuinfo_x86 *c = &cpu_data(0); u32 ebx, msr_lo, msr_hi; dprintk("x86: %x, model: %x\n", c->x86, c->x86_model); diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index 122d2d75aa9f..88d66fb8411d 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c @@ -93,7 +93,7 @@ static void __cpuinit check_cx686_slop(struct cpuinfo_x86 *c) local_irq_save(flags); ccr3 = getCx86(CX86_CCR3); - setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ + setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ ccr5 = getCx86(CX86_CCR5); if (ccr5 & 2) setCx86(CX86_CCR5, ccr5 & 0xfd); /* reset SLOP */ @@ -115,9 +115,9 @@ static void __cpuinit set_cx86_reorder(void) printk(KERN_INFO "Enable Memory access reorder on Cyrix/NSC processor.\n"); ccr3 = getCx86(CX86_CCR3); - setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ + setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ - /* Load/Store Serialize to mem access disable (=reorder it) */ + /* Load/Store Serialize to mem access disable (=reorder it) */ setCx86(CX86_PCR0, getCx86(CX86_PCR0) & ~0x80); /* set load/store serialize from 1GB to 4GB */ ccr3 |= 0xe0; @@ -146,7 +146,7 @@ static void __cpuinit set_cx86_inc(void) printk(KERN_INFO "Enable Incrementor on Cyrix/NSC processor.\n"); ccr3 = getCx86(CX86_CCR3); - setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ + setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ /* PCR1 -- Performance Control */ /* Incrementor on, whatever that is */ setCx86(CX86_PCR1, getCx86(CX86_PCR1) | 0x02); @@ -256,7 +256,7 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c) u32 vendor, device; /* It isn't really a PCI quirk directly, but the cure is the same. The MediaGX has deep magic SMM stuff that handles the - SB emulation. It thows away the fifo on disable_dma() which + SB emulation. It throws away the fifo on disable_dma() which is wrong and ruins the audio. Bug2: VSA1 has a wrap bug so that using maximum sized DMA diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index dc4e08147b1f..cc8c501b9f39 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -8,6 +8,7 @@ #include <linux/module.h> #include <asm/processor.h> +#include <asm/pgtable.h> #include <asm/msr.h> #include <asm/uaccess.h> @@ -19,8 +20,6 @@ #include <mach_apic.h> #endif -extern int trap_init_f00f_bug(void); - #ifdef CONFIG_X86_INTEL_USERCOPY /* * Alignment at which movsl is preferred for bulk memory copies. @@ -95,6 +94,20 @@ static int __cpuinit num_cpu_cores(struct cpuinfo_x86 *c) return 1; } +#ifdef CONFIG_X86_F00F_BUG +static void __cpuinit trap_init_f00f_bug(void) +{ + __set_fixmap(FIX_F00F_IDT, __pa(&idt_table), PAGE_KERNEL_RO); + + /* + * Update the IDT descriptor and reload the IDT so that + * it uses the read-only mapped virtual address. + */ + idt_descr.address = fix_to_virt(FIX_F00F_IDT); + load_idt(&idt_descr); +} +#endif + static void __cpuinit init_intel(struct cpuinfo_x86 *c) { unsigned int l2 = 0; diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index db6c25aa5776..9921b01fe199 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -170,15 +170,15 @@ union l3_cache { unsigned val; }; -static const unsigned short assocs[] = { +static unsigned short assocs[] __cpuinitdata = { [1] = 1, [2] = 2, [4] = 4, [6] = 8, [8] = 16, [0xa] = 32, [0xb] = 48, [0xc] = 64, [0xf] = 0xffff // ?? }; -static const unsigned char levels[] = { 1, 1, 2, 3 }; -static const unsigned char types[] = { 1, 2, 3, 3 }; +static unsigned char levels[] __cpuinitdata = { 1, 1, 2, 3 }; +static unsigned char types[] __cpuinitdata = { 1, 2, 3, 3 }; static void __cpuinit amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, union _cpuid4_leaf_ebx *ebx, @@ -295,7 +295,7 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */ unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb; #ifdef CONFIG_X86_HT - unsigned int cpu = (c == &boot_cpu_data) ? 0 : (c - cpu_data); + unsigned int cpu = c->cpu_index; #endif if (c->cpuid_level > 3) { @@ -417,14 +417,14 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) if (new_l2) { l2 = new_l2; #ifdef CONFIG_X86_HT - cpu_llc_id[cpu] = l2_id; + per_cpu(cpu_llc_id, cpu) = l2_id; #endif } if (new_l3) { l3 = new_l3; #ifdef CONFIG_X86_HT - cpu_llc_id[cpu] = l3_id; + per_cpu(cpu_llc_id, cpu) = l3_id; #endif } @@ -459,7 +459,7 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) struct _cpuid4_info *this_leaf, *sibling_leaf; unsigned long num_threads_sharing; int index_msb, i; - struct cpuinfo_x86 *c = cpu_data; + struct cpuinfo_x86 *c = &cpu_data(cpu); this_leaf = CPUID4_INFO_IDX(cpu, index); num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing; @@ -470,8 +470,8 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) index_msb = get_count_order(num_threads_sharing); for_each_online_cpu(i) { - if (c[i].apicid >> index_msb == - c[cpu].apicid >> index_msb) { + if (cpu_data(i).apicid >> index_msb == + c->apicid >> index_msb) { cpu_set(i, this_leaf->shared_cpu_map); if (i != cpu && cpuid4_info[i]) { sibling_leaf = CPUID4_INFO_IDX(i, index); @@ -493,12 +493,17 @@ static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index) } } #else -static void __init cache_shared_cpu_map_setup(unsigned int cpu, int index) {} -static void __init cache_remove_shared_cpu_map(unsigned int cpu, int index) {} +static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) {} +static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index) {} #endif static void free_cache_attributes(unsigned int cpu) { + int i; + + for (i = 0; i < num_cache_leaves; i++) + cache_remove_shared_cpu_map(cpu, i); + kfree(cpuid4_info[cpu]); cpuid4_info[cpu] = NULL; } @@ -506,8 +511,8 @@ static void free_cache_attributes(unsigned int cpu) static int __cpuinit detect_cache_attributes(unsigned int cpu) { struct _cpuid4_info *this_leaf; - unsigned long j; - int retval; + unsigned long j; + int retval; cpumask_t oldmask; if (num_cache_leaves == 0) @@ -524,19 +529,26 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu) goto out; /* Do cpuid and store the results */ - retval = 0; for (j = 0; j < num_cache_leaves; j++) { this_leaf = CPUID4_INFO_IDX(cpu, j); retval = cpuid4_cache_lookup(j, this_leaf); - if (unlikely(retval < 0)) + if (unlikely(retval < 0)) { + int i; + + for (i = 0; i < j; i++) + cache_remove_shared_cpu_map(cpu, i); break; + } cache_shared_cpu_map_setup(cpu, j); } set_cpus_allowed(current, oldmask); out: - if (retval) - free_cache_attributes(cpu); + if (retval) { + kfree(cpuid4_info[cpu]); + cpuid4_info[cpu] = NULL; + } + return retval; } @@ -669,7 +681,7 @@ static struct kobj_type ktype_percpu_entry = { .sysfs_ops = &sysfs_ops, }; -static void cpuid4_cache_sysfs_exit(unsigned int cpu) +static void __cpuinit cpuid4_cache_sysfs_exit(unsigned int cpu) { kfree(cache_kobject[cpu]); kfree(index_kobject[cpu]); @@ -680,13 +692,14 @@ static void cpuid4_cache_sysfs_exit(unsigned int cpu) static int __cpuinit cpuid4_cache_sysfs_init(unsigned int cpu) { + int err; if (num_cache_leaves == 0) return -ENOENT; - detect_cache_attributes(cpu); - if (cpuid4_info[cpu] == NULL) - return -ENOENT; + err = detect_cache_attributes(cpu); + if (err) + return err; /* Allocate all required memory */ cache_kobject[cpu] = kzalloc(sizeof(struct kobject), GFP_KERNEL); @@ -705,13 +718,15 @@ err_out: return -ENOMEM; } +static cpumask_t cache_dev_map = CPU_MASK_NONE; + /* Add/Remove cache interface for CPU device */ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) { unsigned int cpu = sys_dev->id; unsigned long i, j; struct _index_kobject *this_object; - int retval = 0; + int retval; retval = cpuid4_cache_sysfs_init(cpu); if (unlikely(retval < 0)) @@ -721,6 +736,10 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) kobject_set_name(cache_kobject[cpu], "%s", "cache"); cache_kobject[cpu]->ktype = &ktype_percpu_entry; retval = kobject_register(cache_kobject[cpu]); + if (retval < 0) { + cpuid4_cache_sysfs_exit(cpu); + return retval; + } for (i = 0; i < num_cache_leaves; i++) { this_object = INDEX_KOBJECT_PTR(cpu,i); @@ -740,6 +759,9 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) break; } } + if (!retval) + cpu_set(cpu, cache_dev_map); + return retval; } @@ -750,13 +772,14 @@ static void __cpuinit cache_remove_dev(struct sys_device * sys_dev) if (cpuid4_info[cpu] == NULL) return; - for (i = 0; i < num_cache_leaves; i++) { - cache_remove_shared_cpu_map(cpu, i); + if (!cpu_isset(cpu, cache_dev_map)) + return; + cpu_clear(cpu, cache_dev_map); + + for (i = 0; i < num_cache_leaves; i++) kobject_unregister(&(INDEX_KOBJECT_PTR(cpu,i)->kobj)); - } kobject_unregister(cache_kobject[cpu]); cpuid4_cache_sysfs_exit(cpu); - return; } static int __cpuinit cacheinfo_cpu_callback(struct notifier_block *nfb, @@ -781,7 +804,7 @@ static int __cpuinit cacheinfo_cpu_callback(struct notifier_block *nfb, static struct notifier_block __cpuinitdata cacheinfo_cpu_notifier = { - .notifier_call = cacheinfo_cpu_callback, + .notifier_call = cacheinfo_cpu_callback, }; static int __cpuinit cache_sysfs_init(void) @@ -791,13 +814,15 @@ static int __cpuinit cache_sysfs_init(void) if (num_cache_leaves == 0) return 0; - register_hotcpu_notifier(&cacheinfo_cpu_notifier); - for_each_online_cpu(i) { - cacheinfo_cpu_callback(&cacheinfo_cpu_notifier, CPU_ONLINE, - (void *)(long)i); - } + int err; + struct sys_device *sys_dev = get_cpu_sysdev(i); + err = cache_add_dev(sys_dev); + if (err) + return err; + } + register_hotcpu_notifier(&cacheinfo_cpu_notifier); return 0; } diff --git a/arch/x86/kernel/cpu/mcheck/Makefile b/arch/x86/kernel/cpu/mcheck/Makefile index f1ebe1c1c17a..d7d2323bbb69 100644 --- a/arch/x86/kernel/cpu/mcheck/Makefile +++ b/arch/x86/kernel/cpu/mcheck/Makefile @@ -1,2 +1,6 @@ -obj-y = mce.o k7.o p4.o p5.o p6.o winchip.o therm_throt.o -obj-$(CONFIG_X86_MCE_NONFATAL) += non-fatal.o +obj-y = mce_$(BITS).o therm_throt.o + +obj-$(CONFIG_X86_32) += k7.o p4.o p5.o p6.o winchip.o +obj-$(CONFIG_X86_MCE_INTEL) += mce_intel_64.o +obj-$(CONFIG_X86_MCE_AMD) += mce_amd_64.o +obj-$(CONFIG_X86_MCE_NONFATAL) += non-fatal.o diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce_32.c index 34c781eddee4..34c781eddee4 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce_32.c diff --git a/arch/x86/kernel/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c index a66d607f5b92..447b351f1f2a 100644 --- a/arch/x86/kernel/mce_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_64.c @@ -1,8 +1,8 @@ /* * Machine check handler. * K8 parts Copyright 2002,2003 Andi Kleen, SuSE Labs. - * Rest from unknown author(s). - * 2004 Andi Kleen. Rewrote most of it. + * Rest from unknown author(s). + * 2004 Andi Kleen. Rewrote most of it. */ #include <linux/init.h> @@ -23,7 +23,7 @@ #include <linux/ctype.h> #include <linux/kmod.h> #include <linux/kdebug.h> -#include <asm/processor.h> +#include <asm/processor.h> #include <asm/msr.h> #include <asm/mce.h> #include <asm/uaccess.h> @@ -63,10 +63,10 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_wait); * separate MCEs from kernel messages to avoid bogus bug reports. */ -struct mce_log mcelog = { +struct mce_log mcelog = { MCE_LOG_SIGNATURE, MCE_LOG_LEN, -}; +}; void mce_log(struct mce *mce) { @@ -76,9 +76,6 @@ void mce_log(struct mce *mce) wmb(); for (;;) { entry = rcu_dereference(mcelog.next); - /* The rmb forces the compiler to reload next in each - iteration */ - rmb(); for (;;) { /* When the buffer fills up discard new entries. Assume that the earlier errors are the more interesting. */ @@ -114,42 +111,42 @@ static void print_mce(struct mce *m) "CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n", m->cpu, m->mcgstatus, m->bank, m->status); if (m->rip) { - printk(KERN_EMERG - "RIP%s %02x:<%016Lx> ", + printk(KERN_EMERG "RIP%s %02x:<%016Lx> ", !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "", m->cs, m->rip); if (m->cs == __KERNEL_CS) print_symbol("{%s}", m->rip); printk("\n"); } - printk(KERN_EMERG "TSC %Lx ", m->tsc); + printk(KERN_EMERG "TSC %Lx ", m->tsc); if (m->addr) printk("ADDR %Lx ", m->addr); if (m->misc) - printk("MISC %Lx ", m->misc); + printk("MISC %Lx ", m->misc); printk("\n"); printk(KERN_EMERG "This is not a software problem!\n"); - printk(KERN_EMERG - "Run through mcelog --ascii to decode and contact your hardware vendor\n"); + printk(KERN_EMERG "Run through mcelog --ascii to decode " + "and contact your hardware vendor\n"); } static void mce_panic(char *msg, struct mce *backup, unsigned long start) -{ +{ int i; oops_begin(); for (i = 0; i < MCE_LOG_LEN; i++) { unsigned long tsc = mcelog.entry[i].tsc; + if (time_before(tsc, start)) continue; - print_mce(&mcelog.entry[i]); + print_mce(&mcelog.entry[i]); if (backup && mcelog.entry[i].tsc == backup->tsc) backup = NULL; } if (backup) print_mce(backup); panic(msg); -} +} static int mce_available(struct cpuinfo_x86 *c) { @@ -173,10 +170,9 @@ static inline void mce_get_rip(struct mce *m, struct pt_regs *regs) } } -/* +/* * The actual machine check handler */ - void do_machine_check(struct pt_regs * regs, long error_code) { struct mce m, panicm; @@ -197,7 +193,8 @@ void do_machine_check(struct pt_regs * regs, long error_code) atomic_inc(&mce_entry); if (regs) - notify_die(DIE_NMI, "machine check", regs, error_code, 18, SIGKILL); + notify_die(DIE_NMI, "machine check", regs, error_code, 18, + SIGKILL); if (!banks) goto out2; @@ -207,15 +204,15 @@ void do_machine_check(struct pt_regs * regs, long error_code) /* if the restart IP is not valid, we're done for */ if (!(m.mcgstatus & MCG_STATUS_RIPV)) no_way_out = 1; - + rdtscll(mcestart); barrier(); for (i = 0; i < banks; i++) { if (!bank[i]) continue; - - m.misc = 0; + + m.misc = 0; m.addr = 0; m.bank = i; m.tsc = 0; @@ -323,7 +320,7 @@ void do_machine_check(struct pt_regs * regs, long error_code) #ifdef CONFIG_X86_MCE_INTEL /*** * mce_log_therm_throt_event - Logs the thermal throttling event to mcelog - * @cpu: The CPU on which the event occured. + * @cpu: The CPU on which the event occurred. * @status: Event status information * * This function should be called by the thermal interrupt after the @@ -375,7 +372,7 @@ static void mcheck_timer(struct work_struct *work) if (mce_notify_user()) { next_interval = max(next_interval/2, HZ/100); } else { - next_interval = min(next_interval*2, + next_interval = min(next_interval * 2, (int)round_jiffies_relative(check_interval*HZ)); } @@ -426,18 +423,18 @@ static struct notifier_block mce_idle_notifier = { }; static __init int periodic_mcheck_init(void) -{ +{ next_interval = check_interval * HZ; if (next_interval) schedule_delayed_work(&mcheck_work, round_jiffies_relative(next_interval)); idle_notifier_register(&mce_idle_notifier); return 0; -} +} __initcall(periodic_mcheck_init); -/* +/* * Initialize Machine Checks for a CPU. */ static void mce_init(void *dummy) @@ -447,9 +444,9 @@ static void mce_init(void *dummy) rdmsrl(MSR_IA32_MCG_CAP, cap); banks = cap & 0xff; - if (banks > NR_BANKS) { + if (banks > NR_BANKS) { printk(KERN_INFO "MCE: warning: using only %d banks\n", banks); - banks = NR_BANKS; + banks = NR_BANKS; } /* Use accurate RIP reporting if available. */ if ((cap & (1<<9)) && ((cap >> 16) & 0xff) >= 9) @@ -467,15 +464,15 @@ static void mce_init(void *dummy) for (i = 0; i < banks; i++) { wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]); wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); - } + } } /* Add per CPU specific workarounds here */ static void __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c) -{ +{ /* This should be disabled by the BIOS, but isn't always */ if (c->x86_vendor == X86_VENDOR_AMD && c->x86 == 15) { - /* disable GART TBL walk error reporting, which trips off + /* disable GART TBL walk error reporting, which trips off incorrectly with the IOMMU & 3ware & Cerberus. */ clear_bit(10, &bank[4]); /* Lots of broken BIOS around that don't clear them @@ -483,7 +480,7 @@ static void __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c) mce_bootlog = 0; } -} +} static void __cpuinit mce_cpu_features(struct cpuinfo_x86 *c) { @@ -499,15 +496,15 @@ static void __cpuinit mce_cpu_features(struct cpuinfo_x86 *c) } } -/* +/* * Called for each booted CPU to set up machine checks. - * Must be called with preempt off. + * Must be called with preempt off. */ void __cpuinit mcheck_init(struct cpuinfo_x86 *c) { static cpumask_t mce_cpus = CPU_MASK_NONE; - mce_cpu_quirks(c); + mce_cpu_quirks(c); if (mce_dont_init || cpu_test_and_set(smp_processor_id(), mce_cpus) || @@ -556,13 +553,15 @@ static int mce_release(struct inode *inode, struct file *file) return 0; } -static void collect_tscs(void *data) -{ +static void collect_tscs(void *data) +{ unsigned long *cpu_tsc = (unsigned long *)data; + rdtscll(cpu_tsc[smp_processor_id()]); -} +} -static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, loff_t *off) +static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, + loff_t *off) { unsigned long *cpu_tsc; static DECLARE_MUTEX(mce_read_sem); @@ -574,19 +573,20 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, loff if (!cpu_tsc) return -ENOMEM; - down(&mce_read_sem); + down(&mce_read_sem); next = rcu_dereference(mcelog.next); /* Only supports full reads right now */ - if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) { + if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) { up(&mce_read_sem); kfree(cpu_tsc); return -EINVAL; } err = 0; - for (i = 0; i < next; i++) { + for (i = 0; i < next; i++) { unsigned long start = jiffies; + while (!mcelog.entry[i].finished) { if (time_after_eq(jiffies, start + 2)) { memset(mcelog.entry + i,0, sizeof(struct mce)); @@ -596,31 +596,34 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, loff } smp_rmb(); err |= copy_to_user(buf, mcelog.entry + i, sizeof(struct mce)); - buf += sizeof(struct mce); + buf += sizeof(struct mce); timeout: ; - } + } memset(mcelog.entry, 0, next * sizeof(struct mce)); mcelog.next = 0; synchronize_sched(); - /* Collect entries that were still getting written before the synchronize. */ - + /* + * Collect entries that were still getting written before the + * synchronize. + */ on_each_cpu(collect_tscs, cpu_tsc, 1, 1); - for (i = next; i < MCE_LOG_LEN; i++) { - if (mcelog.entry[i].finished && - mcelog.entry[i].tsc < cpu_tsc[mcelog.entry[i].cpu]) { - err |= copy_to_user(buf, mcelog.entry+i, sizeof(struct mce)); + for (i = next; i < MCE_LOG_LEN; i++) { + if (mcelog.entry[i].finished && + mcelog.entry[i].tsc < cpu_tsc[mcelog.entry[i].cpu]) { + err |= copy_to_user(buf, mcelog.entry+i, + sizeof(struct mce)); smp_rmb(); buf += sizeof(struct mce); memset(&mcelog.entry[i], 0, sizeof(struct mce)); } - } + } up(&mce_read_sem); kfree(cpu_tsc); - return err ? -EFAULT : buf - ubuf; + return err ? -EFAULT : buf - ubuf; } static unsigned int mce_poll(struct file *file, poll_table *wait) @@ -631,26 +634,29 @@ static unsigned int mce_poll(struct file *file, poll_table *wait) return 0; } -static int mce_ioctl(struct inode *i, struct file *f,unsigned int cmd, unsigned long arg) +static int mce_ioctl(struct inode *i, struct file *f,unsigned int cmd, + unsigned long arg) { int __user *p = (int __user *)arg; + if (!capable(CAP_SYS_ADMIN)) - return -EPERM; + return -EPERM; switch (cmd) { - case MCE_GET_RECORD_LEN: + case MCE_GET_RECORD_LEN: return put_user(sizeof(struct mce), p); case MCE_GET_LOG_LEN: - return put_user(MCE_LOG_LEN, p); + return put_user(MCE_LOG_LEN, p); case MCE_GETCLEAR_FLAGS: { unsigned flags; - do { + + do { flags = mcelog.flags; - } while (cmpxchg(&mcelog.flags, flags, 0) != flags); - return put_user(flags, p); + } while (cmpxchg(&mcelog.flags, flags, 0) != flags); + return put_user(flags, p); } default: - return -ENOTTY; - } + return -ENOTTY; + } } static const struct file_operations mce_chrdev_ops = { @@ -681,25 +687,22 @@ void __init restart_mce(void) set_in_cr4(X86_CR4_MCE); } -/* - * Old style boot options parsing. Only for compatibility. +/* + * Old style boot options parsing. Only for compatibility. */ - static int __init mcheck_disable(char *str) { mce_dont_init = 1; return 1; } -/* mce=off disables machine check. Note you can reenable it later +/* mce=off disables machine check. Note you can re-enable it later using sysfs. mce=TOLERANCELEVEL (number, see above) mce=bootlog Log MCEs from before booting. Disabled by default on AMD. mce=nobootlog Don't log MCEs from before booting. */ static int __init mcheck_enable(char *str) { - if (*str == '=') - str++; if (!strcmp(str, "off")) mce_dont_init = 1; else if (!strcmp(str, "bootlog") || !strcmp(str,"nobootlog")) @@ -707,16 +710,16 @@ static int __init mcheck_enable(char *str) else if (isdigit(str[0])) get_option(&str, &tolerant); else - printk("mce= argument %s ignored. Please use /sys", str); + printk("mce= argument %s ignored. Please use /sys", str); return 1; } __setup("nomce", mcheck_disable); -__setup("mce", mcheck_enable); +__setup("mce=", mcheck_enable); -/* +/* * Sysfs support - */ + */ /* On resume clear all MCE state. Don't want to see leftovers from the BIOS. Only one CPU is active at this time, the others get readded later using @@ -728,12 +731,12 @@ static int mce_resume(struct sys_device *dev) } /* Reinit MCEs after user configuration changes */ -static void mce_restart(void) -{ +static void mce_restart(void) +{ if (next_interval) cancel_delayed_work(&mcheck_work); /* Timer race is harmless here */ - on_each_cpu(mce_init, NULL, 1, 1); + on_each_cpu(mce_init, NULL, 1, 1); next_interval = check_interval * HZ; if (next_interval) schedule_delayed_work(&mcheck_work, @@ -749,17 +752,17 @@ DEFINE_PER_CPU(struct sys_device, device_mce); /* Why are there no generic functions for this? */ #define ACCESSOR(name, var, start) \ - static ssize_t show_ ## name(struct sys_device *s, char *buf) { \ - return sprintf(buf, "%lx\n", (unsigned long)var); \ - } \ + static ssize_t show_ ## name(struct sys_device *s, char *buf) { \ + return sprintf(buf, "%lx\n", (unsigned long)var); \ + } \ static ssize_t set_ ## name(struct sys_device *s,const char *buf,size_t siz) { \ - char *end; \ - unsigned long new = simple_strtoul(buf, &end, 0); \ - if (end == buf) return -EINVAL; \ - var = new; \ - start; \ - return end-buf; \ - } \ + char *end; \ + unsigned long new = simple_strtoul(buf, &end, 0); \ + if (end == buf) return -EINVAL; \ + var = new; \ + start; \ + return end-buf; \ + } \ static SYSDEV_ATTR(name, 0644, show_ ## name, set_ ## name); /* TBD should generate these dynamically based on number of available banks */ @@ -799,24 +802,41 @@ static struct sysdev_attribute *mce_attributes[] = { NULL }; +static cpumask_t mce_device_initialized = CPU_MASK_NONE; + /* Per cpu sysdev init. All of the cpus still share the same ctl bank */ static __cpuinit int mce_create_device(unsigned int cpu) { int err; int i; - if (!mce_available(&cpu_data[cpu])) + + if (!mce_available(&cpu_data(cpu))) return -EIO; + memset(&per_cpu(device_mce, cpu).kobj, 0, sizeof(struct kobject)); per_cpu(device_mce,cpu).id = cpu; per_cpu(device_mce,cpu).cls = &mce_sysclass; err = sysdev_register(&per_cpu(device_mce,cpu)); + if (err) + return err; + + for (i = 0; mce_attributes[i]; i++) { + err = sysdev_create_file(&per_cpu(device_mce,cpu), + mce_attributes[i]); + if (err) + goto error; + } + cpu_set(cpu, mce_device_initialized); - if (!err) { - for (i = 0; mce_attributes[i]; i++) - sysdev_create_file(&per_cpu(device_mce,cpu), - mce_attributes[i]); + return 0; +error: + while (i--) { + sysdev_remove_file(&per_cpu(device_mce,cpu), + mce_attributes[i]); } + sysdev_unregister(&per_cpu(device_mce,cpu)); + return err; } @@ -824,11 +844,14 @@ static void mce_remove_device(unsigned int cpu) { int i; + if (!cpu_isset(cpu, mce_device_initialized)) + return; + for (i = 0; mce_attributes[i]; i++) sysdev_remove_file(&per_cpu(device_mce,cpu), mce_attributes[i]); sysdev_unregister(&per_cpu(device_mce,cpu)); - memset(&per_cpu(device_mce, cpu).kobj, 0, sizeof(struct kobject)); + cpu_clear(cpu, mce_device_initialized); } /* Get notified when a cpu comes on/off. Be hotplug friendly. */ @@ -862,9 +885,13 @@ static __init int mce_init_device(void) if (!mce_available(&boot_cpu_data)) return -EIO; err = sysdev_class_register(&mce_sysclass); + if (err) + return err; for_each_online_cpu(i) { - mce_create_device(i); + err = mce_create_device(i); + if (err) + return err; } register_hotcpu_notifier(&mce_cpu_notifier); diff --git a/arch/x86/kernel/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c index 2f8a7f18b0fe..752fb16a817d 100644 --- a/arch/x86/kernel/mce_amd_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c @@ -237,6 +237,7 @@ asmlinkage void mce_threshold_interrupt(void) } } out: + add_pda(irq_threshold_count, 1); irq_exit(); } @@ -471,11 +472,11 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) sprintf(name, "threshold_bank%i", bank); #ifdef CONFIG_SMP - if (cpu_data[cpu].cpu_core_id && shared_bank[bank]) { /* symlink */ - i = first_cpu(cpu_core_map[cpu]); + if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) { /* symlink */ + i = first_cpu(per_cpu(cpu_core_map, cpu)); /* first core not up yet */ - if (cpu_data[i].cpu_core_id) + if (cpu_data(i).cpu_core_id) goto out; /* already linked */ @@ -492,7 +493,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) if (err) goto out; - b->cpus = cpu_core_map[cpu]; + b->cpus = per_cpu(cpu_core_map, cpu); per_cpu(threshold_banks, cpu)[bank] = b; goto out; } @@ -509,7 +510,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) #ifndef CONFIG_SMP b->cpus = CPU_MASK_ALL; #else - b->cpus = cpu_core_map[cpu]; + b->cpus = per_cpu(cpu_core_map, cpu); #endif err = kobject_register(&b->kobj); if (err) diff --git a/arch/x86/kernel/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c index 6551505d8a2c..c17eaf5dd6dd 100644 --- a/arch/x86/kernel/mce_intel_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c @@ -26,6 +26,7 @@ asmlinkage void smp_thermal_interrupt(void) if (therm_throt_process(msr_val & 1)) mce_log_therm_throt_event(smp_processor_id(), msr_val); + add_pda(irq_thermal_count, 1); irq_exit(); } diff --git a/arch/x86/kernel/cpu/mcheck/p4.c b/arch/x86/kernel/cpu/mcheck/p4.c index 1509edfb2313..be4dabfee1f5 100644 --- a/arch/x86/kernel/cpu/mcheck/p4.c +++ b/arch/x86/kernel/cpu/mcheck/p4.c @@ -61,6 +61,7 @@ fastcall void smp_thermal_interrupt(struct pt_regs *regs) { irq_enter(); vendor_thermal_interrupt(regs); + __get_cpu_var(irq_stat).irq_thermal_count++; irq_exit(); } diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 1203dc5ab87a..24885be5c48c 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -131,17 +131,19 @@ static __cpuinit int thermal_throttle_cpu_callback(struct notifier_block *nfb, { unsigned int cpu = (unsigned long)hcpu; struct sys_device *sys_dev; - int err; + int err = 0; sys_dev = get_cpu_sysdev(cpu); switch (action) { - case CPU_ONLINE: - case CPU_ONLINE_FROZEN: + case CPU_UP_PREPARE: + case CPU_UP_PREPARE_FROZEN: mutex_lock(&therm_cpu_lock); err = thermal_throttle_add_dev(sys_dev); mutex_unlock(&therm_cpu_lock); WARN_ON(err); break; + case CPU_UP_CANCELED: + case CPU_UP_CANCELED_FROZEN: case CPU_DEAD: case CPU_DEAD_FROZEN: mutex_lock(&therm_cpu_lock); @@ -149,10 +151,10 @@ static __cpuinit int thermal_throttle_cpu_callback(struct notifier_block *nfb, mutex_unlock(&therm_cpu_lock); break; } - return NOTIFY_OK; + return err ? NOTIFY_BAD : NOTIFY_OK; } -static struct notifier_block thermal_throttle_cpu_notifier = +static struct notifier_block thermal_throttle_cpu_notifier __cpuinitdata = { .notifier_call = thermal_throttle_cpu_callback, }; diff --git a/arch/x86/kernel/cpu/mtrr/cyrix.c b/arch/x86/kernel/cpu/mtrr/cyrix.c index 2287d4863a8a..9964be3de2b7 100644 --- a/arch/x86/kernel/cpu/mtrr/cyrix.c +++ b/arch/x86/kernel/cpu/mtrr/cyrix.c @@ -147,10 +147,10 @@ static void prepare_set(void) write_cr0(cr0); wbinvd(); - /* Cyrix ARRs - everything else were excluded at the top */ + /* Cyrix ARRs - everything else was excluded at the top */ ccr3 = getCx86(CX86_CCR3); - /* Cyrix ARRs - everything else were excluded at the top */ + /* Cyrix ARRs - everything else was excluded at the top */ setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); } diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 56f64e34829f..992f08dfbb6c 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -182,7 +182,7 @@ static inline void k8_enable_fixed_iorrs(void) /** * Checks and updates an fixed-range MTRR if it differs from the value it - * should have. If K8 extenstions are wanted, update the K8 SYSCFG MSR also. + * should have. If K8 extentions are wanted, update the K8 SYSCFG MSR also. * see AMD publication no. 24593, chapter 7.8.1, page 233 for more information * \param msr MSR address of the MTTR which should be checked and updated * \param changed pointer which indicates whether the MTRR needed to be changed diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index c48b6fea5ab4..3b20613325dc 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -139,13 +139,12 @@ struct set_mtrr_data { mtrr_type smp_type; }; -#ifdef CONFIG_SMP - static void ipi_handler(void *info) /* [SUMMARY] Synchronisation handler. Executed by "other" CPUs. [RETURNS] Nothing. */ { +#ifdef CONFIG_SMP struct set_mtrr_data *data = info; unsigned long flags; @@ -168,9 +167,8 @@ static void ipi_handler(void *info) atomic_dec(&data->count); local_irq_restore(flags); -} - #endif +} static inline int types_compatible(mtrr_type type1, mtrr_type type2) { return type1 == MTRR_TYPE_UNCACHABLE || @@ -738,13 +736,7 @@ void mtrr_ap_init(void) */ void mtrr_save_state(void) { - int cpu = get_cpu(); - - if (cpu == 0) - mtrr_save_fixed_ranges(NULL); - else - smp_call_function_single(0, mtrr_save_fixed_ranges, NULL, 1, 1); - put_cpu(); + smp_call_function_single(0, mtrr_save_fixed_ranges, NULL, 1, 1); } static int __init mtrr_init_finialize(void) @@ -754,7 +746,7 @@ static int __init mtrr_init_finialize(void) if (use_intel()) mtrr_state_warn(); else { - /* The CPUs haven't MTRR and seemes not support SMP. They have + /* The CPUs haven't MTRR and seem to not support SMP. They have * specific drivers, we use a tricky method to support * suspend/resume for them. * TBD: is there any system with such CPU which supports diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index 93fecd4b03de..c02541e6e653 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c @@ -34,7 +34,7 @@ struct wd_ops { u64 checkbit; }; -static struct wd_ops *wd_ops; +static const struct wd_ops *wd_ops; /* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's * offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now) @@ -120,7 +120,9 @@ int reserve_perfctr_nmi(unsigned int msr) unsigned int counter; counter = nmi_perfctr_msr_to_bit(msr); - BUG_ON(counter > NMI_MAX_COUNTER_BITS); + /* register not managed by the allocator? */ + if (counter > NMI_MAX_COUNTER_BITS) + return 1; if (!test_and_set_bit(counter, perfctr_nmi_owner)) return 1; @@ -132,7 +134,9 @@ void release_perfctr_nmi(unsigned int msr) unsigned int counter; counter = nmi_perfctr_msr_to_bit(msr); - BUG_ON(counter > NMI_MAX_COUNTER_BITS); + /* register not managed by the allocator? */ + if (counter > NMI_MAX_COUNTER_BITS) + return; clear_bit(counter, perfctr_nmi_owner); } @@ -142,7 +146,9 @@ int reserve_evntsel_nmi(unsigned int msr) unsigned int counter; counter = nmi_evntsel_msr_to_bit(msr); - BUG_ON(counter > NMI_MAX_COUNTER_BITS); + /* register not managed by the allocator? */ + if (counter > NMI_MAX_COUNTER_BITS) + return 1; if (!test_and_set_bit(counter, evntsel_nmi_owner)) return 1; @@ -154,7 +160,9 @@ void release_evntsel_nmi(unsigned int msr) unsigned int counter; counter = nmi_evntsel_msr_to_bit(msr); - BUG_ON(counter > NMI_MAX_COUNTER_BITS); + /* register not managed by the allocator? */ + if (counter > NMI_MAX_COUNTER_BITS) + return; clear_bit(counter, evntsel_nmi_owner); } @@ -317,7 +325,7 @@ static void single_msr_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz); } -static struct wd_ops k7_wd_ops = { +static const struct wd_ops k7_wd_ops = { .reserve = single_msr_reserve, .unreserve = single_msr_unreserve, .setup = setup_k7_watchdog, @@ -380,7 +388,7 @@ static void p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) write_watchdog_counter32(wd->perfctr_msr, NULL,nmi_hz); } -static struct wd_ops p6_wd_ops = { +static const struct wd_ops p6_wd_ops = { .reserve = single_msr_reserve, .unreserve = single_msr_unreserve, .setup = setup_p6_watchdog, @@ -532,7 +540,7 @@ static void p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz); } -static struct wd_ops p4_wd_ops = { +static const struct wd_ops p4_wd_ops = { .reserve = p4_reserve, .unreserve = p4_unreserve, .setup = setup_p4_watchdog, @@ -550,6 +558,8 @@ static struct wd_ops p4_wd_ops = { #define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL #define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK +static struct wd_ops intel_arch_wd_ops; + static int setup_intel_arch_watchdog(unsigned nmi_hz) { unsigned int ebx; @@ -591,11 +601,11 @@ static int setup_intel_arch_watchdog(unsigned nmi_hz) wd->perfctr_msr = perfctr_msr; wd->evntsel_msr = evntsel_msr; wd->cccr_msr = 0; //unused - wd_ops->checkbit = 1ULL << (eax.split.bit_width - 1); + intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1); return 1; } -static struct wd_ops intel_arch_wd_ops = { +static struct wd_ops intel_arch_wd_ops __read_mostly = { .reserve = single_msr_reserve, .unreserve = single_msr_unreserve, .setup = setup_intel_arch_watchdog, diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c index 1e31b6caffb1..066f8c6af4df 100644 --- a/arch/x86/kernel/cpu/proc.c +++ b/arch/x86/kernel/cpu/proc.c @@ -49,7 +49,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) /* Intel-defined (#2) */ "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est", "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL, - NULL, NULL, "dca", NULL, NULL, NULL, NULL, "popcnt", + NULL, NULL, "dca", "sse4_1", "sse4_2", NULL, NULL, "popcnt", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /* VIA/Cyrix/Centaur-defined */ @@ -59,10 +59,10 @@ static int show_cpuinfo(struct seq_file *m, void *v) NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /* AMD-defined (#2) */ - "lahf_lm", "cmp_legacy", "svm", "extapic", "cr8_legacy", - "altmovcr8", "abm", "sse4a", - "misalignsse", "3dnowprefetch", - "osvw", "ibs", NULL, NULL, NULL, NULL, + "lahf_lm", "cmp_legacy", "svm", "extapic", + "cr8_legacy", "abm", "sse4a", "misalignsse", + "3dnowprefetch", "osvw", "ibs", "sse5", + "skinit", "wdt", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, @@ -85,12 +85,13 @@ static int show_cpuinfo(struct seq_file *m, void *v) /* nothing */ }; struct cpuinfo_x86 *c = v; - int i, n = c - cpu_data; + int i, n = 0; int fpu_exception; #ifdef CONFIG_SMP if (!cpu_online(n)) return 0; + n = c->cpu_index; #endif seq_printf(m, "processor\t: %d\n" "vendor_id\t: %s\n" @@ -122,7 +123,8 @@ static int show_cpuinfo(struct seq_file *m, void *v) #ifdef CONFIG_X86_HT if (c->x86_max_cores * smp_num_siblings > 1) { seq_printf(m, "physical id\t: %d\n", c->phys_proc_id); - seq_printf(m, "siblings\t: %d\n", cpus_weight(cpu_core_map[n])); + seq_printf(m, "siblings\t: %d\n", + cpus_weight(per_cpu(cpu_core_map, n))); seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id); seq_printf(m, "cpu cores\t: %d\n", c->booted_cores); } @@ -174,11 +176,15 @@ static int show_cpuinfo(struct seq_file *m, void *v) static void *c_start(struct seq_file *m, loff_t *pos) { - return *pos < NR_CPUS ? cpu_data + *pos : NULL; + if (*pos == 0) /* just in case, cpu 0 is not the first */ + *pos = first_cpu(cpu_possible_map); + if ((*pos) < NR_CPUS && cpu_possible(*pos)) + return &cpu_data(*pos); + return NULL; } static void *c_next(struct seq_file *m, void *v, loff_t *pos) { - ++*pos; + *pos = next_cpu(*pos, cpu_possible_map); return c_start(m, pos); } static void c_stop(struct seq_file *m, void *v) diff --git a/arch/x86/kernel/cpufreq/Kconfig b/arch/x86/kernel/cpufreq/Kconfig deleted file mode 100644 index a3fd51926cbd..000000000000 --- a/arch/x86/kernel/cpufreq/Kconfig +++ /dev/null @@ -1,108 +0,0 @@ -# -# CPU Frequency scaling -# - -menu "CPU Frequency scaling" - -source "drivers/cpufreq/Kconfig" - -if CPU_FREQ - -comment "CPUFreq processor drivers" - -config X86_POWERNOW_K8 - tristate "AMD Opteron/Athlon64 PowerNow!" - select CPU_FREQ_TABLE - help - This adds the CPUFreq driver for mobile AMD Opteron/Athlon64 processors. - - To compile this driver as a module, choose M here: the - module will be called powernow-k8. - - For details, take a look at <file:Documentation/cpu-freq/>. - - If in doubt, say N. - -config X86_POWERNOW_K8_ACPI - bool - depends on X86_POWERNOW_K8 && ACPI_PROCESSOR - depends on !(X86_POWERNOW_K8 = y && ACPI_PROCESSOR = m) - default y - -config X86_SPEEDSTEP_CENTRINO - tristate "Intel Enhanced SpeedStep (deprecated)" - select CPU_FREQ_TABLE - depends on ACPI_PROCESSOR - help - This is deprecated and this functionality is now merged into - acpi_cpufreq (X86_ACPI_CPUFREQ). Use that driver instead of - speedstep_centrino. - This adds the CPUFreq driver for Enhanced SpeedStep enabled - mobile CPUs. This means Intel Pentium M (Centrino) CPUs - or 64bit enabled Intel Xeons. - - To compile this driver as a module, choose M here: the - module will be called speedstep-centrino. - - For details, take a look at <file:Documentation/cpu-freq/>. - - If in doubt, say N. - -config X86_ACPI_CPUFREQ - tristate "ACPI Processor P-States driver" - select CPU_FREQ_TABLE - depends on ACPI_PROCESSOR - help - This driver adds a CPUFreq driver which utilizes the ACPI - Processor Performance States. - This driver also supports Intel Enhanced Speedstep. - - To compile this driver as a module, choose M here: the - module will be called acpi-cpufreq. - - For details, take a look at <file:Documentation/cpu-freq/>. - - If in doubt, say N. - -comment "shared options" - -config X86_ACPI_CPUFREQ_PROC_INTF - bool "/proc/acpi/processor/../performance interface (deprecated)" - depends on PROC_FS - depends on X86_ACPI_CPUFREQ || X86_POWERNOW_K8_ACPI - help - This enables the deprecated /proc/acpi/processor/../performance - interface. While it is helpful for debugging, the generic, - cross-architecture cpufreq interfaces should be used. - - If in doubt, say N. - -config X86_P4_CLOCKMOD - tristate "Intel Pentium 4 clock modulation" - depends on EMBEDDED - select CPU_FREQ_TABLE - help - This adds the clock modulation driver for Intel Pentium 4 / XEON - processors. When enabled it will lower CPU temperature by skipping - clocks. - - This driver should be only used in exceptional - circumstances when very low power is needed because it causes severe - slowdowns and noticeable latencies. Normally Speedstep should be used - instead. - - To compile this driver as a module, choose M here: the - module will be called p4-clockmod. - - For details, take a look at <file:Documentation/cpu-freq/>. - - Unless you are absolutely sure say N. - - -config X86_SPEEDSTEP_LIB - tristate - default X86_P4_CLOCKMOD - -endif - -endmenu diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index f4548c93ccf5..05c9936a16cc 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c @@ -43,8 +43,6 @@ static struct class *cpuid_class; -#ifdef CONFIG_SMP - struct cpuid_command { u32 reg; u32 *data; @@ -62,25 +60,11 @@ static inline void do_cpuid(int cpu, u32 reg, u32 * data) { struct cpuid_command cmd; - preempt_disable(); - if (cpu == smp_processor_id()) { - cpuid(reg, &data[0], &data[1], &data[2], &data[3]); - } else { - cmd.reg = reg; - cmd.data = data; + cmd.reg = reg; + cmd.data = data; - smp_call_function_single(cpu, cpuid_smp_cpuid, &cmd, 1, 1); - } - preempt_enable(); + smp_call_function_single(cpu, cpuid_smp_cpuid, &cmd, 1, 1); } -#else /* ! CONFIG_SMP */ - -static inline void do_cpuid(int cpu, u32 reg, u32 * data) -{ - cpuid(reg, &data[0], &data[1], &data[2], &data[3]); -} - -#endif /* ! CONFIG_SMP */ static loff_t cpuid_seek(struct file *file, loff_t offset, int orig) { @@ -130,7 +114,7 @@ static ssize_t cpuid_read(struct file *file, char __user *buf, static int cpuid_open(struct inode *inode, struct file *file) { unsigned int cpu = iminor(file->f_path.dentry->d_inode); - struct cpuinfo_x86 *c = &(cpu_data)[cpu]; + struct cpuinfo_x86 *c = &cpu_data(cpu); if (cpu >= NR_CPUS || !cpu_online(cpu)) return -ENXIO; /* No such CPU */ @@ -150,32 +134,40 @@ static const struct file_operations cpuid_fops = { .open = cpuid_open, }; -static int cpuid_device_create(int i) +static __cpuinit int cpuid_device_create(int cpu) { - int err = 0; struct device *dev; - dev = device_create(cpuid_class, NULL, MKDEV(CPUID_MAJOR, i), "cpu%d",i); - if (IS_ERR(dev)) - err = PTR_ERR(dev); - return err; + dev = device_create(cpuid_class, NULL, MKDEV(CPUID_MAJOR, cpu), + "cpu%d", cpu); + return IS_ERR(dev) ? PTR_ERR(dev) : 0; +} + +static void cpuid_device_destroy(int cpu) +{ + device_destroy(cpuid_class, MKDEV(CPUID_MAJOR, cpu)); } -static int cpuid_class_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) +static int __cpuinit cpuid_class_cpu_callback(struct notifier_block *nfb, + unsigned long action, + void *hcpu) { unsigned int cpu = (unsigned long)hcpu; + int err = 0; switch (action) { - case CPU_ONLINE: - case CPU_ONLINE_FROZEN: - cpuid_device_create(cpu); + case CPU_UP_PREPARE: + case CPU_UP_PREPARE_FROZEN: + err = cpuid_device_create(cpu); break; + case CPU_UP_CANCELED: + case CPU_UP_CANCELED_FROZEN: case CPU_DEAD: case CPU_DEAD_FROZEN: - device_destroy(cpuid_class, MKDEV(CPUID_MAJOR, cpu)); + cpuid_device_destroy(cpu); break; } - return NOTIFY_OK; + return err ? NOTIFY_BAD : NOTIFY_OK; } static struct notifier_block __cpuinitdata cpuid_class_cpu_notifier = @@ -212,7 +204,7 @@ static int __init cpuid_init(void) out_class: i = 0; for_each_online_cpu(i) { - device_destroy(cpuid_class, MKDEV(CPUID_MAJOR, i)); + cpuid_device_destroy(i); } class_destroy(cpuid_class); out_chrdev: @@ -226,7 +218,7 @@ static void __exit cpuid_exit(void) int cpu = 0; for_each_online_cpu(cpu) - device_destroy(cpuid_class, MKDEV(CPUID_MAJOR, cpu)); + cpuid_device_destroy(cpu); class_destroy(cpuid_class); unregister_chrdev(CPUID_MAJOR, "cpu/cpuid"); unregister_hotcpu_notifier(&cpuid_class_cpu_notifier); diff --git a/arch/x86/kernel/crash_32.c b/arch/x86/kernel/crash.c index 53589d1b1a05..8bb482ff091b 100644 --- a/arch/x86/kernel/crash_32.c +++ b/arch/x86/kernel/crash.c @@ -1,5 +1,5 @@ /* - * Architecture specific (i386) functions for kexec based crash dumps. + * Architecture specific (i386/x86_64) functions for kexec based crash dumps. * * Created by: Hariprasad Nellitheertha (hari@in.ibm.com) * @@ -25,8 +25,11 @@ #include <linux/kdebug.h> #include <asm/smp.h> +#ifdef CONFIG_X86_32 #include <mach_ipi.h> - +#else +#include <asm/mach_apic.h> +#endif /* This keeps a track of which one is crashing cpu. */ static int crashing_cpu; @@ -38,7 +41,9 @@ static int crash_nmi_callback(struct notifier_block *self, unsigned long val, void *data) { struct pt_regs *regs; +#ifdef CONFIG_X86_32 struct pt_regs fixed_regs; +#endif int cpu; if (val != DIE_NMI_IPI) @@ -55,10 +60,12 @@ static int crash_nmi_callback(struct notifier_block *self, return NOTIFY_STOP; local_irq_disable(); +#ifdef CONFIG_X86_32 if (!user_mode_vm(regs)) { crash_fixup_ss_esp(&fixed_regs, regs); regs = &fixed_regs; } +#endif crash_save_cpu(regs, cpu); disable_local_APIC(); atomic_dec(&waiting_for_crash_ipi); diff --git a/arch/x86/kernel/crash_64.c b/arch/x86/kernel/crash_64.c deleted file mode 100644 index 13432a1ae904..000000000000 --- a/arch/x86/kernel/crash_64.c +++ /dev/null @@ -1,135 +0,0 @@ -/* - * Architecture specific (x86_64) functions for kexec based crash dumps. - * - * Created by: Hariprasad Nellitheertha (hari@in.ibm.com) - * - * Copyright (C) IBM Corporation, 2004. All rights reserved. - * - */ - -#include <linux/init.h> -#include <linux/types.h> -#include <linux/kernel.h> -#include <linux/smp.h> -#include <linux/irq.h> -#include <linux/reboot.h> -#include <linux/kexec.h> -#include <linux/delay.h> -#include <linux/elf.h> -#include <linux/elfcore.h> -#include <linux/kdebug.h> - -#include <asm/processor.h> -#include <asm/hardirq.h> -#include <asm/nmi.h> -#include <asm/hw_irq.h> -#include <asm/mach_apic.h> - -/* This keeps a track of which one is crashing cpu. */ -static int crashing_cpu; - -#ifdef CONFIG_SMP -static atomic_t waiting_for_crash_ipi; - -static int crash_nmi_callback(struct notifier_block *self, - unsigned long val, void *data) -{ - struct pt_regs *regs; - int cpu; - - if (val != DIE_NMI_IPI) - return NOTIFY_OK; - - regs = ((struct die_args *)data)->regs; - cpu = raw_smp_processor_id(); - - /* - * Don't do anything if this handler is invoked on crashing cpu. - * Otherwise, system will completely hang. Crashing cpu can get - * an NMI if system was initially booted with nmi_watchdog parameter. - */ - if (cpu == crashing_cpu) - return NOTIFY_STOP; - local_irq_disable(); - - crash_save_cpu(regs, cpu); - disable_local_APIC(); - atomic_dec(&waiting_for_crash_ipi); - /* Assume hlt works */ - for(;;) - halt(); - - return 1; -} - -static void smp_send_nmi_allbutself(void) -{ - send_IPI_allbutself(NMI_VECTOR); -} - -/* - * This code is a best effort heuristic to get the - * other cpus to stop executing. So races with - * cpu hotplug shouldn't matter. - */ - -static struct notifier_block crash_nmi_nb = { - .notifier_call = crash_nmi_callback, -}; - -static void nmi_shootdown_cpus(void) -{ - unsigned long msecs; - - atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1); - if (register_die_notifier(&crash_nmi_nb)) - return; /* return what? */ - - /* - * Ensure the new callback function is set before sending - * out the NMI - */ - wmb(); - - smp_send_nmi_allbutself(); - - msecs = 1000; /* Wait at most a second for the other cpus to stop */ - while ((atomic_read(&waiting_for_crash_ipi) > 0) && msecs) { - mdelay(1); - msecs--; - } - /* Leave the nmi callback set */ - disable_local_APIC(); -} -#else -static void nmi_shootdown_cpus(void) -{ - /* There are no cpus to shootdown */ -} -#endif - -void machine_crash_shutdown(struct pt_regs *regs) -{ - /* - * This function is only called after the system - * has panicked or is otherwise in a critical state. - * The minimum amount of code to allow a kexec'd kernel - * to run successfully needs to happen here. - * - * In practice this means shooting down the other cpus in - * an SMP system. - */ - /* The kernel is broken so disable interrupts */ - local_irq_disable(); - - /* Make a note of crashing cpu. Will be used in NMI callback.*/ - crashing_cpu = smp_processor_id(); - nmi_shootdown_cpus(); - - if(cpu_has_apic) - disable_local_APIC(); - - disable_IO_APIC(); - - crash_save_cpu(regs, smp_processor_id()); -} diff --git a/arch/x86/kernel/crash_dump_32.c b/arch/x86/kernel/crash_dump_32.c index 32e75d0731a9..72d0c56c1b48 100644 --- a/arch/x86/kernel/crash_dump_32.c +++ b/arch/x86/kernel/crash_dump_32.c @@ -47,6 +47,7 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf, if (!kdump_buf_page) { printk(KERN_WARNING "Kdump: Kdump buffer page not" " allocated\n"); + kunmap_atomic(vaddr, KM_PTE0); return -EFAULT; } copy_page(kdump_buf_page, vaddr); diff --git a/arch/x86/kernel/e820_32.c b/arch/x86/kernel/e820_32.c index 3c86b979a40a..18f500d185a2 100644 --- a/arch/x86/kernel/e820_32.c +++ b/arch/x86/kernel/e820_32.c @@ -51,6 +51,13 @@ struct resource code_resource = { .flags = IORESOURCE_BUSY | IORESOURCE_MEM }; +struct resource bss_resource = { + .name = "Kernel bss", + .start = 0, + .end = 0, + .flags = IORESOURCE_BUSY | IORESOURCE_MEM +}; + static struct resource system_rom_resource = { .name = "System ROM", .start = 0xf0000, @@ -254,7 +261,9 @@ static void __init probe_roms(void) * and also for regions reported as reserved by the e820. */ static void __init -legacy_init_iomem_resources(struct resource *code_resource, struct resource *data_resource) +legacy_init_iomem_resources(struct resource *code_resource, + struct resource *data_resource, + struct resource *bss_resource) { int i; @@ -287,8 +296,10 @@ legacy_init_iomem_resources(struct resource *code_resource, struct resource *dat */ request_resource(res, code_resource); request_resource(res, data_resource); + request_resource(res, bss_resource); #ifdef CONFIG_KEXEC - request_resource(res, &crashk_res); + if (crashk_res.start != crashk_res.end) + request_resource(res, &crashk_res); #endif } } @@ -306,9 +317,11 @@ static int __init request_standard_resources(void) printk("Setting up standard PCI resources\n"); if (efi_enabled) - efi_initialize_iomem_resources(&code_resource, &data_resource); + efi_initialize_iomem_resources(&code_resource, + &data_resource, &bss_resource); else - legacy_init_iomem_resources(&code_resource, &data_resource); + legacy_init_iomem_resources(&code_resource, + &data_resource, &bss_resource); /* EFI systems may still have VGA */ request_resource(&iomem_resource, &video_ram_resource); @@ -705,7 +718,7 @@ void __init e820_register_memory(void) int i; /* - * Search for the bigest gap in the low 32 bits of the e820 + * Search for the biggest gap in the low 32 bits of the e820 * memory space. */ last = 0x100000000ull; diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c index 0f4d5e209e9b..04698e0b056c 100644 --- a/arch/x86/kernel/e820_64.c +++ b/arch/x86/kernel/e820_64.c @@ -24,7 +24,7 @@ #include <asm/page.h> #include <asm/e820.h> #include <asm/proto.h> -#include <asm/bootsetup.h> +#include <asm/setup.h> #include <asm/sections.h> struct e820map e820; @@ -47,7 +47,7 @@ unsigned long end_pfn_map; */ static unsigned long __initdata end_user_pfn = MAXMEM>>PAGE_SHIFT; -extern struct resource code_resource, data_resource; +extern struct resource code_resource, data_resource, bss_resource; /* Check for some hardcoded bad areas that early boot is not allowed to touch */ static inline int bad_addr(unsigned long *addrp, unsigned long size) @@ -68,10 +68,15 @@ static inline int bad_addr(unsigned long *addrp, unsigned long size) /* initrd */ #ifdef CONFIG_BLK_DEV_INITRD - if (LOADER_TYPE && INITRD_START && last >= INITRD_START && - addr < INITRD_START+INITRD_SIZE) { - *addrp = PAGE_ALIGN(INITRD_START + INITRD_SIZE); - return 1; + if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) { + unsigned long ramdisk_image = boot_params.hdr.ramdisk_image; + unsigned long ramdisk_size = boot_params.hdr.ramdisk_size; + unsigned long ramdisk_end = ramdisk_image+ramdisk_size; + + if (last >= ramdisk_image && addr < ramdisk_end) { + *addrp = PAGE_ALIGN(ramdisk_end); + return 1; + } } #endif /* kernel code */ @@ -220,8 +225,10 @@ void __init e820_reserve_resources(void) */ request_resource(res, &code_resource); request_resource(res, &data_resource); + request_resource(res, &bss_resource); #ifdef CONFIG_KEXEC - request_resource(res, &crashk_res); + if (crashk_res.start != crashk_res.end) + request_resource(res, &crashk_res); #endif } } @@ -594,8 +601,8 @@ void __init setup_memory_region(void) * Otherwise fake a memory map; one section from 0k->640k, * the next section from 1mb->appropriate_mem_k */ - sanitize_e820_map(E820_MAP, &E820_MAP_NR); - if (copy_e820_map(E820_MAP, E820_MAP_NR) < 0) + sanitize_e820_map(boot_params.e820_map, &boot_params.e820_entries); + if (copy_e820_map(boot_params.e820_map, boot_params.e820_entries) < 0) early_panic("Cannot find a valid memory map"); printk(KERN_INFO "BIOS-provided physical RAM map:\n"); e820_print_map("BIOS-e820"); @@ -723,3 +730,22 @@ __init void e820_setup_gap(void) printk(KERN_INFO "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n", pci_mem_start, gapstart, gapsize); } + +int __init arch_get_ram_range(int slot, u64 *addr, u64 *size) +{ + int i; + + if (slot < 0 || slot >= e820.nr_map) + return -1; + for (i = slot; i < e820.nr_map; i++) { + if (e820.map[i].type != E820_RAM) + continue; + break; + } + if (i == e820.nr_map || e820.map[i].addr > (max_pfn << PAGE_SHIFT)) + return -1; + *addr = e820.map[i].addr; + *size = min_t(u64, e820.map[i].size + e820.map[i].addr, + max_pfn << PAGE_SHIFT) - *addr; + return i + 1; +} diff --git a/arch/x86/kernel/early-quirks_64.c b/arch/x86/kernel/early-quirks.c index 13aa4fd728f3..88bb83ec895f 100644 --- a/arch/x86/kernel/early-quirks_64.c +++ b/arch/x86/kernel/early-quirks.c @@ -13,33 +13,41 @@ #include <linux/acpi.h> #include <linux/pci_ids.h> #include <asm/pci-direct.h> -#include <asm/proto.h> -#include <asm/iommu.h> #include <asm/dma.h> +#include <asm/io_apic.h> +#include <asm/apic.h> + +#ifdef CONFIG_GART_IOMMU +#include <asm/gart.h> +#endif static void __init via_bugs(void) { -#ifdef CONFIG_IOMMU +#ifdef CONFIG_GART_IOMMU if ((end_pfn > MAX_DMA32_PFN || force_iommu) && - !iommu_aperture_allowed) { + !gart_iommu_aperture_allowed) { printk(KERN_INFO - "Looks like a VIA chipset. Disabling IOMMU. Override with iommu=allowed\n"); - iommu_aperture_disabled = 1; + "Looks like a VIA chipset. Disabling IOMMU." + " Override with iommu=allowed\n"); + gart_iommu_aperture_disabled = 1; } #endif } #ifdef CONFIG_ACPI +#ifdef CONFIG_X86_IO_APIC static int __init nvidia_hpet_check(struct acpi_table_header *header) { return 0; } -#endif +#endif /* CONFIG_X86_IO_APIC */ +#endif /* CONFIG_ACPI */ static void __init nvidia_bugs(void) { #ifdef CONFIG_ACPI +#ifdef CONFIG_X86_IO_APIC /* * All timer overrides on Nvidia are * wrong unless HPET is enabled. @@ -59,17 +67,20 @@ static void __init nvidia_bugs(void) "try acpi_use_timer_override\n"); } #endif +#endif /* RED-PEN skip them on mptables too? */ } static void __init ati_bugs(void) { +#ifdef CONFIG_X86_IO_APIC if (timer_over_8254 == 1) { timer_over_8254 = 0; printk(KERN_INFO - "ATI board detected. Disabling timer routing over 8254.\n"); + "ATI board detected. Disabling timer routing over 8254.\n"); } +#endif } struct chipset { @@ -104,7 +115,7 @@ void __init early_quirks(void) if (class == 0xffffffff) break; - if ((class >> 16) != PCI_CLASS_BRIDGE_PCI) + if ((class >> 16) != PCI_CLASS_BRIDGE_PCI) continue; vendor = read_pci_config(num, slot, func, diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c index fd9aff3f3890..b7d6c23f2871 100644 --- a/arch/x86/kernel/early_printk.c +++ b/arch/x86/kernel/early_printk.c @@ -6,15 +6,10 @@ #include <asm/io.h> #include <asm/processor.h> #include <asm/fcntl.h> +#include <asm/setup.h> #include <xen/hvc-console.h> /* Simple VGA output */ - -#ifdef __i386__ -#include <asm/setup.h> -#else -#include <asm/bootsetup.h> -#endif #define VGABASE (__ISA_IO_base + 0xb8000) static int max_ypos = 25, max_xpos = 80; @@ -234,10 +229,10 @@ static int __init setup_early_printk(char *buf) early_serial_init(buf); early_console = &early_serial_console; } else if (!strncmp(buf, "vga", 3) - && SCREEN_INFO.orig_video_isVGA == 1) { - max_xpos = SCREEN_INFO.orig_video_cols; - max_ypos = SCREEN_INFO.orig_video_lines; - current_ypos = SCREEN_INFO.orig_y; + && boot_params.screen_info.orig_video_isVGA == 1) { + max_xpos = boot_params.screen_info.orig_video_cols; + max_ypos = boot_params.screen_info.orig_video_lines; + current_ypos = boot_params.screen_info.orig_y; early_console = &early_vga_console; } else if (!strncmp(buf, "simnow", 6)) { simnow_init(buf + 6); diff --git a/arch/x86/kernel/efi_32.c b/arch/x86/kernel/efi_32.c index 2452c6fbe992..e2be78f49399 100644 --- a/arch/x86/kernel/efi_32.c +++ b/arch/x86/kernel/efi_32.c @@ -331,11 +331,13 @@ void __init efi_init(void) memset(&efi, 0, sizeof(efi) ); memset(&efi_phys, 0, sizeof(efi_phys)); - efi_phys.systab = EFI_SYSTAB; - memmap.phys_map = EFI_MEMMAP; - memmap.nr_map = EFI_MEMMAP_SIZE/EFI_MEMDESC_SIZE; - memmap.desc_version = EFI_MEMDESC_VERSION; - memmap.desc_size = EFI_MEMDESC_SIZE; + efi_phys.systab = + (efi_system_table_t *)boot_params.efi_info.efi_systab; + memmap.phys_map = (void *)boot_params.efi_info.efi_memmap; + memmap.nr_map = boot_params.efi_info.efi_memmap_size/ + boot_params.efi_info.efi_memdesc_size; + memmap.desc_version = boot_params.efi_info.efi_memdesc_version; + memmap.desc_size = boot_params.efi_info.efi_memdesc_size; efi.systab = (efi_system_table_t *) boot_ioremap((unsigned long) efi_phys.systab, @@ -446,7 +448,8 @@ void __init efi_init(void) printk(KERN_ERR PFX "Could not map the runtime service table!\n"); /* Map the EFI memory map for use until paging_init() */ - memmap.map = boot_ioremap((unsigned long) EFI_MEMMAP, EFI_MEMMAP_SIZE); + memmap.map = boot_ioremap(boot_params.efi_info.efi_memmap, + boot_params.efi_info.efi_memmap_size); if (memmap.map == NULL) printk(KERN_ERR PFX "Could not map the EFI memory map!\n"); @@ -600,7 +603,8 @@ void __init efi_enter_virtual_mode(void) void __init efi_initialize_iomem_resources(struct resource *code_resource, - struct resource *data_resource) + struct resource *data_resource, + struct resource *bss_resource) { struct resource *res; efi_memory_desc_t *md; @@ -672,6 +676,7 @@ efi_initialize_iomem_resources(struct resource *code_resource, if (md->type == EFI_CONVENTIONAL_MEMORY) { request_resource(res, code_resource); request_resource(res, data_resource); + request_resource(res, bss_resource); #ifdef CONFIG_KEXEC request_resource(res, &crashk_res); #endif diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 8099fea0a72f..dc7f938e5015 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -437,7 +437,7 @@ ldt_ss: * is still available to implement the setting of the high * 16-bits in the INTERRUPT_RETURN paravirt-op. */ - cmpl $0, paravirt_ops+PARAVIRT_enabled + cmpl $0, pv_info+PARAVIRT_enabled jne restore_nocheck #endif diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index f1cacd4897f7..3a058bb16409 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -988,7 +988,7 @@ child_rip: movq %rsi, %rdi call *%rax # exit - xorl %edi, %edi + mov %eax, %edi call do_exit CFI_ENDPROC ENDPROC(child_rip) diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c index 47496a40e84f..ce703e21c912 100644 --- a/arch/x86/kernel/genapic_64.c +++ b/arch/x86/kernel/genapic_64.c @@ -24,12 +24,19 @@ #include <acpi/acpi_bus.h> #endif -/* which logical CPU number maps to which CPU (physical APIC ID) */ -u8 x86_cpu_to_apicid[NR_CPUS] __read_mostly +/* + * which logical CPU number maps to which CPU (physical APIC ID) + * + * The following static array is used during kernel startup + * and the x86_cpu_to_apicid_ptr contains the address of the + * array during this time. Is it zeroed when the per_cpu + * data area is removed. + */ +u8 x86_cpu_to_apicid_init[NR_CPUS] __initdata = { [0 ... NR_CPUS-1] = BAD_APICID }; -EXPORT_SYMBOL(x86_cpu_to_apicid); - -u8 x86_cpu_to_log_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID }; +void *x86_cpu_to_apicid_ptr; +DEFINE_PER_CPU(u8, x86_cpu_to_apicid) = BAD_APICID; +EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid); struct genapic __read_mostly *genapic = &apic_flat; diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index ecb01eefdd27..07352b74bda6 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -52,7 +52,6 @@ static void flat_init_apic_ldr(void) num = smp_processor_id(); id = 1UL << num; - x86_cpu_to_log_apicid[num] = id; apic_write(APIC_DFR, APIC_DFR_FLAT); val = apic_read(APIC_LDR) & ~APIC_LDR_MASK; val |= SET_APIC_LOGICAL_ID(id); @@ -173,7 +172,7 @@ static unsigned int physflat_cpu_mask_to_apicid(cpumask_t cpumask) */ cpu = first_cpu(cpumask); if ((unsigned)cpu < NR_CPUS) - return x86_cpu_to_apicid[cpu]; + return per_cpu(x86_cpu_to_apicid, cpu); else return BAD_APICID; } diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 8561f626edad..6b3469311e42 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -14,7 +14,6 @@ #include <asm/processor.h> #include <asm/proto.h> #include <asm/smp.h> -#include <asm/bootsetup.h> #include <asm/setup.h> #include <asm/desc.h> #include <asm/pgtable.h> @@ -36,26 +35,15 @@ static void __init clear_bss(void) (unsigned long) __bss_stop - (unsigned long) __bss_start); } -#define NEW_CL_POINTER 0x228 /* Relative to real mode data */ -#define OLD_CL_MAGIC_ADDR 0x20 -#define OLD_CL_MAGIC 0xA33F -#define OLD_CL_OFFSET 0x22 - static void __init copy_bootdata(char *real_mode_data) { - unsigned long new_data; char * command_line; - memcpy(x86_boot_params, real_mode_data, BOOT_PARAM_SIZE); - new_data = *(u32 *) (x86_boot_params + NEW_CL_POINTER); - if (!new_data) { - if (OLD_CL_MAGIC != *(u16 *)(real_mode_data + OLD_CL_MAGIC_ADDR)) { - return; - } - new_data = __pa(real_mode_data) + *(u16 *)(real_mode_data + OLD_CL_OFFSET); + memcpy(&boot_params, real_mode_data, sizeof boot_params); + if (boot_params.hdr.cmd_line_ptr) { + command_line = __va(boot_params.hdr.cmd_line_ptr); + memcpy(boot_command_line, command_line, COMMAND_LINE_SIZE); } - command_line = __va(new_data); - memcpy(boot_command_line, command_line, COMMAND_LINE_SIZE); } void __init x86_64_start_kernel(char * real_mode_data) @@ -70,7 +58,7 @@ void __init x86_64_start_kernel(char * real_mode_data) for (i = 0; i < IDT_ENTRIES; i++) set_intr_gate(i, early_idt_handler); - asm volatile("lidt %0" :: "m" (idt_descr)); + load_idt((const struct desc_ptr *)&idt_descr); early_printk("Kernel alive\n"); diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 9150ca9b5f80..374b7ece8961 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -51,6 +51,15 @@ */ LOW_PAGES = 1<<(32-PAGE_SHIFT_asm) +/* + * To preserve the DMA pool in PAGEALLOC kernels, we'll allocate + * pagetables from above the 16MB DMA limit, so we'll have to set + * up pagetables 16MB more (worst-case): + */ +#ifdef CONFIG_DEBUG_PAGEALLOC +LOW_PAGES = LOW_PAGES + 0x1000000 +#endif + #if PTRS_PER_PMD > 1 PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PMD) + PTRS_PER_PGD #else @@ -70,22 +79,30 @@ INIT_MAP_BEYOND_END = BOOTBITMAP_SIZE + (PAGE_TABLE_SIZE + ALLOCATOR_SLOP)*PAGE_ */ .section .text.head,"ax",@progbits ENTRY(startup_32) + /* check to see if KEEP_SEGMENTS flag is meaningful */ + cmpw $0x207, BP_version(%esi) + jb 1f + + /* test KEEP_SEGMENTS flag to see if the bootloader is asking + us to not reload segments */ + testb $(1<<6), BP_loadflags(%esi) + jnz 2f /* * Set segments to known values. */ - cld - lgdt boot_gdt_descr - __PAGE_OFFSET +1: lgdt boot_gdt_descr - __PAGE_OFFSET movl $(__BOOT_DS),%eax movl %eax,%ds movl %eax,%es movl %eax,%fs movl %eax,%gs +2: /* * Clear BSS first so that there are no surprises... - * No need to cld as DF is already clear from cld above... */ + cld xorl %eax,%eax movl $__bss_start - __PAGE_OFFSET,%edi movl $__bss_stop - __PAGE_OFFSET,%ecx @@ -107,18 +124,42 @@ ENTRY(startup_32) movsl movl boot_params - __PAGE_OFFSET + NEW_CL_POINTER,%esi andl %esi,%esi - jnz 2f # New command line protocol - cmpw $(OLD_CL_MAGIC),OLD_CL_MAGIC_ADDR - jne 1f - movzwl OLD_CL_OFFSET,%esi - addl $(OLD_CL_BASE_ADDR),%esi -2: + jz 1f # No comand line movl $(boot_command_line - __PAGE_OFFSET),%edi movl $(COMMAND_LINE_SIZE/4),%ecx rep movsl 1: +#ifdef CONFIG_PARAVIRT + cmpw $0x207, (boot_params + BP_version - __PAGE_OFFSET) + jb default_entry + + /* Paravirt-compatible boot parameters. Look to see what architecture + we're booting under. */ + movl (boot_params + BP_hardware_subarch - __PAGE_OFFSET), %eax + cmpl $num_subarch_entries, %eax + jae bad_subarch + + movl subarch_entries - __PAGE_OFFSET(,%eax,4), %eax + subl $__PAGE_OFFSET, %eax + jmp *%eax + +bad_subarch: +WEAK(lguest_entry) +WEAK(xen_entry) + /* Unknown implementation; there's really + nothing we can do at this point. */ + ud2a +.data +subarch_entries: + .long default_entry /* normal x86/PC */ + .long lguest_entry /* lguest hypervisor */ + .long xen_entry /* Xen hypervisor */ +num_subarch_entries = (. - subarch_entries) / 4 +.previous +#endif /* CONFIG_PARAVIRT */ + /* * Initialize page tables. This creates a PDE and a set of page * tables, which are located immediately beyond _end. The variable @@ -131,6 +172,7 @@ ENTRY(startup_32) */ page_pde_offset = (__PAGE_OFFSET >> 20); +default_entry: movl $(pg0 - __PAGE_OFFSET), %edi movl $(swapper_pg_dir - __PAGE_OFFSET), %edx movl $0x007, %eax /* 0x007 = PRESENT+RW+USER */ @@ -443,6 +485,7 @@ early_page_fault: early_fault: cld #ifdef CONFIG_PRINTK + pusha movl $(__KERNEL_DS),%eax movl %eax,%ds movl %eax,%es @@ -534,8 +577,15 @@ int_msg: .asciz "Unknown interrupt or fault at EIP %p %p %p\n" fault_msg: - .ascii "Int %d: CR2 %p err %p EIP %p CS %p flags %p\n" - .asciz "Stack: %p %p %p %p %p %p %p %p\n" + .ascii \ +/* fault info: */ "BUG: Int %d: CR2 %p\n" \ +/* pusha regs: */ " EDI %p ESI %p EBP %p ESP %p\n" \ + " EBX %p EDX %p ECX %p EAX %p\n" \ +/* fault frame: */ " err %p EIP %p CS %p flg %p\n" \ + \ + "Stack: %p %p %p %p %p %p %p %p\n" \ + " %p %p %p %p %p %p %p %p\n" \ + " %p %p %p %p %p %p %p %p\n" #include "../../x86/xen/xen-head.S" diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index f8367074da0d..53303f2e5475 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -69,12 +69,15 @@ static inline void hpet_clear_mapping(void) * HPET command line enable / disable */ static int boot_hpet_disable; +int hpet_force_user; static int __init hpet_setup(char* str) { if (str) { if (!strncmp("disable", str, 7)) boot_hpet_disable = 1; + if (!strncmp("force", str, 5)) + hpet_force_user = 1; } return 1; } @@ -350,7 +353,7 @@ static int hpet_clocksource_register(void) * * hpet period is in femto seconds per cycle * so we need to convert this to ns/cyc units - * aproximated by mult/2^shift + * approximated by mult/2^shift * * fsec/cyc * 1nsec/1000000fsec = nsec/cyc = mult/2^shift * fsec/cyc * 1ns/1000000fsec * 2^shift = mult diff --git a/arch/x86/kernel/i386_ksyms_32.c b/arch/x86/kernel/i386_ksyms_32.c index e3d4b73bfdb0..edd39ccf139e 100644 --- a/arch/x86/kernel/i386_ksyms_32.c +++ b/arch/x86/kernel/i386_ksyms_32.c @@ -1,4 +1,5 @@ #include <linux/module.h> +#include <asm/semaphore.h> #include <asm/checksum.h> #include <asm/desc.h> diff --git a/arch/x86/kernel/i387_64.c b/arch/x86/kernel/i387_64.c index 56c1f1147109..bfaff28fb134 100644 --- a/arch/x86/kernel/i387_64.c +++ b/arch/x86/kernel/i387_64.c @@ -92,13 +92,14 @@ int save_i387(struct _fpstate __user *buf) if (task_thread_info(tsk)->status & TS_USEDFPU) { err = save_i387_checking((struct i387_fxsave_struct __user *)buf); if (err) return err; + task_thread_info(tsk)->status &= ~TS_USEDFPU; stts(); - } else { - if (__copy_to_user(buf, &tsk->thread.i387.fxsave, + } else { + if (__copy_to_user(buf, &tsk->thread.i387.fxsave, sizeof(struct i387_fxsave_struct))) return -1; - } - return 1; + } + return 1; } /* diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c index 5cc8841ca2c6..a42c80745325 100644 --- a/arch/x86/kernel/i8253.c +++ b/arch/x86/kernel/i8253.c @@ -86,7 +86,7 @@ static int pit_next_event(unsigned long delta, struct clock_event_device *evt) * On UP the PIT can serve all of the possible timer functions. On SMP systems * it can be solely used for the global tick. * - * The profiling and update capabilites are switched off once the local apic is + * The profiling and update capabilities are switched off once the local apic is * registered. This mechanism replaces the previous #ifdef LOCAL_APIC - * !using_apic_timer decisions in do_timer_interrupt_hook() */ diff --git a/arch/x86/kernel/i8259_32.c b/arch/x86/kernel/i8259_32.c index 679bb33acbf1..f634fc715c99 100644 --- a/arch/x86/kernel/i8259_32.c +++ b/arch/x86/kernel/i8259_32.c @@ -349,7 +349,11 @@ static irqreturn_t math_error_irq(int cpl, void *dev_id) * New motherboards sometimes make IRQ 13 be a PCI interrupt, * so allow interrupt sharing. */ -static struct irqaction fpu_irq = { math_error_irq, 0, CPU_MASK_NONE, "fpu", NULL, NULL }; +static struct irqaction fpu_irq = { + .handler = math_error_irq, + .mask = CPU_MASK_NONE, + .name = "fpu", +}; void __init init_ISA_irqs (void) { @@ -399,7 +403,8 @@ void __init native_init_IRQ(void) int vector = FIRST_EXTERNAL_VECTOR + i; if (i >= NR_IRQS) break; - if (vector != SYSCALL_VECTOR) + /* SYSCALL_VECTOR was reserved in trap_init. */ + if (!test_bit(vector, used_vectors)) set_intr_gate(vector, interrupt[i]); } diff --git a/arch/x86/kernel/i8259_64.c b/arch/x86/kernel/i8259_64.c index eb72976cc13c..3f27ea0b9816 100644 --- a/arch/x86/kernel/i8259_64.c +++ b/arch/x86/kernel/i8259_64.c @@ -395,7 +395,11 @@ device_initcall(i8259A_init_sysfs); * IRQ2 is cascade interrupt to second interrupt controller */ -static struct irqaction irq2 = { no_action, 0, CPU_MASK_NONE, "cascade", NULL, NULL}; +static struct irqaction irq2 = { + .handler = no_action, + .mask = CPU_MASK_NONE, + .name = "cascade", +}; DEFINE_PER_CPU(vector_irq_t, vector_irq) = { [0 ... IRQ0_VECTOR - 1] = -1, [IRQ0_VECTOR] = 0, diff --git a/arch/x86/kernel/init_task_32.c b/arch/x86/kernel/init_task.c index d26fc063a760..468c9c437842 100644 --- a/arch/x86/kernel/init_task_32.c +++ b/arch/x86/kernel/init_task.c @@ -15,7 +15,6 @@ static struct files_struct init_files = INIT_FILES; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); - EXPORT_SYMBOL(init_mm); /* @@ -25,7 +24,7 @@ EXPORT_SYMBOL(init_mm); * way process stacks are handled. This is done by having a special * "init_task" linker map entry.. */ -union thread_union init_thread_union +union thread_union init_thread_union __attribute__((__section__(".data.init_task"))) = { INIT_THREAD_INFO(init_task) }; @@ -35,12 +34,14 @@ union thread_union init_thread_union * All other task structs will be allocated on slabs in fork.c */ struct task_struct init_task = INIT_TASK(init_task); - EXPORT_SYMBOL(init_task); /* * per-CPU TSS segments. Threads are completely 'soft' on Linux, - * no more per-task TSS's. - */ + * no more per-task TSS's. The TSS size is kept cacheline-aligned + * so they are allowed to end up in the .data.cacheline_aligned + * section. Since TSS's are completely CPU-local, we want them + * on exact cacheline boundaries, to eliminate cacheline ping-pong. + */ DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss) = INIT_TSS; diff --git a/arch/x86/kernel/init_task_64.c b/arch/x86/kernel/init_task_64.c deleted file mode 100644 index 4ff33d4f8551..000000000000 --- a/arch/x86/kernel/init_task_64.c +++ /dev/null @@ -1,54 +0,0 @@ -#include <linux/mm.h> -#include <linux/module.h> -#include <linux/sched.h> -#include <linux/init.h> -#include <linux/init_task.h> -#include <linux/fs.h> -#include <linux/mqueue.h> - -#include <asm/uaccess.h> -#include <asm/pgtable.h> -#include <asm/desc.h> - -static struct fs_struct init_fs = INIT_FS; -static struct files_struct init_files = INIT_FILES; -static struct signal_struct init_signals = INIT_SIGNALS(init_signals); -static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); -struct mm_struct init_mm = INIT_MM(init_mm); - -EXPORT_SYMBOL(init_mm); - -/* - * Initial task structure. - * - * We need to make sure that this is 8192-byte aligned due to the - * way process stacks are handled. This is done by having a special - * "init_task" linker map entry.. - */ -union thread_union init_thread_union - __attribute__((__section__(".data.init_task"))) = - { INIT_THREAD_INFO(init_task) }; - -/* - * Initial task structure. - * - * All other task structs will be allocated on slabs in fork.c - */ -struct task_struct init_task = INIT_TASK(init_task); - -EXPORT_SYMBOL(init_task); -/* - * per-CPU TSS segments. Threads are completely 'soft' on Linux, - * no more per-task TSS's. The TSS size is kept cacheline-aligned - * so they are allowed to end up in the .data.cacheline_aligned - * section. Since TSS's are completely CPU-local, we want them - * on exact cacheline boundaries, to eliminate cacheline ping-pong. - */ -DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss) = INIT_TSS; - -/* Copies of the original ist values from the tss are only accessed during - * debugging, no special alignment required. - */ -DEFINE_PER_CPU(struct orig_ist, orig_ist); - -#define ALIGN_TO_4K __attribute__((section(".data.init_task"))) diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index e2f4a1c68547..f35c6eb33da9 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -378,7 +378,7 @@ static struct irq_cpu_info { #define IRQ_ALLOWED(cpu, allowed_mask) cpu_isset(cpu, allowed_mask) -#define CPU_TO_PACKAGEINDEX(i) (first_cpu(cpu_sibling_map[i])) +#define CPU_TO_PACKAGEINDEX(i) (first_cpu(per_cpu(cpu_sibling_map, i))) static cpumask_t balance_irq_affinity[NR_IRQS] = { [0 ... NR_IRQS-1] = CPU_MASK_ALL @@ -584,7 +584,7 @@ tryanotherirq: imbalance = move_this_load; - /* For physical_balance case, we accumlated both load + /* For physical_balance case, we accumulated both load * values in the one of the siblings cpu_irq[], * to use the same code for physical and logical processors * as much as possible. @@ -598,7 +598,7 @@ tryanotherirq: * (A+B)/2 vs B */ load = CPU_IRQ(min_loaded) >> 1; - for_each_cpu_mask(j, cpu_sibling_map[min_loaded]) { + for_each_cpu_mask(j, per_cpu(cpu_sibling_map, min_loaded)) { if (load > CPU_IRQ(j)) { /* This won't change cpu_sibling_map[min_loaded] */ load = CPU_IRQ(j); @@ -1198,7 +1198,7 @@ static u8 irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_DEVICE_VECTOR , 0 } static int __assign_irq_vector(int irq) { static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0; - int vector, offset, i; + int vector, offset; BUG_ON((unsigned)irq >= NR_IRQ_VECTORS); @@ -1215,11 +1215,8 @@ next: } if (vector == current_vector) return -ENOSPC; - if (vector == SYSCALL_VECTOR) + if (test_and_set_bit(vector, used_vectors)) goto next; - for (i = 0; i < NR_IRQ_VECTORS; i++) - if (irq_vector[i] == vector) - goto next; current_vector = vector; current_offset = offset; @@ -1296,6 +1293,11 @@ static void __init setup_IO_APIC_irqs(void) continue; } + if (!first_notcon) { + apic_printk(APIC_VERBOSE, " not connected.\n"); + first_notcon = 1; + } + entry.trigger = irq_trigger(idx); entry.polarity = irq_polarity(idx); @@ -2290,6 +2292,12 @@ static inline void __init check_timer(void) void __init setup_IO_APIC(void) { + int i; + + /* Reserve all the system vectors. */ + for (i = FIRST_SYSTEM_VECTOR; i < NR_VECTORS; i++) + set_bit(i, used_vectors); + enable_IO_APIC(); if (acpi_ioapic) @@ -2467,7 +2475,7 @@ void destroy_irq(unsigned int irq) } /* - * MSI mesage composition + * MSI message composition */ #ifdef CONFIG_PCI_MSI static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg) diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index 966fa1062491..953328b55a30 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -31,6 +31,7 @@ #include <linux/sysdev.h> #include <linux/msi.h> #include <linux/htirq.h> +#include <linux/dmar.h> #ifdef CONFIG_ACPI #include <acpi/acpi_bus.h> #endif @@ -875,6 +876,10 @@ static void __init setup_IO_APIC_irqs(void) apic_printk(APIC_VERBOSE, ", %d-%d", mp_ioapics[apic].mpc_apicid, pin); continue; } + if (!first_notcon) { + apic_printk(APIC_VERBOSE, " not connected.\n"); + first_notcon = 1; + } irq = pin_2_irq(idx, apic, pin); add_pin_to_irq(irq, apic, pin); @@ -885,7 +890,7 @@ static void __init setup_IO_APIC_irqs(void) } if (!first_notcon) - apic_printk(APIC_VERBOSE," not connected.\n"); + apic_printk(APIC_VERBOSE, " not connected.\n"); } /* @@ -1766,7 +1771,7 @@ __setup("no_timer_check", notimercheck); /* * - * IRQ's that are handled by the PIC in the MPS IOAPIC case. + * IRQs that are handled by the PIC in the MPS IOAPIC case. * - IRQ2 is the cascade IRQ, and cannot be a io-apic IRQ. * Linux doesn't really care, as it's not actually used * for any interrupt handling anyway. @@ -1845,7 +1850,7 @@ static struct sysdev_class ioapic_sysdev_class = { static int __init ioapic_init_sysfs(void) { struct sys_device * dev; - int i, size, error = 0; + int i, size, error; error = sysdev_class_register(&ioapic_sysdev_class); if (error) @@ -1854,12 +1859,11 @@ static int __init ioapic_init_sysfs(void) for (i = 0; i < nr_ioapics; i++ ) { size = sizeof(struct sys_device) + nr_ioapic_registers[i] * sizeof(struct IO_APIC_route_entry); - mp_ioapic_data[i] = kmalloc(size, GFP_KERNEL); + mp_ioapic_data[i] = kzalloc(size, GFP_KERNEL); if (!mp_ioapic_data[i]) { printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i); continue; } - memset(mp_ioapic_data[i], 0, size); dev = &mp_ioapic_data[i]->dev; dev->id = i; dev->cls = &ioapic_sysdev_class; @@ -1918,7 +1922,7 @@ void destroy_irq(unsigned int irq) } /* - * MSI mesage composition + * MSI message composition */ #ifdef CONFIG_PCI_MSI static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg) @@ -2028,8 +2032,64 @@ void arch_teardown_msi_irq(unsigned int irq) destroy_irq(irq); } -#endif /* CONFIG_PCI_MSI */ +#ifdef CONFIG_DMAR +#ifdef CONFIG_SMP +static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask) +{ + struct irq_cfg *cfg = irq_cfg + irq; + struct msi_msg msg; + unsigned int dest; + cpumask_t tmp; + cpus_and(tmp, mask, cpu_online_map); + if (cpus_empty(tmp)) + return; + + if (assign_irq_vector(irq, mask)) + return; + + cpus_and(tmp, cfg->domain, mask); + dest = cpu_mask_to_apicid(tmp); + + dmar_msi_read(irq, &msg); + + msg.data &= ~MSI_DATA_VECTOR_MASK; + msg.data |= MSI_DATA_VECTOR(cfg->vector); + msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; + msg.address_lo |= MSI_ADDR_DEST_ID(dest); + + dmar_msi_write(irq, &msg); + irq_desc[irq].affinity = mask; +} +#endif /* CONFIG_SMP */ + +struct irq_chip dmar_msi_type = { + .name = "DMAR_MSI", + .unmask = dmar_msi_unmask, + .mask = dmar_msi_mask, + .ack = ack_apic_edge, +#ifdef CONFIG_SMP + .set_affinity = dmar_msi_set_affinity, +#endif + .retrigger = ioapic_retrigger_irq, +}; + +int arch_setup_dmar_msi(unsigned int irq) +{ + int ret; + struct msi_msg msg; + + ret = msi_compose_msg(NULL, irq, &msg); + if (ret < 0) + return ret; + dmar_msi_write(irq, &msg); + set_irq_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq, + "edge"); + return 0; +} +#endif + +#endif /* CONFIG_PCI_MSI */ /* * Hypertransport interrupt support */ diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index e173b763f148..d3fde94f7345 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -255,9 +255,17 @@ int show_interrupts(struct seq_file *p, void *v) } if (i < NR_IRQS) { + unsigned any_count = 0; + spin_lock_irqsave(&irq_desc[i].lock, flags); +#ifndef CONFIG_SMP + any_count = kstat_irqs(i); +#else + for_each_online_cpu(j) + any_count |= kstat_cpu(j).irqs[i]; +#endif action = irq_desc[i].action; - if (!action) + if (!action && !any_count) goto skip; seq_printf(p, "%3d: ",i); #ifndef CONFIG_SMP @@ -268,10 +276,12 @@ int show_interrupts(struct seq_file *p, void *v) #endif seq_printf(p, " %8s", irq_desc[i].chip->name); seq_printf(p, "-%-8s", irq_desc[i].name); - seq_printf(p, " %s", action->name); - for (action=action->next; action; action = action->next) - seq_printf(p, ", %s", action->name); + if (action) { + seq_printf(p, " %s", action->name); + while ((action = action->next) != NULL) + seq_printf(p, ", %s", action->name); + } seq_putc(p, '\n'); skip: @@ -280,14 +290,41 @@ skip: seq_printf(p, "NMI: "); for_each_online_cpu(j) seq_printf(p, "%10u ", nmi_count(j)); - seq_putc(p, '\n'); + seq_printf(p, " Non-maskable interrupts\n"); #ifdef CONFIG_X86_LOCAL_APIC seq_printf(p, "LOC: "); for_each_online_cpu(j) seq_printf(p, "%10u ", per_cpu(irq_stat,j).apic_timer_irqs); - seq_putc(p, '\n'); + seq_printf(p, " Local timer interrupts\n"); #endif +#ifdef CONFIG_SMP + seq_printf(p, "RES: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", + per_cpu(irq_stat,j).irq_resched_count); + seq_printf(p, " Rescheduling interrupts\n"); + seq_printf(p, "CAL: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", + per_cpu(irq_stat,j).irq_call_count); + seq_printf(p, " function call interrupts\n"); + seq_printf(p, "TLB: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", + per_cpu(irq_stat,j).irq_tlb_count); + seq_printf(p, " TLB shootdowns\n"); +#endif + seq_printf(p, "TRM: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", + per_cpu(irq_stat,j).irq_thermal_count); + seq_printf(p, " Thermal event interrupts\n"); + seq_printf(p, "SPU: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", + per_cpu(irq_stat,j).irq_spurious_count); + seq_printf(p, " Spurious interrupts\n"); seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); #if defined(CONFIG_X86_IO_APIC) seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count)); diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 865669efc540..6b5c730d67b9 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -62,9 +62,17 @@ int show_interrupts(struct seq_file *p, void *v) } if (i < NR_IRQS) { + unsigned any_count = 0; + spin_lock_irqsave(&irq_desc[i].lock, flags); +#ifndef CONFIG_SMP + any_count = kstat_irqs(i); +#else + for_each_online_cpu(j) + any_count |= kstat_cpu(j).irqs[i]; +#endif action = irq_desc[i].action; - if (!action) + if (!action && !any_count) goto skip; seq_printf(p, "%3d: ",i); #ifndef CONFIG_SMP @@ -76,9 +84,11 @@ int show_interrupts(struct seq_file *p, void *v) seq_printf(p, " %8s", irq_desc[i].chip->name); seq_printf(p, "-%-8s", irq_desc[i].name); - seq_printf(p, " %s", action->name); - for (action=action->next; action; action = action->next) - seq_printf(p, ", %s", action->name); + if (action) { + seq_printf(p, " %s", action->name); + while ((action = action->next) != NULL) + seq_printf(p, ", %s", action->name); + } seq_putc(p, '\n'); skip: spin_unlock_irqrestore(&irq_desc[i].lock, flags); @@ -86,11 +96,37 @@ skip: seq_printf(p, "NMI: "); for_each_online_cpu(j) seq_printf(p, "%10u ", cpu_pda(j)->__nmi_count); - seq_putc(p, '\n'); + seq_printf(p, " Non-maskable interrupts\n"); seq_printf(p, "LOC: "); for_each_online_cpu(j) seq_printf(p, "%10u ", cpu_pda(j)->apic_timer_irqs); - seq_putc(p, '\n'); + seq_printf(p, " Local timer interrupts\n"); +#ifdef CONFIG_SMP + seq_printf(p, "RES: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", cpu_pda(j)->irq_resched_count); + seq_printf(p, " Rescheduling interrupts\n"); + seq_printf(p, "CAL: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", cpu_pda(j)->irq_call_count); + seq_printf(p, " function call interrupts\n"); + seq_printf(p, "TLB: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", cpu_pda(j)->irq_tlb_count); + seq_printf(p, " TLB shootdowns\n"); +#endif + seq_printf(p, "TRM: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", cpu_pda(j)->irq_thermal_count); + seq_printf(p, " Thermal event interrupts\n"); + seq_printf(p, "THR: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", cpu_pda(j)->irq_threshold_count); + seq_printf(p, " Threshold APIC interrupts\n"); + seq_printf(p, "SPU: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", cpu_pda(j)->irq_spurious_count); + seq_printf(p, " Spurious interrupts\n"); seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); } return 0; diff --git a/arch/x86/kernel/kprobes_32.c b/arch/x86/kernel/kprobes_32.c index e7d0d3c2ef64..d87a523070d1 100644 --- a/arch/x86/kernel/kprobes_32.c +++ b/arch/x86/kernel/kprobes_32.c @@ -41,6 +41,13 @@ void jprobe_return_end(void); DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); +struct kretprobe_blackpoint kretprobe_blacklist[] = { + {"__switch_to", }, /* This function switches only current task, but + doesn't switch kernel stack.*/ + {NULL, NULL} /* Terminator */ +}; +const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist); + /* insert a jmp code */ static __always_inline void set_jmp_op(void *from, void *to) { @@ -557,12 +564,7 @@ static int __kprobes post_kprobe_handler(struct pt_regs *regs) resume_execution(cur, regs, kcb); regs->eflags |= kcb->kprobe_saved_eflags; -#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT - if (raw_irqs_disabled_flags(regs->eflags)) - trace_hardirqs_off(); - else - trace_hardirqs_on(); -#endif + trace_hardirqs_fixup_flags(regs->eflags); /*Restore back the original saved kprobes variables and continue. */ if (kcb->kprobe_status == KPROBE_REENTER) { @@ -584,7 +586,7 @@ out: return 1; } -static int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) +int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) { struct kprobe *cur = kprobe_running(); struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); @@ -666,7 +668,6 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self, ret = NOTIFY_STOP; break; case DIE_GPF: - case DIE_PAGE_FAULT: /* kprobe_running() needs smp_processor_id() */ preempt_disable(); if (kprobe_running() && diff --git a/arch/x86/kernel/kprobes_64.c b/arch/x86/kernel/kprobes_64.c index 62e28e52d784..3db3611933d8 100644 --- a/arch/x86/kernel/kprobes_64.c +++ b/arch/x86/kernel/kprobes_64.c @@ -48,6 +48,13 @@ static void __kprobes arch_copy_kprobe(struct kprobe *p); DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); +struct kretprobe_blackpoint kretprobe_blacklist[] = { + {"__switch_to", }, /* This function switches only current task, but + doesn't switch kernel stack.*/ + {NULL, NULL} /* Terminator */ +}; +const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist); + /* * returns non-zero if opcode modifies the interrupt flag. */ @@ -544,12 +551,7 @@ int __kprobes post_kprobe_handler(struct pt_regs *regs) resume_execution(cur, regs, kcb); regs->eflags |= kcb->kprobe_saved_rflags; -#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT - if (raw_irqs_disabled_flags(regs->eflags)) - trace_hardirqs_off(); - else - trace_hardirqs_on(); -#endif + trace_hardirqs_fixup_flags(regs->eflags); /* Restore the original saved kprobes variables and continue. */ if (kcb->kprobe_status == KPROBE_REENTER) { @@ -657,7 +659,6 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self, ret = NOTIFY_STOP; break; case DIE_GPF: - case DIE_PAGE_FAULT: /* kprobe_running() needs smp_processor_id() */ preempt_disable(); if (kprobe_running() && diff --git a/arch/x86/kernel/ldt_32.c b/arch/x86/kernel/ldt_32.c index a8b18421863a..9ff90a27c45f 100644 --- a/arch/x86/kernel/ldt_32.c +++ b/arch/x86/kernel/ldt_32.c @@ -92,13 +92,13 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) struct mm_struct * old_mm; int retval = 0; - init_MUTEX(&mm->context.sem); + mutex_init(&mm->context.lock); mm->context.size = 0; old_mm = current->mm; if (old_mm && old_mm->context.size > 0) { - down(&old_mm->context.sem); + mutex_lock(&old_mm->context.lock); retval = copy_ldt(&mm->context, &old_mm->context); - up(&old_mm->context.sem); + mutex_unlock(&old_mm->context.lock); } return retval; } @@ -130,7 +130,7 @@ static int read_ldt(void __user * ptr, unsigned long bytecount) if (bytecount > LDT_ENTRY_SIZE*LDT_ENTRIES) bytecount = LDT_ENTRY_SIZE*LDT_ENTRIES; - down(&mm->context.sem); + mutex_lock(&mm->context.lock); size = mm->context.size*LDT_ENTRY_SIZE; if (size > bytecount) size = bytecount; @@ -138,7 +138,7 @@ static int read_ldt(void __user * ptr, unsigned long bytecount) err = 0; if (copy_to_user(ptr, mm->context.ldt, size)) err = -EFAULT; - up(&mm->context.sem); + mutex_unlock(&mm->context.lock); if (err < 0) goto error_return; if (size != bytecount) { @@ -194,7 +194,7 @@ static int write_ldt(void __user * ptr, unsigned long bytecount, int oldmode) goto out; } - down(&mm->context.sem); + mutex_lock(&mm->context.lock); if (ldt_info.entry_number >= mm->context.size) { error = alloc_ldt(¤t->mm->context, ldt_info.entry_number+1, 1); if (error < 0) @@ -221,7 +221,7 @@ install: error = 0; out_unlock: - up(&mm->context.sem); + mutex_unlock(&mm->context.lock); out: return error; } diff --git a/arch/x86/kernel/ldt_64.c b/arch/x86/kernel/ldt_64.c index 3796523d616a..60e57abb8e90 100644 --- a/arch/x86/kernel/ldt_64.c +++ b/arch/x86/kernel/ldt_64.c @@ -96,13 +96,13 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) struct mm_struct * old_mm; int retval = 0; - init_MUTEX(&mm->context.sem); + mutex_init(&mm->context.lock); mm->context.size = 0; old_mm = current->mm; if (old_mm && old_mm->context.size > 0) { - down(&old_mm->context.sem); + mutex_lock(&old_mm->context.lock); retval = copy_ldt(&mm->context, &old_mm->context); - up(&old_mm->context.sem); + mutex_unlock(&old_mm->context.lock); } return retval; } @@ -133,7 +133,7 @@ static int read_ldt(void __user * ptr, unsigned long bytecount) if (bytecount > LDT_ENTRY_SIZE*LDT_ENTRIES) bytecount = LDT_ENTRY_SIZE*LDT_ENTRIES; - down(&mm->context.sem); + mutex_lock(&mm->context.lock); size = mm->context.size*LDT_ENTRY_SIZE; if (size > bytecount) size = bytecount; @@ -141,7 +141,7 @@ static int read_ldt(void __user * ptr, unsigned long bytecount) err = 0; if (copy_to_user(ptr, mm->context.ldt, size)) err = -EFAULT; - up(&mm->context.sem); + mutex_unlock(&mm->context.lock); if (err < 0) goto error_return; if (size != bytecount) { @@ -193,7 +193,7 @@ static int write_ldt(void __user * ptr, unsigned long bytecount, int oldmode) goto out; } - down(&mm->context.sem); + mutex_lock(&mm->context.lock); if (ldt_info.entry_number >= (unsigned)mm->context.size) { error = alloc_ldt(¤t->mm->context, ldt_info.entry_number+1, 1); if (error < 0) @@ -223,7 +223,7 @@ install: error = 0; out_unlock: - up(&mm->context.sem); + mutex_unlock(&mm->context.lock); out: return error; } diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index deda9a221cf2..11b935f4f886 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c @@ -10,6 +10,7 @@ #include <linux/kexec.h> #include <linux/delay.h> #include <linux/init.h> +#include <linux/numa.h> #include <asm/pgtable.h> #include <asm/pgalloc.h> #include <asm/tlbflush.h> @@ -148,24 +149,14 @@ NORET_TYPE void machine_kexec(struct kimage *image) image->start, cpu_has_pae); } -/* crashkernel=size@addr specifies the location to reserve for - * a crash kernel. By reserving this memory we guarantee - * that linux never sets it up as a DMA target. - * Useful for holding code to do something appropriate - * after a kernel panic. - */ -static int __init parse_crashkernel(char *arg) +void arch_crash_save_vmcoreinfo(void) { - unsigned long size, base; - size = memparse(arg, &arg); - if (*arg == '@') { - base = memparse(arg+1, &arg); - /* FIXME: Do I want a sanity check - * to validate the memory range? - */ - crashk_res.start = base; - crashk_res.end = base + size - 1; - } - return 0; +#ifdef CONFIG_ARCH_DISCONTIGMEM_ENABLE + VMCOREINFO_SYMBOL(node_data); + VMCOREINFO_LENGTH(node_data, MAX_NUMNODES); +#endif +#ifdef CONFIG_X86_PAE + VMCOREINFO_CONFIG(X86_PAE); +#endif } -early_param("crashkernel", parse_crashkernel); + diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index cd1899a2f0c5..aa3d2c8f7737 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -10,6 +10,7 @@ #include <linux/kexec.h> #include <linux/string.h> #include <linux/reboot.h> +#include <linux/numa.h> #include <asm/pgtable.h> #include <asm/tlbflush.h> #include <asm/mmu_context.h> @@ -230,30 +231,13 @@ NORET_TYPE void machine_kexec(struct kimage *image) image->start); } -/* crashkernel=size@addr specifies the location to reserve for - * a crash kernel. By reserving this memory we guarantee - * that linux never set's it up as a DMA target. - * Useful for holding code to do something appropriate - * after a kernel panic. - */ -static int __init setup_crashkernel(char *arg) +void arch_crash_save_vmcoreinfo(void) { - unsigned long size, base; - char *p; - if (!arg) - return -EINVAL; - size = memparse(arg, &p); - if (arg == p) - return -EINVAL; - if (*p == '@') { - base = memparse(p+1, &p); - /* FIXME: Do I want a sanity check to validate the - * memory range? Yes you do, but it's too early for - * e820 -AK */ - crashk_res.start = base; - crashk_res.end = base + size - 1; - } - return 0; + VMCOREINFO_SYMBOL(init_level4_pgt); + +#ifdef CONFIG_ARCH_DISCONTIGMEM_ENABLE + VMCOREINFO_SYMBOL(node_data); + VMCOREINFO_LENGTH(node_data, MAX_NUMNODES); +#endif } -early_param("crashkernel", setup_crashkernel); diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c index 09cf78110358..09c315214a5e 100644 --- a/arch/x86/kernel/microcode.c +++ b/arch/x86/kernel/microcode.c @@ -132,7 +132,7 @@ static struct ucode_cpu_info { static void collect_cpu_info(int cpu_num) { - struct cpuinfo_x86 *c = cpu_data + cpu_num; + struct cpuinfo_x86 *c = &cpu_data(cpu_num); struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; unsigned int val[2]; @@ -522,7 +522,7 @@ static struct platform_device *microcode_pdev; static int cpu_request_microcode(int cpu) { char name[30]; - struct cpuinfo_x86 *c = cpu_data + cpu; + struct cpuinfo_x86 *c = &cpu_data(cpu); const struct firmware *firmware; void *buf; unsigned long size; @@ -570,7 +570,7 @@ static int cpu_request_microcode(int cpu) static int apply_microcode_check_cpu(int cpu) { - struct cpuinfo_x86 *c = cpu_data + cpu; + struct cpuinfo_x86 *c = &cpu_data(cpu); struct ucode_cpu_info *uci = ucode_cpu_info + cpu; cpumask_t old; unsigned int val[2]; diff --git a/arch/x86/kernel/mpparse_32.c b/arch/x86/kernel/mpparse_32.c index 13abb4ebfb79..7a05a7f6099a 100644 --- a/arch/x86/kernel/mpparse_32.c +++ b/arch/x86/kernel/mpparse_32.c @@ -1001,7 +1001,7 @@ void __init mp_config_acpi_legacy_irqs (void) /* * Use the default configuration for the IRQs 0-15. Unless - * overriden by (MADT) interrupt source override entries. + * overridden by (MADT) interrupt source override entries. */ for (i = 0; i < 16; i++) { int idx; diff --git a/arch/x86/kernel/mpparse_64.c b/arch/x86/kernel/mpparse_64.c index 8bf0ca03ac8e..ef4aab123581 100644 --- a/arch/x86/kernel/mpparse_64.c +++ b/arch/x86/kernel/mpparse_64.c @@ -57,6 +57,8 @@ unsigned long mp_lapic_addr = 0; /* Processor that is doing the boot up */ unsigned int boot_cpu_id = -1U; +EXPORT_SYMBOL(boot_cpu_id); + /* Internal processor count */ unsigned int num_processors __cpuinitdata = 0; @@ -86,7 +88,7 @@ static int __init mpf_checksum(unsigned char *mp, int len) return sum & 0xFF; } -static void __cpuinit MP_processor_info (struct mpc_config_processor *m) +static void __cpuinit MP_processor_info(struct mpc_config_processor *m) { int cpu; cpumask_t tmp_map; @@ -123,7 +125,18 @@ static void __cpuinit MP_processor_info (struct mpc_config_processor *m) cpu = 0; } bios_cpu_apicid[cpu] = m->mpc_apicid; - x86_cpu_to_apicid[cpu] = m->mpc_apicid; + /* + * We get called early in the the start_kernel initialization + * process when the per_cpu data area is not yet setup, so we + * use a static array that is removed after the per_cpu data + * area is created. + */ + if (x86_cpu_to_apicid_ptr) { + u8 *x86_cpu_to_apicid = (u8 *)x86_cpu_to_apicid_ptr; + x86_cpu_to_apicid[cpu] = m->mpc_apicid; + } else { + per_cpu(x86_cpu_to_apicid, cpu) = m->mpc_apicid; + } cpu_set(cpu, cpu_possible_map); cpu_set(cpu, cpu_present_map); diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index c044de310b69..ee6eba4ecfea 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c @@ -112,7 +112,7 @@ static ssize_t msr_write(struct file *file, const char __user *buf, static int msr_open(struct inode *inode, struct file *file) { unsigned int cpu = iminor(file->f_path.dentry->d_inode); - struct cpuinfo_x86 *c = &(cpu_data)[cpu]; + struct cpuinfo_x86 *c = &cpu_data(cpu); if (cpu >= NR_CPUS || !cpu_online(cpu)) return -ENXIO; /* No such CPU */ @@ -133,37 +133,42 @@ static const struct file_operations msr_fops = { .open = msr_open, }; -static int msr_device_create(int i) +static int __cpuinit msr_device_create(int cpu) { - int err = 0; struct device *dev; - dev = device_create(msr_class, NULL, MKDEV(MSR_MAJOR, i), "msr%d",i); - if (IS_ERR(dev)) - err = PTR_ERR(dev); - return err; + dev = device_create(msr_class, NULL, MKDEV(MSR_MAJOR, cpu), + "msr%d", cpu); + return IS_ERR(dev) ? PTR_ERR(dev) : 0; } -static int msr_class_cpu_callback(struct notifier_block *nfb, +static void msr_device_destroy(int cpu) +{ + device_destroy(msr_class, MKDEV(MSR_MAJOR, cpu)); +} + +static int __cpuinit msr_class_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { unsigned int cpu = (unsigned long)hcpu; + int err = 0; switch (action) { - case CPU_ONLINE: - case CPU_ONLINE_FROZEN: - msr_device_create(cpu); + case CPU_UP_PREPARE: + case CPU_UP_PREPARE_FROZEN: + err = msr_device_create(cpu); break; + case CPU_UP_CANCELED: + case CPU_UP_CANCELED_FROZEN: case CPU_DEAD: case CPU_DEAD_FROZEN: - device_destroy(msr_class, MKDEV(MSR_MAJOR, cpu)); + msr_device_destroy(cpu); break; } - return NOTIFY_OK; + return err ? NOTIFY_BAD : NOTIFY_OK; } -static struct notifier_block __cpuinitdata msr_class_cpu_notifier = -{ +static struct notifier_block __cpuinitdata msr_class_cpu_notifier = { .notifier_call = msr_class_cpu_callback, }; @@ -196,7 +201,7 @@ static int __init msr_init(void) out_class: i = 0; for_each_online_cpu(i) - device_destroy(msr_class, MKDEV(MSR_MAJOR, i)); + msr_device_destroy(i); class_destroy(msr_class); out_chrdev: unregister_chrdev(MSR_MAJOR, "cpu/msr"); @@ -208,7 +213,7 @@ static void __exit msr_exit(void) { int cpu = 0; for_each_online_cpu(cpu) - device_destroy(msr_class, MKDEV(MSR_MAJOR, cpu)); + msr_device_destroy(cpu); class_destroy(msr_class); unregister_chrdev(MSR_MAJOR, "cpu/msr"); unregister_hotcpu_notifier(&msr_class_cpu_notifier); diff --git a/arch/x86/kernel/nmi_32.c b/arch/x86/kernel/nmi_32.c index f803ed0ed1c4..600fd404e440 100644 --- a/arch/x86/kernel/nmi_32.c +++ b/arch/x86/kernel/nmi_32.c @@ -51,13 +51,13 @@ static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu); static int endflag __initdata = 0; -#ifdef CONFIG_SMP /* The performance counters used by NMI_LOCAL_APIC don't trigger when * the CPU is idle. To make sure the NMI watchdog really ticks on all * CPUs during the test make them busy. */ static __init void nmi_cpu_busy(void *data) { +#ifdef CONFIG_SMP local_irq_enable_in_hardirq(); /* Intentionally don't use cpu_relax here. This is to make sure that the performance counter really ticks, @@ -67,8 +67,8 @@ static __init void nmi_cpu_busy(void *data) care if they get somewhat less cycles. */ while (endflag == 0) mb(); -} #endif +} static int __init check_nmi_watchdog(void) { diff --git a/arch/x86/kernel/paravirt_32.c b/arch/x86/kernel/paravirt_32.c index 739cfb207dd7..6a80d67c2121 100644 --- a/arch/x86/kernel/paravirt_32.c +++ b/arch/x86/kernel/paravirt_32.c @@ -42,32 +42,33 @@ void _paravirt_nop(void) static void __init default_banner(void) { printk(KERN_INFO "Booting paravirtualized kernel on %s\n", - paravirt_ops.name); + pv_info.name); } char *memory_setup(void) { - return paravirt_ops.memory_setup(); + return pv_init_ops.memory_setup(); } /* Simple instruction patching code. */ -#define DEF_NATIVE(name, code) \ - extern const char start_##name[], end_##name[]; \ - asm("start_" #name ": " code "; end_" #name ":") - -DEF_NATIVE(irq_disable, "cli"); -DEF_NATIVE(irq_enable, "sti"); -DEF_NATIVE(restore_fl, "push %eax; popf"); -DEF_NATIVE(save_fl, "pushf; pop %eax"); -DEF_NATIVE(iret, "iret"); -DEF_NATIVE(irq_enable_sysexit, "sti; sysexit"); -DEF_NATIVE(read_cr2, "mov %cr2, %eax"); -DEF_NATIVE(write_cr3, "mov %eax, %cr3"); -DEF_NATIVE(read_cr3, "mov %cr3, %eax"); -DEF_NATIVE(clts, "clts"); -DEF_NATIVE(read_tsc, "rdtsc"); - -DEF_NATIVE(ud2a, "ud2a"); +#define DEF_NATIVE(ops, name, code) \ + extern const char start_##ops##_##name[], end_##ops##_##name[]; \ + asm("start_" #ops "_" #name ": " code "; end_" #ops "_" #name ":") + +DEF_NATIVE(pv_irq_ops, irq_disable, "cli"); +DEF_NATIVE(pv_irq_ops, irq_enable, "sti"); +DEF_NATIVE(pv_irq_ops, restore_fl, "push %eax; popf"); +DEF_NATIVE(pv_irq_ops, save_fl, "pushf; pop %eax"); +DEF_NATIVE(pv_cpu_ops, iret, "iret"); +DEF_NATIVE(pv_cpu_ops, irq_enable_sysexit, "sti; sysexit"); +DEF_NATIVE(pv_mmu_ops, read_cr2, "mov %cr2, %eax"); +DEF_NATIVE(pv_mmu_ops, write_cr3, "mov %eax, %cr3"); +DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax"); +DEF_NATIVE(pv_cpu_ops, clts, "clts"); +DEF_NATIVE(pv_cpu_ops, read_tsc, "rdtsc"); + +/* Undefined instruction for dealing with missing ops pointers. */ +static const unsigned char ud2a[] = { 0x0f, 0x0b }; static unsigned native_patch(u8 type, u16 clobbers, void *ibuf, unsigned long addr, unsigned len) @@ -76,37 +77,29 @@ static unsigned native_patch(u8 type, u16 clobbers, void *ibuf, unsigned ret; switch(type) { -#define SITE(x) case PARAVIRT_PATCH(x): start = start_##x; end = end_##x; goto patch_site - SITE(irq_disable); - SITE(irq_enable); - SITE(restore_fl); - SITE(save_fl); - SITE(iret); - SITE(irq_enable_sysexit); - SITE(read_cr2); - SITE(read_cr3); - SITE(write_cr3); - SITE(clts); - SITE(read_tsc); +#define SITE(ops, x) \ + case PARAVIRT_PATCH(ops.x): \ + start = start_##ops##_##x; \ + end = end_##ops##_##x; \ + goto patch_site + + SITE(pv_irq_ops, irq_disable); + SITE(pv_irq_ops, irq_enable); + SITE(pv_irq_ops, restore_fl); + SITE(pv_irq_ops, save_fl); + SITE(pv_cpu_ops, iret); + SITE(pv_cpu_ops, irq_enable_sysexit); + SITE(pv_mmu_ops, read_cr2); + SITE(pv_mmu_ops, read_cr3); + SITE(pv_mmu_ops, write_cr3); + SITE(pv_cpu_ops, clts); + SITE(pv_cpu_ops, read_tsc); #undef SITE patch_site: ret = paravirt_patch_insns(ibuf, len, start, end); break; - case PARAVIRT_PATCH(make_pgd): - case PARAVIRT_PATCH(make_pte): - case PARAVIRT_PATCH(pgd_val): - case PARAVIRT_PATCH(pte_val): -#ifdef CONFIG_X86_PAE - case PARAVIRT_PATCH(make_pmd): - case PARAVIRT_PATCH(pmd_val): -#endif - /* These functions end up returning exactly what - they're passed, in the same registers. */ - ret = paravirt_patch_nop(); - break; - default: ret = paravirt_patch_default(type, clobbers, ibuf, addr, len); break; @@ -150,7 +143,7 @@ unsigned paravirt_patch_call(void *insnbuf, return 5; } -unsigned paravirt_patch_jmp(const void *target, void *insnbuf, +unsigned paravirt_patch_jmp(void *insnbuf, const void *target, unsigned long addr, unsigned len) { struct branch *b = insnbuf; @@ -165,22 +158,37 @@ unsigned paravirt_patch_jmp(const void *target, void *insnbuf, return 5; } +/* Neat trick to map patch type back to the call within the + * corresponding structure. */ +static void *get_call_destination(u8 type) +{ + struct paravirt_patch_template tmpl = { + .pv_init_ops = pv_init_ops, + .pv_time_ops = pv_time_ops, + .pv_cpu_ops = pv_cpu_ops, + .pv_irq_ops = pv_irq_ops, + .pv_apic_ops = pv_apic_ops, + .pv_mmu_ops = pv_mmu_ops, + }; + return *((void **)&tmpl + type); +} + unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf, unsigned long addr, unsigned len) { - void *opfunc = *((void **)¶virt_ops + type); + void *opfunc = get_call_destination(type); unsigned ret; if (opfunc == NULL) /* If there's no function, patch it with a ud2a (BUG) */ - ret = paravirt_patch_insns(insnbuf, len, start_ud2a, end_ud2a); + ret = paravirt_patch_insns(insnbuf, len, ud2a, ud2a+sizeof(ud2a)); else if (opfunc == paravirt_nop) /* If the operation is a nop, then nop the callsite */ ret = paravirt_patch_nop(); - else if (type == PARAVIRT_PATCH(iret) || - type == PARAVIRT_PATCH(irq_enable_sysexit)) + else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) || + type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit)) /* If operation requires a jmp, then jmp */ - ret = paravirt_patch_jmp(opfunc, insnbuf, addr, len); + ret = paravirt_patch_jmp(insnbuf, opfunc, addr, len); else /* Otherwise call the function; assume target could clobber any caller-save reg */ @@ -205,7 +213,7 @@ unsigned paravirt_patch_insns(void *insnbuf, unsigned len, void init_IRQ(void) { - paravirt_ops.init_IRQ(); + pv_irq_ops.init_IRQ(); } static void native_flush_tlb(void) @@ -233,7 +241,7 @@ extern void native_irq_enable_sysexit(void); static int __init print_banner(void) { - paravirt_ops.banner(); + pv_init_ops.banner(); return 0; } core_initcall(print_banner); @@ -273,47 +281,96 @@ int paravirt_disable_iospace(void) return ret; } -struct paravirt_ops paravirt_ops = { +static DEFINE_PER_CPU(enum paravirt_lazy_mode, paravirt_lazy_mode) = PARAVIRT_LAZY_NONE; + +static inline void enter_lazy(enum paravirt_lazy_mode mode) +{ + BUG_ON(x86_read_percpu(paravirt_lazy_mode) != PARAVIRT_LAZY_NONE); + BUG_ON(preemptible()); + + x86_write_percpu(paravirt_lazy_mode, mode); +} + +void paravirt_leave_lazy(enum paravirt_lazy_mode mode) +{ + BUG_ON(x86_read_percpu(paravirt_lazy_mode) != mode); + BUG_ON(preemptible()); + + x86_write_percpu(paravirt_lazy_mode, PARAVIRT_LAZY_NONE); +} + +void paravirt_enter_lazy_mmu(void) +{ + enter_lazy(PARAVIRT_LAZY_MMU); +} + +void paravirt_leave_lazy_mmu(void) +{ + paravirt_leave_lazy(PARAVIRT_LAZY_MMU); +} + +void paravirt_enter_lazy_cpu(void) +{ + enter_lazy(PARAVIRT_LAZY_CPU); +} + +void paravirt_leave_lazy_cpu(void) +{ + paravirt_leave_lazy(PARAVIRT_LAZY_CPU); +} + +enum paravirt_lazy_mode paravirt_get_lazy_mode(void) +{ + return x86_read_percpu(paravirt_lazy_mode); +} + +struct pv_info pv_info = { .name = "bare hardware", .paravirt_enabled = 0, .kernel_rpl = 0, .shared_kernel_pmd = 1, /* Only used when CONFIG_X86_PAE is set */ +}; - .patch = native_patch, +struct pv_init_ops pv_init_ops = { + .patch = native_patch, .banner = default_banner, .arch_setup = paravirt_nop, .memory_setup = machine_specific_memory_setup, +}; + +struct pv_time_ops pv_time_ops = { + .time_init = hpet_time_init, .get_wallclock = native_get_wallclock, .set_wallclock = native_set_wallclock, - .time_init = hpet_time_init, + .sched_clock = native_sched_clock, + .get_cpu_khz = native_calculate_cpu_khz, +}; + +struct pv_irq_ops pv_irq_ops = { .init_IRQ = native_init_IRQ, + .save_fl = native_save_fl, + .restore_fl = native_restore_fl, + .irq_disable = native_irq_disable, + .irq_enable = native_irq_enable, + .safe_halt = native_safe_halt, + .halt = native_halt, +}; +struct pv_cpu_ops pv_cpu_ops = { .cpuid = native_cpuid, .get_debugreg = native_get_debugreg, .set_debugreg = native_set_debugreg, .clts = native_clts, .read_cr0 = native_read_cr0, .write_cr0 = native_write_cr0, - .read_cr2 = native_read_cr2, - .write_cr2 = native_write_cr2, - .read_cr3 = native_read_cr3, - .write_cr3 = native_write_cr3, .read_cr4 = native_read_cr4, .read_cr4_safe = native_read_cr4_safe, .write_cr4 = native_write_cr4, - .save_fl = native_save_fl, - .restore_fl = native_restore_fl, - .irq_disable = native_irq_disable, - .irq_enable = native_irq_enable, - .safe_halt = native_safe_halt, - .halt = native_halt, .wbinvd = native_wbinvd, .read_msr = native_read_msr_safe, .write_msr = native_write_msr_safe, .read_tsc = native_read_tsc, .read_pmc = native_read_pmc, - .sched_clock = native_sched_clock, - .get_cpu_khz = native_calculate_cpu_khz, .load_tr_desc = native_load_tr_desc, .set_ldt = native_set_ldt, .load_gdt = native_load_gdt, @@ -327,9 +384,19 @@ struct paravirt_ops paravirt_ops = { .write_idt_entry = write_dt_entry, .load_esp0 = native_load_esp0, + .irq_enable_sysexit = native_irq_enable_sysexit, + .iret = native_iret, + .set_iopl_mask = native_set_iopl_mask, .io_delay = native_io_delay, + .lazy_mode = { + .enter = paravirt_nop, + .leave = paravirt_nop, + }, +}; + +struct pv_apic_ops pv_apic_ops = { #ifdef CONFIG_X86_LOCAL_APIC .apic_write = native_apic_write, .apic_write_atomic = native_apic_write_atomic, @@ -338,11 +405,17 @@ struct paravirt_ops paravirt_ops = { .setup_secondary_clock = setup_secondary_APIC_clock, .startup_ipi_hook = paravirt_nop, #endif - .set_lazy_mode = paravirt_nop, +}; +struct pv_mmu_ops pv_mmu_ops = { .pagetable_setup_start = native_pagetable_setup_start, .pagetable_setup_done = native_pagetable_setup_done, + .read_cr2 = native_read_cr2, + .write_cr2 = native_write_cr2, + .read_cr3 = native_read_cr3, + .write_cr3 = native_write_cr3, + .flush_tlb_user = native_flush_tlb, .flush_tlb_kernel = native_flush_tlb_global, .flush_tlb_single = native_flush_tlb_single, @@ -381,12 +454,19 @@ struct paravirt_ops paravirt_ops = { .make_pte = native_make_pte, .make_pgd = native_make_pgd, - .irq_enable_sysexit = native_irq_enable_sysexit, - .iret = native_iret, - .dup_mmap = paravirt_nop, .exit_mmap = paravirt_nop, .activate_mm = paravirt_nop, + + .lazy_mode = { + .enter = paravirt_nop, + .leave = paravirt_nop, + }, }; -EXPORT_SYMBOL(paravirt_ops); +EXPORT_SYMBOL_GPL(pv_time_ops); +EXPORT_SYMBOL_GPL(pv_cpu_ops); +EXPORT_SYMBOL_GPL(pv_mmu_ops); +EXPORT_SYMBOL_GPL(pv_apic_ops); +EXPORT_SYMBOL_GPL(pv_info); +EXPORT_SYMBOL (pv_irq_ops); diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 71da01e73f03..6bf1f716909d 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -35,7 +35,8 @@ #include <linux/pci_ids.h> #include <linux/pci.h> #include <linux/delay.h> -#include <asm/iommu.h> +#include <linux/scatterlist.h> +#include <asm/gart.h> #include <asm/calgary.h> #include <asm/tce.h> #include <asm/pci-direct.h> @@ -221,10 +222,10 @@ static inline unsigned int num_dma_pages(unsigned long dma, unsigned int dmalen) return npages; } -static inline int translate_phb(struct pci_dev* dev) +static inline int translation_enabled(struct iommu_table *tbl) { - int disabled = bus_info[dev->bus->number].translation_disabled; - return !disabled; + /* only PHBs with translation enabled have an IOMMU table */ + return (tbl != NULL); } static void iommu_range_reserve(struct iommu_table *tbl, @@ -384,33 +385,36 @@ static void calgary_unmap_sg(struct device *dev, struct scatterlist *sglist, int nelems, int direction) { struct iommu_table *tbl = find_iommu_table(dev); + struct scatterlist *s; + int i; - if (!translate_phb(to_pci_dev(dev))) + if (!translation_enabled(tbl)) return; - while (nelems--) { + for_each_sg(sglist, s, nelems, i) { unsigned int npages; - dma_addr_t dma = sglist->dma_address; - unsigned int dmalen = sglist->dma_length; + dma_addr_t dma = s->dma_address; + unsigned int dmalen = s->dma_length; if (dmalen == 0) break; npages = num_dma_pages(dma, dmalen); iommu_free(tbl, dma, npages); - sglist++; } } static int calgary_nontranslate_map_sg(struct device* dev, struct scatterlist *sg, int nelems, int direction) { + struct scatterlist *s; int i; - for (i = 0; i < nelems; i++ ) { - struct scatterlist *s = &sg[i]; - BUG_ON(!s->page); - s->dma_address = virt_to_bus(page_address(s->page) +s->offset); + for_each_sg(sg, s, nelems, i) { + struct page *p = sg_page(s); + + BUG_ON(!p); + s->dma_address = virt_to_bus(sg_virt(s)); s->dma_length = s->length; } return nelems; @@ -420,19 +424,19 @@ static int calgary_map_sg(struct device *dev, struct scatterlist *sg, int nelems, int direction) { struct iommu_table *tbl = find_iommu_table(dev); + struct scatterlist *s; unsigned long vaddr; unsigned int npages; unsigned long entry; int i; - if (!translate_phb(to_pci_dev(dev))) + if (!translation_enabled(tbl)) return calgary_nontranslate_map_sg(dev, sg, nelems, direction); - for (i = 0; i < nelems; i++ ) { - struct scatterlist *s = &sg[i]; - BUG_ON(!s->page); + for_each_sg(sg, s, nelems, i) { + BUG_ON(!sg_page(s)); - vaddr = (unsigned long)page_address(s->page) + s->offset; + vaddr = (unsigned long) sg_virt(s); npages = num_dma_pages(vaddr, s->length); entry = iommu_range_alloc(tbl, npages); @@ -454,9 +458,9 @@ static int calgary_map_sg(struct device *dev, struct scatterlist *sg, return nelems; error: calgary_unmap_sg(dev, sg, nelems, direction); - for (i = 0; i < nelems; i++) { - sg[i].dma_address = bad_dma_address; - sg[i].dma_length = 0; + for_each_sg(sg, s, nelems, i) { + sg->dma_address = bad_dma_address; + sg->dma_length = 0; } return 0; } @@ -472,7 +476,7 @@ static dma_addr_t calgary_map_single(struct device *dev, void *vaddr, uaddr = (unsigned long)vaddr; npages = num_dma_pages(uaddr, size); - if (translate_phb(to_pci_dev(dev))) + if (translation_enabled(tbl)) dma_handle = iommu_alloc(tbl, vaddr, npages, direction); else dma_handle = virt_to_bus(vaddr); @@ -486,7 +490,7 @@ static void calgary_unmap_single(struct device *dev, dma_addr_t dma_handle, struct iommu_table *tbl = find_iommu_table(dev); unsigned int npages; - if (!translate_phb(to_pci_dev(dev))) + if (!translation_enabled(tbl)) return; npages = num_dma_pages(dma_handle, size); @@ -511,7 +515,7 @@ static void* calgary_alloc_coherent(struct device *dev, size_t size, goto error; memset(ret, 0, size); - if (translate_phb(to_pci_dev(dev))) { + if (translation_enabled(tbl)) { /* set up tces to cover the allocated range */ mapping = iommu_alloc(tbl, ret, npages, DMA_BIDIRECTIONAL); if (mapping == bad_dma_address) @@ -1192,7 +1196,7 @@ static int __init calgary_init(void) { int ret; struct pci_dev *dev = NULL; - void *tce_space; + struct calgary_bus_info *info; ret = calgary_locate_bbars(); if (ret) @@ -1204,12 +1208,14 @@ static int __init calgary_init(void) break; if (!is_cal_pci_dev(dev->device)) continue; - if (!translate_phb(dev)) { + + info = &bus_info[dev->bus->number]; + if (info->translation_disabled) { calgary_init_one_nontraslated(dev); continue; } - tce_space = bus_info[dev->bus->number].tce_space; - if (!tce_space && !translate_empty_slots) + + if (!info->tce_space && !translate_empty_slots) continue; ret = calgary_init_one(dev); @@ -1227,11 +1233,13 @@ error: break; if (!is_cal_pci_dev(dev->device)) continue; - if (!translate_phb(dev)) { + + info = &bus_info[dev->bus->number]; + if (info->translation_disabled) { pci_dev_put(dev); continue; } - if (!bus_info[dev->bus->number].tce_space && !translate_empty_slots) + if (!info->tce_space && !translate_empty_slots) continue; calgary_disable_translation(dev); @@ -1544,7 +1552,7 @@ static void __init calgary_fixup_one_tce_space(struct pci_dev *dev) static int __init calgary_fixup_tce_spaces(void) { struct pci_dev *dev = NULL; - void *tce_space; + struct calgary_bus_info *info; if (no_iommu || swiotlb || !calgary_detected) return -ENODEV; @@ -1557,11 +1565,12 @@ static int __init calgary_fixup_tce_spaces(void) break; if (!is_cal_pci_dev(dev->device)) continue; - if (!translate_phb(dev)) + + info = &bus_info[dev->bus->number]; + if (info->translation_disabled) continue; - tce_space = bus_info[dev->bus->number].tce_space; - if (!tce_space) + if (!info->tce_space) continue; calgary_fixup_one_tce_space(dev); diff --git a/arch/x86/kernel/pci-dma_32.c b/arch/x86/kernel/pci-dma_32.c index 0aae2f3847a5..51330321a5d3 100644 --- a/arch/x86/kernel/pci-dma_32.c +++ b/arch/x86/kernel/pci-dma_32.c @@ -12,7 +12,6 @@ #include <linux/string.h> #include <linux/pci.h> #include <linux/module.h> -#include <linux/pci.h> #include <asm/io.h> struct dma_coherent_mem { diff --git a/arch/x86/kernel/pci-dma_64.c b/arch/x86/kernel/pci-dma_64.c index 9576a2eb375e..aa805b11b24f 100644 --- a/arch/x86/kernel/pci-dma_64.c +++ b/arch/x86/kernel/pci-dma_64.c @@ -7,11 +7,12 @@ #include <linux/string.h> #include <linux/pci.h> #include <linux/module.h> +#include <linux/dmar.h> #include <asm/io.h> -#include <asm/iommu.h> +#include <asm/gart.h> #include <asm/calgary.h> -int iommu_merge __read_mostly = 0; +int iommu_merge __read_mostly = 1; EXPORT_SYMBOL(iommu_merge); dma_addr_t bad_dma_address __read_mostly; @@ -51,11 +52,9 @@ dma_alloc_pages(struct device *dev, gfp_t gfp, unsigned order) { struct page *page; int node; -#ifdef CONFIG_PCI - if (dev->bus == &pci_bus_type) - node = pcibus_to_node(to_pci_dev(dev)->bus); - else -#endif + + node = dev_to_node(dev); + if (node == -1) node = numa_node_id(); if (node < first_node(node_online_map)) @@ -276,7 +275,7 @@ __init int iommu_setup(char *p) swiotlb = 1; #endif -#ifdef CONFIG_IOMMU +#ifdef CONFIG_GART_IOMMU gart_parse_options(p); #endif @@ -299,14 +298,16 @@ void __init pci_iommu_alloc(void) * The order of these functions is important for * fall-back/fail-over reasons */ -#ifdef CONFIG_IOMMU - iommu_hole_init(); +#ifdef CONFIG_GART_IOMMU + gart_iommu_hole_init(); #endif #ifdef CONFIG_CALGARY_IOMMU detect_calgary(); #endif + detect_intel_iommu(); + #ifdef CONFIG_SWIOTLB pci_swiotlb_init(); #endif @@ -318,7 +319,9 @@ static int __init pci_iommu_init(void) calgary_iommu_init(); #endif -#ifdef CONFIG_IOMMU + intel_iommu_init(); + +#ifdef CONFIG_GART_IOMMU gart_iommu_init(); #endif diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 4918c575d582..06bcba536045 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -8,6 +8,7 @@ * See Documentation/DMA-mapping.txt for the interface specification. * * Copyright 2002 Andi Kleen, SuSE Labs. + * Subject to the GNU General Public License v2 only. */ #include <linux/types.h> @@ -23,22 +24,23 @@ #include <linux/interrupt.h> #include <linux/bitops.h> #include <linux/kdebug.h> +#include <linux/scatterlist.h> #include <asm/atomic.h> #include <asm/io.h> #include <asm/mtrr.h> #include <asm/pgtable.h> #include <asm/proto.h> -#include <asm/iommu.h> +#include <asm/gart.h> #include <asm/cacheflush.h> #include <asm/swiotlb.h> #include <asm/dma.h> #include <asm/k8.h> -unsigned long iommu_bus_base; /* GART remapping area (physical) */ +static unsigned long iommu_bus_base; /* GART remapping area (physical) */ static unsigned long iommu_size; /* size of remapping area bytes */ static unsigned long iommu_pages; /* .. and in pages */ -u32 *iommu_gatt_base; /* Remapping table */ +static u32 *iommu_gatt_base; /* Remapping table */ /* If this is disabled the IOMMU will use an optimized flushing strategy of only flushing when an mapping is reused. With it true the GART is flushed @@ -133,8 +135,8 @@ static void flush_gart(void) /* Debugging aid for drivers that don't free their IOMMU tables */ static void **iommu_leak_tab; static int leak_trace; -int iommu_leak_pages = 20; -void dump_leak(void) +static int iommu_leak_pages = 20; +static void dump_leak(void) { int i; static int dump; @@ -278,10 +280,10 @@ static void gart_unmap_single(struct device *dev, dma_addr_t dma_addr, */ static void gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) { + struct scatterlist *s; int i; - for (i = 0; i < nents; i++) { - struct scatterlist *s = &sg[i]; + for_each_sg(sg, s, nents, i) { if (!s->dma_length || !s->length) break; gart_unmap_single(dev, s->dma_address, s->dma_length, dir); @@ -292,15 +294,15 @@ static void gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg, int nents, int dir) { + struct scatterlist *s; int i; #ifdef CONFIG_IOMMU_DEBUG printk(KERN_DEBUG "dma_map_sg overflow\n"); #endif - for (i = 0; i < nents; i++ ) { - struct scatterlist *s = &sg[i]; - unsigned long addr = page_to_phys(s->page) + s->offset; + for_each_sg(sg, s, nents, i) { + unsigned long addr = sg_phys(s); if (nonforced_iommu(dev, addr, s->length)) { addr = dma_map_area(dev, addr, s->length, dir); if (addr == bad_dma_address) { @@ -319,24 +321,23 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg, } /* Map multiple scatterlist entries continuous into the first. */ -static int __dma_map_cont(struct scatterlist *sg, int start, int stopat, +static int __dma_map_cont(struct scatterlist *start, int nelems, struct scatterlist *sout, unsigned long pages) { unsigned long iommu_start = alloc_iommu(pages); unsigned long iommu_page = iommu_start; + struct scatterlist *s; int i; if (iommu_start == -1) return -1; - - for (i = start; i < stopat; i++) { - struct scatterlist *s = &sg[i]; + + for_each_sg(start, s, nelems, i) { unsigned long pages, addr; unsigned long phys_addr = s->dma_address; - BUG_ON(i > start && s->offset); - if (i == start) { - *sout = *s; + BUG_ON(s != start && s->offset); + if (s == start) { sout->dma_address = iommu_bus_base; sout->dma_address += iommu_page*PAGE_SIZE + s->offset; sout->dma_length = s->length; @@ -357,30 +358,32 @@ static int __dma_map_cont(struct scatterlist *sg, int start, int stopat, return 0; } -static inline int dma_map_cont(struct scatterlist *sg, int start, int stopat, +static inline int dma_map_cont(struct scatterlist *start, int nelems, struct scatterlist *sout, unsigned long pages, int need) { - if (!need) { - BUG_ON(stopat - start != 1); - *sout = sg[start]; - sout->dma_length = sg[start].length; + if (!need) { + BUG_ON(nelems != 1); + sout->dma_address = start->dma_address; + sout->dma_length = start->length; return 0; - } - return __dma_map_cont(sg, start, stopat, sout, pages); + } + return __dma_map_cont(start, nelems, sout, pages); } /* * DMA map all entries in a scatterlist. * Merge chunks that have page aligned sizes into a continuous mapping. */ -int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) +static int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, + int dir) { int i; int out; int start; unsigned long pages = 0; int need = 0, nextneed; + struct scatterlist *s, *ps, *start_sg, *sgmap; if (nents == 0) return 0; @@ -390,9 +393,10 @@ int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) out = 0; start = 0; - for (i = 0; i < nents; i++) { - struct scatterlist *s = &sg[i]; - dma_addr_t addr = page_to_phys(s->page) + s->offset; + start_sg = sgmap = sg; + ps = NULL; /* shut up gcc */ + for_each_sg(sg, s, nents, i) { + dma_addr_t addr = sg_phys(s); s->dma_address = addr; BUG_ON(s->length == 0); @@ -400,34 +404,38 @@ int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) /* Handle the previous not yet processed entries */ if (i > start) { - struct scatterlist *ps = &sg[i-1]; /* Can only merge when the last chunk ends on a page boundary and the new one doesn't have an offset. */ if (!iommu_merge || !nextneed || !need || s->offset || - (ps->offset + ps->length) % PAGE_SIZE) { - if (dma_map_cont(sg, start, i, sg+out, pages, - need) < 0) + (ps->offset + ps->length) % PAGE_SIZE) { + if (dma_map_cont(start_sg, i - start, sgmap, + pages, need) < 0) goto error; out++; + sgmap = sg_next(sgmap); pages = 0; - start = i; + start = i; + start_sg = s; } } need = nextneed; pages += to_pages(s->offset, s->length); + ps = s; } - if (dma_map_cont(sg, start, i, sg+out, pages, need) < 0) + if (dma_map_cont(start_sg, i - start, sgmap, pages, need) < 0) goto error; out++; flush_gart(); - if (out < nents) - sg[out].dma_length = 0; + if (out < nents) { + sgmap = sg_next(sgmap); + sgmap->dma_length = 0; + } return out; error: flush_gart(); - gart_unmap_sg(dev, sg, nents, dir); + gart_unmap_sg(dev, sg, out, dir); /* When it was forced or merged try again in a dumb way */ if (force_iommu || iommu_merge) { out = dma_map_sg_nonforce(dev, sg, nents, dir); @@ -437,8 +445,8 @@ error: if (panic_on_overflow) panic("dma_map_sg: overflow on %lu pages\n", pages); iommu_full(dev, pages << PAGE_SHIFT, dir); - for (i = 0; i < nents; i++) - sg[i].dma_address = bad_dma_address; + for_each_sg(sg, s, nents, i) + s->dma_address = bad_dma_address; return 0; } @@ -619,12 +627,12 @@ void __init gart_iommu_init(void) return; /* Did we detect a different HW IOMMU? */ - if (iommu_detected && !iommu_aperture) + if (iommu_detected && !gart_iommu_aperture) return; if (no_iommu || (!force_iommu && end_pfn <= MAX_DMA32_PFN) || - !iommu_aperture || + !gart_iommu_aperture || (no_agp && init_k8_gatt(&info) < 0)) { if (end_pfn > MAX_DMA32_PFN) { printk(KERN_ERR "WARNING more than 4GB of memory " @@ -725,9 +733,9 @@ void __init gart_parse_options(char *p) fix_aperture = 0; /* duplicated from pci-dma.c */ if (!strncmp(p,"force",5)) - iommu_aperture_allowed = 1; + gart_iommu_aperture_allowed = 1; if (!strncmp(p,"allowed",7)) - iommu_aperture_allowed = 1; + gart_iommu_aperture_allowed = 1; if (!strncmp(p, "memaper", 7)) { fallback_aper_force = 1; p += 7; diff --git a/arch/x86/kernel/pci-nommu_64.c b/arch/x86/kernel/pci-nommu_64.c index 2a34c6c025a9..ab08e1832228 100644 --- a/arch/x86/kernel/pci-nommu_64.c +++ b/arch/x86/kernel/pci-nommu_64.c @@ -5,8 +5,9 @@ #include <linux/pci.h> #include <linux/string.h> #include <linux/dma-mapping.h> +#include <linux/scatterlist.h> -#include <asm/iommu.h> +#include <asm/gart.h> #include <asm/processor.h> #include <asm/dma.h> @@ -57,12 +58,12 @@ static void nommu_unmap_single(struct device *dev, dma_addr_t addr,size_t size, static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg, int nents, int direction) { + struct scatterlist *s; int i; - for (i = 0; i < nents; i++ ) { - struct scatterlist *s = &sg[i]; - BUG_ON(!s->page); - s->dma_address = virt_to_bus(page_address(s->page) +s->offset); + for_each_sg(sg, s, nents, i) { + BUG_ON(!sg_page(s)); + s->dma_address = virt_to_bus(sg_virt(s)); if (!check_addr("map_sg", hwdev, s->dma_address, s->length)) return 0; s->dma_length = s->length; diff --git a/arch/x86/kernel/pci-swiotlb_64.c b/arch/x86/kernel/pci-swiotlb_64.c index b2f405ea7c85..102866d729a5 100644 --- a/arch/x86/kernel/pci-swiotlb_64.c +++ b/arch/x86/kernel/pci-swiotlb_64.c @@ -5,7 +5,7 @@ #include <linux/module.h> #include <linux/dma-mapping.h> -#include <asm/iommu.h> +#include <asm/gart.h> #include <asm/swiotlb.h> #include <asm/dma.h> diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 097aeafce5ff..7b899584d290 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -295,34 +295,52 @@ static int __init idle_setup(char *str) } early_param("idle", idle_setup); -void show_regs(struct pt_regs * regs) +void __show_registers(struct pt_regs *regs, int all) { unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L; unsigned long d0, d1, d2, d3, d6, d7; + unsigned long esp; + unsigned short ss, gs; + + if (user_mode_vm(regs)) { + esp = regs->esp; + ss = regs->xss & 0xffff; + savesegment(gs, gs); + } else { + esp = (unsigned long) (®s->esp); + savesegment(ss, ss); + savesegment(gs, gs); + } printk("\n"); - printk("Pid: %d, comm: %20s\n", current->pid, current->comm); - printk("EIP: %04x:[<%08lx>] CPU: %d\n",0xffff & regs->xcs,regs->eip, smp_processor_id()); + printk("Pid: %d, comm: %s %s (%s %.*s)\n", + task_pid_nr(current), current->comm, + print_tainted(), init_utsname()->release, + (int)strcspn(init_utsname()->version, " "), + init_utsname()->version); + + printk("EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n", + 0xffff & regs->xcs, regs->eip, regs->eflags, + smp_processor_id()); print_symbol("EIP is at %s\n", regs->eip); - if (user_mode_vm(regs)) - printk(" ESP: %04x:%08lx",0xffff & regs->xss,regs->esp); - printk(" EFLAGS: %08lx %s (%s %.*s)\n", - regs->eflags, print_tainted(), init_utsname()->release, - (int)strcspn(init_utsname()->version, " "), - init_utsname()->version); printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n", - regs->eax,regs->ebx,regs->ecx,regs->edx); - printk("ESI: %08lx EDI: %08lx EBP: %08lx", - regs->esi, regs->edi, regs->ebp); - printk(" DS: %04x ES: %04x FS: %04x\n", - 0xffff & regs->xds,0xffff & regs->xes, 0xffff & regs->xfs); + regs->eax, regs->ebx, regs->ecx, regs->edx); + printk("ESI: %08lx EDI: %08lx EBP: %08lx ESP: %08lx\n", + regs->esi, regs->edi, regs->ebp, esp); + printk(" DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x\n", + regs->xds & 0xffff, regs->xes & 0xffff, + regs->xfs & 0xffff, gs, ss); + + if (!all) + return; cr0 = read_cr0(); cr2 = read_cr2(); cr3 = read_cr3(); cr4 = read_cr4_safe(); - printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", cr0, cr2, cr3, cr4); + printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", + cr0, cr2, cr3, cr4); get_debugreg(d0, 0); get_debugreg(d1, 1); @@ -330,10 +348,16 @@ void show_regs(struct pt_regs * regs) get_debugreg(d3, 3); printk("DR0: %08lx DR1: %08lx DR2: %08lx DR3: %08lx\n", d0, d1, d2, d3); + get_debugreg(d6, 6); get_debugreg(d7, 7); - printk("DR6: %08lx DR7: %08lx\n", d6, d7); + printk("DR6: %08lx DR7: %08lx\n", + d6, d7); +} +void show_regs(struct pt_regs *regs) +{ + __show_registers(regs, 1); show_trace(NULL, regs, ®s->esp); } diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 7352d4b377e6..6309b275cb9c 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -581,7 +581,7 @@ static inline void __switch_to_xtra(struct task_struct *prev_p, * * Kprobes not supported here. Set the probe on schedule instead. */ -__kprobes struct task_struct * +struct task_struct * __switch_to(struct task_struct *prev_p, struct task_struct *next_p) { struct thread_struct *prev = &prev_p->thread, diff --git a/arch/x86/kernel/ptrace_32.c b/arch/x86/kernel/ptrace_32.c index 0cecd7513c97..ff5431cc03ee 100644 --- a/arch/x86/kernel/ptrace_32.c +++ b/arch/x86/kernel/ptrace_32.c @@ -165,7 +165,7 @@ static unsigned long convert_eip_to_linear(struct task_struct *child, struct pt_ seg &= ~7UL; - down(&child->mm->context.sem); + mutex_lock(&child->mm->context.lock); if (unlikely((seg >> 3) >= child->mm->context.size)) addr = -1L; /* bogus selector, access would fault */ else { @@ -179,7 +179,7 @@ static unsigned long convert_eip_to_linear(struct task_struct *child, struct pt_ addr &= 0xffff; addr += base; } - up(&child->mm->context.sem); + mutex_unlock(&child->mm->context.lock); } return addr; } @@ -524,11 +524,6 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) ret = 0; break; - case PTRACE_DETACH: - /* detach a process that was attached. */ - ret = ptrace_detach(child, data); - break; - case PTRACE_GETREGS: { /* Get all gp regs from the child. */ if (!access_ok(VERIFY_WRITE, datap, FRAME_SIZE*sizeof(long))) { ret = -EIO; @@ -637,7 +632,7 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code) /* User-mode eip? */ info.si_addr = user_mode_vm(regs) ? (void __user *) regs->eip : NULL; - /* Send us the fakey SIGTRAP */ + /* Send us the fake SIGTRAP */ force_sig_info(SIGTRAP, &info, tsk); } diff --git a/arch/x86/kernel/ptrace_64.c b/arch/x86/kernel/ptrace_64.c index c0cac42df3b6..607085f3f08a 100644 --- a/arch/x86/kernel/ptrace_64.c +++ b/arch/x86/kernel/ptrace_64.c @@ -103,7 +103,7 @@ unsigned long convert_rip_to_linear(struct task_struct *child, struct pt_regs *r seg &= ~7UL; - down(&child->mm->context.sem); + mutex_lock(&child->mm->context.lock); if (unlikely((seg >> 3) >= child->mm->context.size)) addr = -1L; /* bogus selector, access would fault */ else { @@ -117,7 +117,7 @@ unsigned long convert_rip_to_linear(struct task_struct *child, struct pt_regs *r addr &= 0xffff; addr += base; } - up(&child->mm->context.sem); + mutex_unlock(&child->mm->context.lock); } return addr; @@ -500,11 +500,6 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) ret = 0; break; - case PTRACE_DETACH: - /* detach a process that was attached. */ - ret = ptrace_detach(child, data); - break; - case PTRACE_GETREGS: { /* Get all gp regs from the child. */ if (!access_ok(VERIFY_WRITE, (unsigned __user *)data, sizeof(struct user_regs_struct))) { diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index d769e204f942..fab30e134836 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c @@ -45,9 +45,12 @@ static void __devinit quirk_intel_irqbalance(struct pci_dev *dev) if (!(config & 0x2)) pci_write_config_byte(dev, 0xf4, config); } -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7320_MCH, quirk_intel_irqbalance); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7525_MCH, quirk_intel_irqbalance); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7520_MCH, quirk_intel_irqbalance); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7320_MCH, + quirk_intel_irqbalance); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7525_MCH, + quirk_intel_irqbalance); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7520_MCH, + quirk_intel_irqbalance); #endif #if defined(CONFIG_HPET_TIMER) @@ -56,7 +59,9 @@ unsigned long force_hpet_address; static enum { NONE_FORCE_HPET_RESUME, OLD_ICH_FORCE_HPET_RESUME, - ICH_FORCE_HPET_RESUME + ICH_FORCE_HPET_RESUME, + VT8237_FORCE_HPET_RESUME, + NVIDIA_FORCE_HPET_RESUME, } force_hpet_resume_type; static void __iomem *rcba_base; @@ -146,17 +151,17 @@ static void ich_force_enable_hpet(struct pci_dev *dev) } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ESB2_0, - ich_force_enable_hpet); + ich_force_enable_hpet); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH6_1, - ich_force_enable_hpet); + ich_force_enable_hpet); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH7_0, - ich_force_enable_hpet); + ich_force_enable_hpet); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH7_1, - ich_force_enable_hpet); + ich_force_enable_hpet); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH7_31, - ich_force_enable_hpet); + ich_force_enable_hpet); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH8_1, - ich_force_enable_hpet); + ich_force_enable_hpet); static struct pci_dev *cached_dev; @@ -232,10 +237,140 @@ static void old_ich_force_enable_hpet(struct pci_dev *dev) printk(KERN_DEBUG "Failed to force enable HPET\n"); } +/* + * Undocumented chipset features. Make sure that the user enforced + * this. + */ +static void old_ich_force_enable_hpet_user(struct pci_dev *dev) +{ + if (hpet_force_user) + old_ich_force_enable_hpet(dev); +} + +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_0, + old_ich_force_enable_hpet_user); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_12, + old_ich_force_enable_hpet_user); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801DB_0, + old_ich_force_enable_hpet_user); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801DB_12, + old_ich_force_enable_hpet_user); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801EB_0, - old_ich_force_enable_hpet); + old_ich_force_enable_hpet); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801EB_12, - old_ich_force_enable_hpet); + old_ich_force_enable_hpet); + + +static void vt8237_force_hpet_resume(void) +{ + u32 val; + + if (!force_hpet_address || !cached_dev) + return; + + val = 0xfed00000 | 0x80; + pci_write_config_dword(cached_dev, 0x68, val); + + pci_read_config_dword(cached_dev, 0x68, &val); + if (val & 0x80) + printk(KERN_DEBUG "Force enabled HPET at resume\n"); + else + BUG(); +} + +static void vt8237_force_enable_hpet(struct pci_dev *dev) +{ + u32 uninitialized_var(val); + + if (!hpet_force_user || hpet_address || force_hpet_address) + return; + + pci_read_config_dword(dev, 0x68, &val); + /* + * Bit 7 is HPET enable bit. + * Bit 31:10 is HPET base address (contrary to what datasheet claims) + */ + if (val & 0x80) { + force_hpet_address = (val & ~0x3ff); + printk(KERN_DEBUG "HPET at base address 0x%lx\n", + force_hpet_address); + return; + } + + /* + * HPET is disabled. Trying enabling at FED00000 and check + * whether it sticks + */ + val = 0xfed00000 | 0x80; + pci_write_config_dword(dev, 0x68, val); + + pci_read_config_dword(dev, 0x68, &val); + if (val & 0x80) { + force_hpet_address = (val & ~0x3ff); + printk(KERN_DEBUG "Force enabled HPET at base address 0x%lx\n", + force_hpet_address); + cached_dev = dev; + force_hpet_resume_type = VT8237_FORCE_HPET_RESUME; + return; + } + + printk(KERN_DEBUG "Failed to force enable HPET\n"); +} + +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8235, + vt8237_force_enable_hpet); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8237, + vt8237_force_enable_hpet); + +/* + * Undocumented chipset feature taken from LinuxBIOS. + */ +static void nvidia_force_hpet_resume(void) +{ + pci_write_config_dword(cached_dev, 0x44, 0xfed00001); + printk(KERN_DEBUG "Force enabled HPET at resume\n"); +} + +static void nvidia_force_enable_hpet(struct pci_dev *dev) +{ + u32 uninitialized_var(val); + + if (!hpet_force_user || hpet_address || force_hpet_address) + return; + + pci_write_config_dword(dev, 0x44, 0xfed00001); + pci_read_config_dword(dev, 0x44, &val); + force_hpet_address = val & 0xfffffffe; + force_hpet_resume_type = NVIDIA_FORCE_HPET_RESUME; + printk(KERN_DEBUG "Force enabled HPET at base address 0x%lx\n", + force_hpet_address); + cached_dev = dev; + return; +} + +/* ISA Bridges */ +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0050, + nvidia_force_enable_hpet); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0051, + nvidia_force_enable_hpet); + +/* LPC bridges */ +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0360, + nvidia_force_enable_hpet); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0361, + nvidia_force_enable_hpet); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0362, + nvidia_force_enable_hpet); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0363, + nvidia_force_enable_hpet); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0364, + nvidia_force_enable_hpet); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0365, + nvidia_force_enable_hpet); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0366, + nvidia_force_enable_hpet); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0367, + nvidia_force_enable_hpet); void force_hpet_resume(void) { @@ -246,6 +381,12 @@ void force_hpet_resume(void) case OLD_ICH_FORCE_HPET_RESUME: return old_ich_force_hpet_resume(); + case VT8237_FORCE_HPET_RESUME: + return vt8237_force_hpet_resume(); + + case NVIDIA_FORCE_HPET_RESUME: + return nvidia_force_hpet_resume(); + default: break; } diff --git a/arch/x86/kernel/reboot_64.c b/arch/x86/kernel/reboot_64.c index 368db2b9c5ac..71b13c5f5817 100644 --- a/arch/x86/kernel/reboot_64.c +++ b/arch/x86/kernel/reboot_64.c @@ -11,12 +11,13 @@ #include <linux/sched.h> #include <asm/io.h> #include <asm/delay.h> +#include <asm/desc.h> #include <asm/hw_irq.h> #include <asm/system.h> #include <asm/pgtable.h> #include <asm/tlbflush.h> #include <asm/apic.h> -#include <asm/iommu.h> +#include <asm/gart.h> /* * Power off function, if any @@ -136,7 +137,7 @@ void machine_emergency_restart(void) } case BOOT_TRIPLE: - __asm__ __volatile__("lidt (%0)": :"r" (&no_idt)); + load_idt((const struct desc_ptr *)&no_idt); __asm__ __volatile__("int3"); reboot_type = BOOT_KBD; diff --git a/arch/x86/kernel/reboot_fixups_32.c b/arch/x86/kernel/reboot_fixups_32.c index 8b30b26ad069..1a07bbea7be3 100644 --- a/arch/x86/kernel/reboot_fixups_32.c +++ b/arch/x86/kernel/reboot_fixups_32.c @@ -12,6 +12,7 @@ #include <linux/interrupt.h> #include <asm/reboot_fixups.h> #include <asm/msr.h> +#include <asm/geode.h> static void cs5530a_warm_reset(struct pci_dev *dev) { @@ -24,11 +25,8 @@ static void cs5530a_warm_reset(struct pci_dev *dev) static void cs5536_warm_reset(struct pci_dev *dev) { - /* - * 6.6.2.12 Soft Reset (DIVIL_SOFT_RESET) - * writing 1 to the LSB of this MSR causes a hard reset. - */ - wrmsrl(0x51400017, 1ULL); + /* writing 1 to the LSB of this MSR causes a hard reset */ + wrmsrl(MSR_DIVIL_SOFT_RESET, 1ULL); udelay(50); /* shouldn't get here but be safe and spin a while */ } diff --git a/arch/x86/kernel/setup64.c b/arch/x86/kernel/setup64.c index 1200aaac403e..3558ac78c926 100644 --- a/arch/x86/kernel/setup64.c +++ b/arch/x86/kernel/setup64.c @@ -11,7 +11,6 @@ #include <linux/bootmem.h> #include <linux/bitops.h> #include <linux/module.h> -#include <asm/bootsetup.h> #include <asm/pda.h> #include <asm/pgtable.h> #include <asm/processor.h> @@ -23,8 +22,9 @@ #include <asm/percpu.h> #include <asm/proto.h> #include <asm/sections.h> +#include <asm/setup.h> -char x86_boot_params[BOOT_PARAM_SIZE] __initdata; +struct boot_params __initdata boot_params; cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; @@ -185,6 +185,12 @@ void __cpuinit check_efer(void) unsigned long kernel_eflags; /* + * Copies of the original ist values from the tss are only accessed during + * debugging, no special alignment required. + */ +DEFINE_PER_CPU(struct orig_ist, orig_ist); + +/* * cpu_init() initializes state that is per-CPU. Some data is already * initialized (naturally) in the bootstrap process, such as the GDT * and IDT. We reload them nevertheless, this function acts as a @@ -224,8 +230,8 @@ void __cpuinit cpu_init (void) memcpy(cpu_gdt(cpu), cpu_gdt_table, GDT_SIZE); cpu_gdt_descr[cpu].size = GDT_SIZE; - asm volatile("lgdt %0" :: "m" (cpu_gdt_descr[cpu])); - asm volatile("lidt %0" :: "m" (idt_descr)); + load_gdt((const struct desc_ptr *)&cpu_gdt_descr[cpu]); + load_idt((const struct desc_ptr *)&idt_descr); memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8); syscall_init(); diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c index c8e1bc38d421..e1e18c34c821 100644 --- a/arch/x86/kernel/setup_32.c +++ b/arch/x86/kernel/setup_32.c @@ -60,6 +60,7 @@ #include <asm/vmi.h> #include <setup_arch.h> #include <bios_ebda.h> +#include <asm/cacheflush.h> /* This value is set up by the early boot code to point to the value immediately after the boot time page tables. It contains a *physical* @@ -73,6 +74,7 @@ int disable_pse __devinitdata = 0; */ extern struct resource code_resource; extern struct resource data_resource; +extern struct resource bss_resource; /* cpu data as detected by the assembly code in head.S */ struct cpuinfo_x86 new_cpu_data __cpuinitdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; @@ -84,9 +86,6 @@ unsigned long mmu_cr4_features; /* for MCA, but anyone else can use it if they want */ unsigned int machine_id; -#ifdef CONFIG_MCA -EXPORT_SYMBOL(machine_id); -#endif unsigned int machine_submodel_id; unsigned int BIOS_revision; unsigned int mca_pentium_flag; @@ -137,10 +136,11 @@ EXPORT_SYMBOL(edd); */ static inline void copy_edd(void) { - memcpy(edd.mbr_signature, EDD_MBR_SIGNATURE, sizeof(edd.mbr_signature)); - memcpy(edd.edd_info, EDD_BUF, sizeof(edd.edd_info)); - edd.mbr_signature_nr = EDD_MBR_SIG_NR; - edd.edd_info_nr = EDD_NR; + memcpy(edd.mbr_signature, boot_params.edd_mbr_sig_buffer, + sizeof(edd.mbr_signature)); + memcpy(edd.edd_info, boot_params.eddbuf, sizeof(edd.edd_info)); + edd.mbr_signature_nr = boot_params.edd_mbr_sig_buf_entries; + edd.edd_info_nr = boot_params.eddbuf_entries; } #else static inline void copy_edd(void) @@ -377,6 +377,49 @@ extern unsigned long __init setup_memory(void); extern void zone_sizes_init(void); #endif /* !CONFIG_NEED_MULTIPLE_NODES */ +static inline unsigned long long get_total_mem(void) +{ + unsigned long long total; + + total = max_low_pfn - min_low_pfn; +#ifdef CONFIG_HIGHMEM + total += highend_pfn - highstart_pfn; +#endif + + return total << PAGE_SHIFT; +} + +#ifdef CONFIG_KEXEC +static void __init reserve_crashkernel(void) +{ + unsigned long long total_mem; + unsigned long long crash_size, crash_base; + int ret; + + total_mem = get_total_mem(); + + ret = parse_crashkernel(boot_command_line, total_mem, + &crash_size, &crash_base); + if (ret == 0 && crash_size > 0) { + if (crash_base > 0) { + printk(KERN_INFO "Reserving %ldMB of memory at %ldMB " + "for crashkernel (System RAM: %ldMB)\n", + (unsigned long)(crash_size >> 20), + (unsigned long)(crash_base >> 20), + (unsigned long)(total_mem >> 20)); + crashk_res.start = crash_base; + crashk_res.end = crash_base + crash_size - 1; + reserve_bootmem(crash_base, crash_size); + } else + printk(KERN_INFO "crashkernel reservation failed - " + "you have to specify a base address\n"); + } +} +#else +static inline void __init reserve_crashkernel(void) +{} +#endif + void __init setup_bootmem_allocator(void) { unsigned long bootmap_size; @@ -434,26 +477,25 @@ void __init setup_bootmem_allocator(void) #endif numa_kva_reserve(); #ifdef CONFIG_BLK_DEV_INITRD - if (LOADER_TYPE && INITRD_START) { - if (INITRD_START + INITRD_SIZE <= (max_low_pfn << PAGE_SHIFT)) { - reserve_bootmem(INITRD_START, INITRD_SIZE); - initrd_start = INITRD_START + PAGE_OFFSET; - initrd_end = initrd_start+INITRD_SIZE; - } - else { + if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) { + unsigned long ramdisk_image = boot_params.hdr.ramdisk_image; + unsigned long ramdisk_size = boot_params.hdr.ramdisk_size; + unsigned long ramdisk_end = ramdisk_image + ramdisk_size; + unsigned long end_of_lowmem = max_low_pfn << PAGE_SHIFT; + + if (ramdisk_end <= end_of_lowmem) { + reserve_bootmem(ramdisk_image, ramdisk_size); + initrd_start = ramdisk_image + PAGE_OFFSET; + initrd_end = initrd_start+ramdisk_size; + } else { printk(KERN_ERR "initrd extends beyond end of memory " - "(0x%08lx > 0x%08lx)\ndisabling initrd\n", - INITRD_START + INITRD_SIZE, - max_low_pfn << PAGE_SHIFT); + "(0x%08lx > 0x%08lx)\ndisabling initrd\n", + ramdisk_end, end_of_lowmem); initrd_start = 0; } } #endif -#ifdef CONFIG_KEXEC - if (crashk_res.start != crashk_res.end) - reserve_bootmem(crashk_res.start, - crashk_res.end - crashk_res.start + 1); -#endif + reserve_crashkernel(); } /* @@ -512,28 +554,29 @@ void __init setup_arch(char **cmdline_p) * the system table is valid. If not, then initialize normally. */ #ifdef CONFIG_EFI - if ((LOADER_TYPE == 0x50) && EFI_SYSTAB) + if ((boot_params.hdr.type_of_loader == 0x50) && + boot_params.efi_info.efi_systab) efi_enabled = 1; #endif - ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV); - screen_info = SCREEN_INFO; - edid_info = EDID_INFO; - apm_info.bios = APM_BIOS_INFO; - ist_info = IST_INFO; - saved_videomode = VIDEO_MODE; - if( SYS_DESC_TABLE.length != 0 ) { - set_mca_bus(SYS_DESC_TABLE.table[3] & 0x2); - machine_id = SYS_DESC_TABLE.table[0]; - machine_submodel_id = SYS_DESC_TABLE.table[1]; - BIOS_revision = SYS_DESC_TABLE.table[2]; + ROOT_DEV = old_decode_dev(boot_params.hdr.root_dev); + screen_info = boot_params.screen_info; + edid_info = boot_params.edid_info; + apm_info.bios = boot_params.apm_bios_info; + ist_info = boot_params.ist_info; + saved_videomode = boot_params.hdr.vid_mode; + if( boot_params.sys_desc_table.length != 0 ) { + set_mca_bus(boot_params.sys_desc_table.table[3] & 0x2); + machine_id = boot_params.sys_desc_table.table[0]; + machine_submodel_id = boot_params.sys_desc_table.table[1]; + BIOS_revision = boot_params.sys_desc_table.table[2]; } - bootloader_type = LOADER_TYPE; + bootloader_type = boot_params.hdr.type_of_loader; #ifdef CONFIG_BLK_DEV_RAM - rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK; - rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0); - rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0); + rd_image_start = boot_params.hdr.ram_size & RAMDISK_IMAGE_START_MASK; + rd_prompt = ((boot_params.hdr.ram_size & RAMDISK_PROMPT_FLAG) != 0); + rd_doload = ((boot_params.hdr.ram_size & RAMDISK_LOAD_FLAG) != 0); #endif ARCH_SETUP if (efi_enabled) @@ -545,7 +588,7 @@ void __init setup_arch(char **cmdline_p) copy_edd(); - if (!MOUNT_ROOT_RDONLY) + if (!boot_params.hdr.root_flags) root_mountflags &= ~MS_RDONLY; init_mm.start_code = (unsigned long) _text; init_mm.end_code = (unsigned long) _etext; @@ -556,6 +599,8 @@ void __init setup_arch(char **cmdline_p) code_resource.end = virt_to_phys(_etext)-1; data_resource.start = virt_to_phys(_etext); data_resource.end = virt_to_phys(_edata)-1; + bss_resource.start = virt_to_phys(&__bss_start); + bss_resource.end = virt_to_phys(&__bss_stop)-1; parse_early_param(); @@ -580,7 +625,7 @@ void __init setup_arch(char **cmdline_p) /* * NOTE: before this point _nobody_ is allowed to allocate * any memory using the bootmem allocator. Although the - * alloctor is now initialised only the first 8Mb of the kernel + * allocator is now initialised only the first 8Mb of the kernel * virtual address space has been mapped. All allocations before * paging_init() has completed must use the alloc_bootmem_low_pages() * variant (which allocates DMA'able memory) and care must be taken @@ -617,9 +662,7 @@ void __init setup_arch(char **cmdline_p) #endif #ifdef CONFIG_PCI -#ifdef CONFIG_X86_IO_APIC - check_acpi_pci(); /* Checks more than just ACPI actually */ -#endif + early_quirks(); #endif #ifdef CONFIG_ACPI diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index b7da90e79c78..238633d3d09a 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c @@ -52,13 +52,13 @@ #include <asm/dma.h> #include <asm/mpspec.h> #include <asm/mmu_context.h> -#include <asm/bootsetup.h> #include <asm/proto.h> #include <asm/setup.h> #include <asm/mach_apic.h> #include <asm/numa.h> #include <asm/sections.h> #include <asm/dmi.h> +#include <asm/cacheflush.h> /* * Machine setup.. @@ -134,6 +134,12 @@ struct resource code_resource = { .end = 0, .flags = IORESOURCE_RAM, }; +struct resource bss_resource = { + .name = "Kernel bss", + .start = 0, + .end = 0, + .flags = IORESOURCE_RAM, +}; #ifdef CONFIG_PROC_VMCORE /* elfcorehdr= specifies the location of elf core header @@ -180,10 +186,11 @@ EXPORT_SYMBOL(edd); */ static inline void copy_edd(void) { - memcpy(edd.mbr_signature, EDD_MBR_SIGNATURE, sizeof(edd.mbr_signature)); - memcpy(edd.edd_info, EDD_BUF, sizeof(edd.edd_info)); - edd.mbr_signature_nr = EDD_MBR_SIG_NR; - edd.edd_info_nr = EDD_NR; + memcpy(edd.mbr_signature, boot_params.edd_mbr_sig_buffer, + sizeof(edd.mbr_signature)); + memcpy(edd.edd_info, boot_params.eddbuf, sizeof(edd.edd_info)); + edd.mbr_signature_nr = boot_params.edd_mbr_sig_buf_entries; + edd.edd_info_nr = boot_params.eddbuf_entries; } #else static inline void copy_edd(void) @@ -191,6 +198,37 @@ static inline void copy_edd(void) } #endif +#ifdef CONFIG_KEXEC +static void __init reserve_crashkernel(void) +{ + unsigned long long free_mem; + unsigned long long crash_size, crash_base; + int ret; + + free_mem = ((unsigned long long)max_low_pfn - min_low_pfn) << PAGE_SHIFT; + + ret = parse_crashkernel(boot_command_line, free_mem, + &crash_size, &crash_base); + if (ret == 0 && crash_size) { + if (crash_base > 0) { + printk(KERN_INFO "Reserving %ldMB of memory at %ldMB " + "for crashkernel (System RAM: %ldMB)\n", + (unsigned long)(crash_size >> 20), + (unsigned long)(crash_base >> 20), + (unsigned long)(free_mem >> 20)); + crashk_res.start = crash_base; + crashk_res.end = crash_base + crash_size - 1; + reserve_bootmem(crash_base, crash_size); + } else + printk(KERN_INFO "crashkernel reservation failed - " + "you have to specify a base address\n"); + } +} +#else +static inline void __init reserve_crashkernel(void) +{} +#endif + #define EBDA_ADDR_POINTER 0x40E unsigned __initdata ebda_addr; @@ -220,21 +258,21 @@ void __init setup_arch(char **cmdline_p) { printk(KERN_INFO "Command line: %s\n", boot_command_line); - ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV); - screen_info = SCREEN_INFO; - edid_info = EDID_INFO; - saved_video_mode = SAVED_VIDEO_MODE; - bootloader_type = LOADER_TYPE; + ROOT_DEV = old_decode_dev(boot_params.hdr.root_dev); + screen_info = boot_params.screen_info; + edid_info = boot_params.edid_info; + saved_video_mode = boot_params.hdr.vid_mode; + bootloader_type = boot_params.hdr.type_of_loader; #ifdef CONFIG_BLK_DEV_RAM - rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK; - rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0); - rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0); + rd_image_start = boot_params.hdr.ram_size & RAMDISK_IMAGE_START_MASK; + rd_prompt = ((boot_params.hdr.ram_size & RAMDISK_PROMPT_FLAG) != 0); + rd_doload = ((boot_params.hdr.ram_size & RAMDISK_LOAD_FLAG) != 0); #endif setup_memory_region(); copy_edd(); - if (!MOUNT_ROOT_RDONLY) + if (!boot_params.hdr.root_flags) root_mountflags &= ~MS_RDONLY; init_mm.start_code = (unsigned long) &_text; init_mm.end_code = (unsigned long) &_etext; @@ -245,6 +283,8 @@ void __init setup_arch(char **cmdline_p) code_resource.end = virt_to_phys(&_etext)-1; data_resource.start = virt_to_phys(&_etext); data_resource.end = virt_to_phys(&_edata)-1; + bss_resource.start = virt_to_phys(&__bss_start); + bss_resource.end = virt_to_phys(&__bss_stop)-1; early_identify_cpu(&boot_cpu_data); @@ -271,6 +311,11 @@ void __init setup_arch(char **cmdline_p) dmi_scan_machine(); +#ifdef CONFIG_SMP + /* setup to use the static apicid table during kernel startup */ + x86_cpu_to_apicid_ptr = (void *)&x86_cpu_to_apicid_init; +#endif + #ifdef CONFIG_ACPI /* * Initialize the ACPI boot-time table parser (gets the RSDP and SDT). @@ -339,28 +384,25 @@ void __init setup_arch(char **cmdline_p) */ find_smp_config(); #ifdef CONFIG_BLK_DEV_INITRD - if (LOADER_TYPE && INITRD_START) { - if (INITRD_START + INITRD_SIZE <= (end_pfn << PAGE_SHIFT)) { - reserve_bootmem_generic(INITRD_START, INITRD_SIZE); - initrd_start = INITRD_START + PAGE_OFFSET; - initrd_end = initrd_start+INITRD_SIZE; - } - else { + if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) { + unsigned long ramdisk_image = boot_params.hdr.ramdisk_image; + unsigned long ramdisk_size = boot_params.hdr.ramdisk_size; + unsigned long ramdisk_end = ramdisk_image + ramdisk_size; + unsigned long end_of_mem = end_pfn << PAGE_SHIFT; + + if (ramdisk_end <= end_of_mem) { + reserve_bootmem_generic(ramdisk_image, ramdisk_size); + initrd_start = ramdisk_image + PAGE_OFFSET; + initrd_end = initrd_start+ramdisk_size; + } else { printk(KERN_ERR "initrd extends beyond end of memory " - "(0x%08lx > 0x%08lx)\ndisabling initrd\n", - (unsigned long)(INITRD_START + INITRD_SIZE), - (unsigned long)(end_pfn << PAGE_SHIFT)); + "(0x%08lx > 0x%08lx)\ndisabling initrd\n", + ramdisk_end, end_of_mem); initrd_start = 0; } } #endif -#ifdef CONFIG_KEXEC - if (crashk_res.start != crashk_res.end) { - reserve_bootmem_generic(crashk_res.start, - crashk_res.end - crashk_res.start + 1); - } -#endif - + reserve_crashkernel(); paging_init(); #ifdef CONFIG_PCI @@ -526,7 +568,7 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c) but in the same order as the HT nodeids. If that doesn't result in a usable node fall back to the path for the previous case. */ - int ht_nodeid = apicid - (cpu_data[0].phys_proc_id << bits); + int ht_nodeid = apicid - (cpu_data(0).phys_proc_id << bits); if (ht_nodeid >= 0 && apicid_to_node[ht_nodeid] != NUMA_NO_NODE) node = apicid_to_node[ht_nodeid]; @@ -601,7 +643,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) level = cpuid_eax(1); if (c->x86 == 15 && ((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58)) set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability); - if (c->x86 == 0x10) + if (c->x86 == 0x10 || c->x86 == 0x11) set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability); /* Enable workaround for FXSAVE leak */ @@ -850,6 +892,7 @@ void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c) #ifdef CONFIG_SMP c->phys_proc_id = (cpuid_ebx(1) >> 24) & 0xff; + c->cpu_index = 0; #endif } @@ -956,6 +999,7 @@ void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) static int show_cpuinfo(struct seq_file *m, void *v) { struct cpuinfo_x86 *c = v; + int cpu = 0; /* * These flag bits must match the definitions in <asm/cpufeature.h>. @@ -965,7 +1009,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) * applications want to get the raw CPUID data, they should access * /dev/cpu/<cpu_nr>/cpuid instead. */ - static char *x86_cap_flags[] = { + static const char *const x86_cap_flags[] = { /* Intel-defined */ "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce", "cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov", @@ -996,7 +1040,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) /* Intel-defined (#2) */ "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est", "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL, - NULL, NULL, "dca", NULL, NULL, NULL, NULL, "popcnt", + NULL, NULL, "dca", "sse4_1", "sse4_2", NULL, NULL, "popcnt", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /* VIA/Cyrix/Centaur-defined */ @@ -1006,10 +1050,10 @@ static int show_cpuinfo(struct seq_file *m, void *v) NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /* AMD-defined (#2) */ - "lahf_lm", "cmp_legacy", "svm", "extapic", "cr8_legacy", - "altmovcr8", "abm", "sse4a", - "misalignsse", "3dnowprefetch", - "osvw", "ibs", NULL, NULL, NULL, NULL, + "lahf_lm", "cmp_legacy", "svm", "extapic", + "cr8_legacy", "abm", "sse4a", "misalignsse", + "3dnowprefetch", "osvw", "ibs", "sse5", + "skinit", "wdt", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, @@ -1019,7 +1063,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, }; - static char *x86_power_flags[] = { + static const char *const x86_power_flags[] = { "ts", /* temperature sensor */ "fid", /* frequency id control */ "vid", /* voltage id control */ @@ -1034,8 +1078,9 @@ static int show_cpuinfo(struct seq_file *m, void *v) #ifdef CONFIG_SMP - if (!cpu_online(c-cpu_data)) + if (!cpu_online(c->cpu_index)) return 0; + cpu = c->cpu_index; #endif seq_printf(m,"processor\t: %u\n" @@ -1043,7 +1088,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) "cpu family\t: %d\n" "model\t\t: %d\n" "model name\t: %s\n", - (unsigned)(c-cpu_data), + (unsigned)cpu, c->x86_vendor_id[0] ? c->x86_vendor_id : "unknown", c->x86, (int)c->x86_model, @@ -1055,7 +1100,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) seq_printf(m, "stepping\t: unknown\n"); if (cpu_has(c,X86_FEATURE_TSC)) { - unsigned int freq = cpufreq_quick_get((unsigned)(c-cpu_data)); + unsigned int freq = cpufreq_quick_get((unsigned)cpu); if (!freq) freq = cpu_khz; seq_printf(m, "cpu MHz\t\t: %u.%03u\n", @@ -1068,9 +1113,9 @@ static int show_cpuinfo(struct seq_file *m, void *v) #ifdef CONFIG_SMP if (smp_num_siblings * c->x86_max_cores > 1) { - int cpu = c - cpu_data; seq_printf(m, "physical id\t: %d\n", c->phys_proc_id); - seq_printf(m, "siblings\t: %d\n", cpus_weight(cpu_core_map[cpu])); + seq_printf(m, "siblings\t: %d\n", + cpus_weight(per_cpu(cpu_core_map, cpu))); seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id); seq_printf(m, "cpu cores\t: %d\n", c->booted_cores); } @@ -1125,12 +1170,16 @@ static int show_cpuinfo(struct seq_file *m, void *v) static void *c_start(struct seq_file *m, loff_t *pos) { - return *pos < NR_CPUS ? cpu_data + *pos : NULL; + if (*pos == 0) /* just in case, cpu 0 is not the first */ + *pos = first_cpu(cpu_possible_map); + if ((*pos) < NR_CPUS && cpu_possible(*pos)) + return &cpu_data(*pos); + return NULL; } static void *c_next(struct seq_file *m, void *v, loff_t *pos) { - ++*pos; + *pos = next_cpu(*pos, cpu_possible_map); return c_start(m, pos); } diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index d01d51fcce2a..9bdd83022f5f 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -200,8 +200,8 @@ badframe: if (show_unhandled_signals && printk_ratelimit()) printk("%s%s[%d] bad frame in sigreturn frame:%p eip:%lx" " esp:%lx oeax:%lx\n", - current->pid > 1 ? KERN_INFO : KERN_EMERG, - current->comm, current->pid, frame, regs->eip, + task_pid_nr(current) > 1 ? KERN_INFO : KERN_EMERG, + current->comm, task_pid_nr(current), frame, regs->eip, regs->esp, regs->orig_eax); force_sig(SIGSEGV, current); @@ -385,7 +385,6 @@ static int setup_frame(int sig, struct k_sigaction *ka, regs->edx = (unsigned long) 0; regs->ecx = (unsigned long) 0; - set_fs(USER_DS); regs->xds = __USER_DS; regs->xes = __USER_DS; regs->xss = __USER_DS; @@ -479,7 +478,6 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, regs->edx = (unsigned long) &frame->info; regs->ecx = (unsigned long) &frame->uc; - set_fs(USER_DS); regs->xds = __USER_DS; regs->xes = __USER_DS; regs->xss = __USER_DS; @@ -596,7 +594,7 @@ static void fastcall do_signal(struct pt_regs *regs) signr = get_signal_to_deliver(&info, &ka, regs, NULL); if (signr > 0) { - /* Reenable any watchpoints before delivering the + /* Re-enable any watchpoints before delivering the * signal to user space. The processor register will * have been cleared if the watchpoint triggered * inside the kernel. diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index 683802bec419..ab086b0357fc 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -410,7 +410,7 @@ static void do_signal(struct pt_regs *regs) signr = get_signal_to_deliver(&info, &ka, regs, NULL); if (signr > 0) { - /* Reenable any watchpoints before delivering the + /* Re-enable any watchpoints before delivering the * signal to user space. The processor register will * have been cleared if the watchpoint triggered * inside the kernel. diff --git a/arch/x86/kernel/smp_32.c b/arch/x86/kernel/smp_32.c index 2d35d8502029..fcaa026eb807 100644 --- a/arch/x86/kernel/smp_32.c +++ b/arch/x86/kernel/smp_32.c @@ -69,7 +69,7 @@ * * B stepping CPUs may hang. There are hardware work arounds * for this. We warn about it in case your board doesn't have the work - * arounds. Basically thats so I can tell anyone with a B stepping + * arounds. Basically that's so I can tell anyone with a B stepping * CPU and SMP problems "tough". * * Specific items [From Pentium Processor Specification Update] @@ -273,7 +273,7 @@ void leave_mm(unsigned long cpu) * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask); * Stop ipi delivery for the old mm. This is not synchronized with * the other cpus, but smp_invalidate_interrupt ignore flush ipis - * for the wrong mm, and in the worst case we perform a superflous + * for the wrong mm, and in the worst case we perform a superfluous * tlb flush. * 1a2) set cpu_tlbstate to TLBSTATE_OK * Now the smp_invalidate_interrupt won't call leave_mm if cpu0 @@ -342,6 +342,7 @@ fastcall void smp_invalidate_interrupt(struct pt_regs *regs) smp_mb__after_clear_bit(); out: put_cpu_no_resched(); + __get_cpu_var(irq_stat).irq_tlb_count++; } void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, @@ -609,7 +610,7 @@ static void stop_this_cpu (void * dummy) */ cpu_clear(smp_processor_id(), cpu_online_map); disable_local_APIC(); - if (cpu_data[smp_processor_id()].hlt_works_ok) + if (cpu_data(smp_processor_id()).hlt_works_ok) for(;;) halt(); for (;;); } @@ -640,6 +641,7 @@ static void native_smp_send_stop(void) fastcall void smp_reschedule_interrupt(struct pt_regs *regs) { ack_APIC_irq(); + __get_cpu_var(irq_stat).irq_resched_count++; } fastcall void smp_call_function_interrupt(struct pt_regs *regs) @@ -660,6 +662,7 @@ fastcall void smp_call_function_interrupt(struct pt_regs *regs) */ irq_enter(); (*func)(info); + __get_cpu_var(irq_stat).irq_call_count++; irq_exit(); if (wait) { @@ -673,7 +676,7 @@ static int convert_apicid_to_cpu(int apic_id) int i; for (i = 0; i < NR_CPUS; i++) { - if (x86_cpu_to_apicid[i] == apic_id) + if (per_cpu(x86_cpu_to_apicid, i) == apic_id) return i; } return -1; @@ -705,3 +708,4 @@ struct smp_ops smp_ops = { .smp_send_reschedule = native_smp_send_reschedule, .smp_call_function_mask = native_smp_call_function_mask, }; +EXPORT_SYMBOL_GPL(smp_ops); diff --git a/arch/x86/kernel/smp_64.c b/arch/x86/kernel/smp_64.c index df4a82812adb..03fa6ed559c6 100644 --- a/arch/x86/kernel/smp_64.c +++ b/arch/x86/kernel/smp_64.c @@ -163,6 +163,7 @@ asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs) out: ack_APIC_irq(); cpu_clear(cpu, f->flush_cpumask); + add_pda(irq_tlb_count, 1); } static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm, @@ -321,17 +322,27 @@ void unlock_ipi_call_lock(void) } /* - * this function sends a 'generic call function' IPI to one other CPU - * in the system. - * - * cpu is a standard Linux logical CPU number. + * this function sends a 'generic call function' IPI to all other CPU + * of the system defined in the mask. */ -static void -__smp_call_function_single(int cpu, void (*func) (void *info), void *info, - int nonatomic, int wait) + +static int +__smp_call_function_mask(cpumask_t mask, + void (*func)(void *), void *info, + int wait) { struct call_data_struct data; - int cpus = 1; + cpumask_t allbutself; + int cpus; + + allbutself = cpu_online_map; + cpu_clear(smp_processor_id(), allbutself); + + cpus_and(mask, mask, allbutself); + cpus = cpus_weight(mask); + + if (!cpus) + return 0; data.func = func; data.info = info; @@ -342,19 +353,55 @@ __smp_call_function_single(int cpu, void (*func) (void *info), void *info, call_data = &data; wmb(); - /* Send a message to all other CPUs and wait for them to respond */ - send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNCTION_VECTOR); + + /* Send a message to other CPUs */ + if (cpus_equal(mask, allbutself)) + send_IPI_allbutself(CALL_FUNCTION_VECTOR); + else + send_IPI_mask(mask, CALL_FUNCTION_VECTOR); /* Wait for response */ while (atomic_read(&data.started) != cpus) cpu_relax(); if (!wait) - return; + return 0; while (atomic_read(&data.finished) != cpus) cpu_relax(); + + return 0; +} +/** + * smp_call_function_mask(): Run a function on a set of other CPUs. + * @mask: The set of cpus to run on. Must not include the current cpu. + * @func: The function to run. This must be fast and non-blocking. + * @info: An arbitrary pointer to pass to the function. + * @wait: If true, wait (atomically) until function has completed on other CPUs. + * + * Returns 0 on success, else a negative status code. + * + * If @wait is true, then returns once @func has returned; otherwise + * it returns just before the target cpu calls @func. + * + * You must not call this function with disabled interrupts or from a + * hardware interrupt handler or from a bottom half handler. + */ +int smp_call_function_mask(cpumask_t mask, + void (*func)(void *), void *info, + int wait) +{ + int ret; + + /* Can deadlock when called with interrupts disabled */ + WARN_ON(irqs_disabled()); + + spin_lock(&call_lock); + ret = __smp_call_function_mask(mask, func, info, wait); + spin_unlock(&call_lock); + return ret; } +EXPORT_SYMBOL(smp_call_function_mask); /* * smp_call_function_single - Run a function on a specific CPU @@ -373,6 +420,7 @@ int smp_call_function_single (int cpu, void (*func) (void *info), void *info, int nonatomic, int wait) { /* prevent preemption and reschedule on another processor */ + int ret; int me = get_cpu(); /* Can deadlock when called with interrupts disabled */ @@ -386,51 +434,14 @@ int smp_call_function_single (int cpu, void (*func) (void *info), void *info, return 0; } - spin_lock(&call_lock); - __smp_call_function_single(cpu, func, info, nonatomic, wait); - spin_unlock(&call_lock); + ret = smp_call_function_mask(cpumask_of_cpu(cpu), func, info, wait); + put_cpu(); - return 0; + return ret; } EXPORT_SYMBOL(smp_call_function_single); /* - * this function sends a 'generic call function' IPI to all other CPUs - * in the system. - */ -static void __smp_call_function (void (*func) (void *info), void *info, - int nonatomic, int wait) -{ - struct call_data_struct data; - int cpus = num_online_cpus()-1; - - if (!cpus) - return; - - data.func = func; - data.info = info; - atomic_set(&data.started, 0); - data.wait = wait; - if (wait) - atomic_set(&data.finished, 0); - - call_data = &data; - wmb(); - /* Send a message to all other CPUs and wait for them to respond */ - send_IPI_allbutself(CALL_FUNCTION_VECTOR); - - /* Wait for response */ - while (atomic_read(&data.started) != cpus) - cpu_relax(); - - if (!wait) - return; - - while (atomic_read(&data.finished) != cpus) - cpu_relax(); -} - -/* * smp_call_function - run a function on all other CPUs. * @func: The function to run. This must be fast and non-blocking. * @info: An arbitrary pointer to pass to the function. @@ -448,10 +459,7 @@ static void __smp_call_function (void (*func) (void *info), void *info, int smp_call_function (void (*func) (void *info), void *info, int nonatomic, int wait) { - spin_lock(&call_lock); - __smp_call_function(func,info,nonatomic,wait); - spin_unlock(&call_lock); - return 0; + return smp_call_function_mask(cpu_online_map, func, info, wait); } EXPORT_SYMBOL(smp_call_function); @@ -478,7 +486,7 @@ void smp_send_stop(void) /* Don't deadlock on the call lock in panic */ nolock = !spin_trylock(&call_lock); local_irq_save(flags); - __smp_call_function(stop_this_cpu, NULL, 0, 0); + __smp_call_function_mask(cpu_online_map, stop_this_cpu, NULL, 0); if (!nolock) spin_unlock(&call_lock); disable_local_APIC(); @@ -493,6 +501,7 @@ void smp_send_stop(void) asmlinkage void smp_reschedule_interrupt(void) { ack_APIC_irq(); + add_pda(irq_resched_count, 1); } asmlinkage void smp_call_function_interrupt(void) @@ -514,6 +523,7 @@ asmlinkage void smp_call_function_interrupt(void) exit_idle(); irq_enter(); (*func)(info); + add_pda(irq_call_count, 1); irq_exit(); if (wait) { mb(); diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c index e4f61d1c6248..ef0f34ede1ab 100644 --- a/arch/x86/kernel/smpboot_32.c +++ b/arch/x86/kernel/smpboot_32.c @@ -67,15 +67,15 @@ int smp_num_siblings = 1; EXPORT_SYMBOL(smp_num_siblings); /* Last level cache ID of each logical CPU */ -int cpu_llc_id[NR_CPUS] __cpuinitdata = {[0 ... NR_CPUS-1] = BAD_APICID}; +DEFINE_PER_CPU(u8, cpu_llc_id) = BAD_APICID; /* representing HT siblings of each logical CPU */ -cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly; -EXPORT_SYMBOL(cpu_sibling_map); +DEFINE_PER_CPU(cpumask_t, cpu_sibling_map); +EXPORT_PER_CPU_SYMBOL(cpu_sibling_map); /* representing HT and core siblings of each logical CPU */ -cpumask_t cpu_core_map[NR_CPUS] __read_mostly; -EXPORT_SYMBOL(cpu_core_map); +DEFINE_PER_CPU(cpumask_t, cpu_core_map); +EXPORT_PER_CPU_SYMBOL(cpu_core_map); /* bitmap of online cpus */ cpumask_t cpu_online_map __read_mostly; @@ -89,12 +89,20 @@ EXPORT_SYMBOL(cpu_possible_map); static cpumask_t smp_commenced_mask; /* Per CPU bogomips and other parameters */ -struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned; -EXPORT_SYMBOL(cpu_data); +DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info); +EXPORT_PER_CPU_SYMBOL(cpu_info); -u8 x86_cpu_to_apicid[NR_CPUS] __read_mostly = - { [0 ... NR_CPUS-1] = 0xff }; -EXPORT_SYMBOL(x86_cpu_to_apicid); +/* + * The following static array is used during kernel startup + * and the x86_cpu_to_apicid_ptr contains the address of the + * array during this time. Is it zeroed when the per_cpu + * data area is removed. + */ +u8 x86_cpu_to_apicid_init[NR_CPUS] __initdata = + { [0 ... NR_CPUS-1] = BAD_APICID }; +void *x86_cpu_to_apicid_ptr; +DEFINE_PER_CPU(u8, x86_cpu_to_apicid) = BAD_APICID; +EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid); u8 apicid_2_node[MAX_APICID]; @@ -102,8 +110,8 @@ u8 apicid_2_node[MAX_APICID]; * Trampoline 80x86 program as an array. */ -extern unsigned char trampoline_data []; -extern unsigned char trampoline_end []; +extern const unsigned char trampoline_data []; +extern const unsigned char trampoline_end []; static unsigned char *trampoline_base; static int trampoline_exec; @@ -118,7 +126,7 @@ DEFINE_PER_CPU(int, cpu_state) = { 0 }; * has made sure it's suitably aligned. */ -static unsigned long __devinit setup_trampoline(void) +static unsigned long __cpuinit setup_trampoline(void) { memcpy(trampoline_base, trampoline_data, trampoline_end - trampoline_data); return virt_to_phys(trampoline_base); @@ -150,9 +158,10 @@ void __init smp_alloc_memory(void) void __cpuinit smp_store_cpu_info(int id) { - struct cpuinfo_x86 *c = cpu_data + id; + struct cpuinfo_x86 *c = &cpu_data(id); *c = boot_cpu_data; + c->cpu_index = id; if (id!=0) identify_secondary_cpu(c); /* @@ -294,13 +303,13 @@ static int cpucount; /* maps the cpu to the sched domain representing multi-core */ cpumask_t cpu_coregroup_map(int cpu) { - struct cpuinfo_x86 *c = cpu_data + cpu; + struct cpuinfo_x86 *c = &cpu_data(cpu); /* * For perf, we return last level cache shared map. * And for power savings, we return cpu_core_map */ if (sched_mc_power_savings || sched_smt_power_savings) - return cpu_core_map[cpu]; + return per_cpu(cpu_core_map, cpu); else return c->llc_shared_map; } @@ -311,61 +320,61 @@ static cpumask_t cpu_sibling_setup_map; void __cpuinit set_cpu_sibling_map(int cpu) { int i; - struct cpuinfo_x86 *c = cpu_data; + struct cpuinfo_x86 *c = &cpu_data(cpu); cpu_set(cpu, cpu_sibling_setup_map); if (smp_num_siblings > 1) { for_each_cpu_mask(i, cpu_sibling_setup_map) { - if (c[cpu].phys_proc_id == c[i].phys_proc_id && - c[cpu].cpu_core_id == c[i].cpu_core_id) { - cpu_set(i, cpu_sibling_map[cpu]); - cpu_set(cpu, cpu_sibling_map[i]); - cpu_set(i, cpu_core_map[cpu]); - cpu_set(cpu, cpu_core_map[i]); - cpu_set(i, c[cpu].llc_shared_map); - cpu_set(cpu, c[i].llc_shared_map); + if (c->phys_proc_id == cpu_data(i).phys_proc_id && + c->cpu_core_id == cpu_data(i).cpu_core_id) { + cpu_set(i, per_cpu(cpu_sibling_map, cpu)); + cpu_set(cpu, per_cpu(cpu_sibling_map, i)); + cpu_set(i, per_cpu(cpu_core_map, cpu)); + cpu_set(cpu, per_cpu(cpu_core_map, i)); + cpu_set(i, c->llc_shared_map); + cpu_set(cpu, cpu_data(i).llc_shared_map); } } } else { - cpu_set(cpu, cpu_sibling_map[cpu]); + cpu_set(cpu, per_cpu(cpu_sibling_map, cpu)); } - cpu_set(cpu, c[cpu].llc_shared_map); + cpu_set(cpu, c->llc_shared_map); if (current_cpu_data.x86_max_cores == 1) { - cpu_core_map[cpu] = cpu_sibling_map[cpu]; - c[cpu].booted_cores = 1; + per_cpu(cpu_core_map, cpu) = per_cpu(cpu_sibling_map, cpu); + c->booted_cores = 1; return; } for_each_cpu_mask(i, cpu_sibling_setup_map) { - if (cpu_llc_id[cpu] != BAD_APICID && - cpu_llc_id[cpu] == cpu_llc_id[i]) { - cpu_set(i, c[cpu].llc_shared_map); - cpu_set(cpu, c[i].llc_shared_map); + if (per_cpu(cpu_llc_id, cpu) != BAD_APICID && + per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i)) { + cpu_set(i, c->llc_shared_map); + cpu_set(cpu, cpu_data(i).llc_shared_map); } - if (c[cpu].phys_proc_id == c[i].phys_proc_id) { - cpu_set(i, cpu_core_map[cpu]); - cpu_set(cpu, cpu_core_map[i]); + if (c->phys_proc_id == cpu_data(i).phys_proc_id) { + cpu_set(i, per_cpu(cpu_core_map, cpu)); + cpu_set(cpu, per_cpu(cpu_core_map, i)); /* * Does this new cpu bringup a new core? */ - if (cpus_weight(cpu_sibling_map[cpu]) == 1) { + if (cpus_weight(per_cpu(cpu_sibling_map, cpu)) == 1) { /* * for each core in package, increment * the booted_cores for this new cpu */ - if (first_cpu(cpu_sibling_map[i]) == i) - c[cpu].booted_cores++; + if (first_cpu(per_cpu(cpu_sibling_map, i)) == i) + c->booted_cores++; /* * increment the core count for all * the other cpus in this package */ if (i != cpu) - c[i].booted_cores++; - } else if (i != cpu && !c[cpu].booted_cores) - c[cpu].booted_cores = c[i].booted_cores; + cpu_data(i).booted_cores++; + } else if (i != cpu && !c->booted_cores) + c->booted_cores = cpu_data(i).booted_cores; } } } @@ -412,7 +421,7 @@ static void __cpuinit start_secondary(void *unused) /* * We need to hold call_lock, so there is no inconsistency * between the time smp_call_function() determines number of - * IPI receipients, and the time when the determination is made + * IPI recipients, and the time when the determination is made * for which cpus receive the IPI. Holding this * lock helps us to not include this cpu in a currently in progress * smp_call_function(). @@ -804,7 +813,7 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu) irq_ctx_init(cpu); - x86_cpu_to_apicid[cpu] = apicid; + per_cpu(x86_cpu_to_apicid, cpu) = apicid; /* * This grunge runs the startup process for * the targeted processor. @@ -844,7 +853,7 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu) /* number CPUs logically, starting from 1 (BSP is 0) */ Dprintk("OK.\n"); printk("CPU%d: ", cpu); - print_cpu_info(&cpu_data[cpu]); + print_cpu_info(&cpu_data(cpu)); Dprintk("CPU has booted.\n"); } else { boot_error= 1; @@ -866,7 +875,7 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu) cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */ cpucount--; } else { - x86_cpu_to_apicid[cpu] = apicid; + per_cpu(x86_cpu_to_apicid, cpu) = apicid; cpu_set(cpu, cpu_present_map); } @@ -915,7 +924,7 @@ static int __cpuinit __smp_prepare_cpu(int cpu) struct warm_boot_cpu_info info; int apicid, ret; - apicid = x86_cpu_to_apicid[cpu]; + apicid = per_cpu(x86_cpu_to_apicid, cpu); if (apicid == BAD_APICID) { ret = -ENODEV; goto exit; @@ -961,11 +970,11 @@ static void __init smp_boot_cpus(unsigned int max_cpus) */ smp_store_cpu_info(0); /* Final full version of the data */ printk("CPU%d: ", 0); - print_cpu_info(&cpu_data[0]); + print_cpu_info(&cpu_data(0)); boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID)); boot_cpu_logical_apicid = logical_smp_processor_id(); - x86_cpu_to_apicid[0] = boot_cpu_physical_apicid; + per_cpu(x86_cpu_to_apicid, 0) = boot_cpu_physical_apicid; current_thread_info()->cpu = 0; @@ -983,8 +992,8 @@ static void __init smp_boot_cpus(unsigned int max_cpus) printk(KERN_NOTICE "Local APIC not detected." " Using dummy APIC emulation.\n"); map_cpu_to_logical_apicid(); - cpu_set(0, cpu_sibling_map[0]); - cpu_set(0, cpu_core_map[0]); + cpu_set(0, per_cpu(cpu_sibling_map, 0)); + cpu_set(0, per_cpu(cpu_core_map, 0)); return; } @@ -1008,8 +1017,9 @@ static void __init smp_boot_cpus(unsigned int max_cpus) printk(KERN_ERR "... forcing use of dummy APIC emulation. (tell your hw vendor)\n"); smpboot_clear_io_apic_irqs(); phys_cpu_present_map = physid_mask_of_physid(0); - cpu_set(0, cpu_sibling_map[0]); - cpu_set(0, cpu_core_map[0]); + map_cpu_to_logical_apicid(); + cpu_set(0, per_cpu(cpu_sibling_map, 0)); + cpu_set(0, per_cpu(cpu_core_map, 0)); return; } @@ -1021,10 +1031,17 @@ static void __init smp_boot_cpus(unsigned int max_cpus) if (!max_cpus) { smp_found_config = 0; printk(KERN_INFO "SMP mode deactivated, forcing use of dummy APIC emulation.\n"); + + if (nmi_watchdog == NMI_LOCAL_APIC) { + printk(KERN_INFO "activating minimal APIC for NMI watchdog use.\n"); + connect_bsp_APIC(); + setup_local_APIC(); + } smpboot_clear_io_apic_irqs(); phys_cpu_present_map = physid_mask_of_physid(0); - cpu_set(0, cpu_sibling_map[0]); - cpu_set(0, cpu_core_map[0]); + map_cpu_to_logical_apicid(); + cpu_set(0, per_cpu(cpu_sibling_map, 0)); + cpu_set(0, per_cpu(cpu_core_map, 0)); return; } @@ -1076,7 +1093,7 @@ static void __init smp_boot_cpus(unsigned int max_cpus) Dprintk("Before bogomips.\n"); for (cpu = 0; cpu < NR_CPUS; cpu++) if (cpu_isset(cpu, cpu_callout_map)) - bogosum += cpu_data[cpu].loops_per_jiffy; + bogosum += cpu_data(cpu).loops_per_jiffy; printk(KERN_INFO "Total of %d processors activated (%lu.%02lu BogoMIPS).\n", cpucount+1, @@ -1102,16 +1119,16 @@ static void __init smp_boot_cpus(unsigned int max_cpus) Dprintk("Boot done.\n"); /* - * construct cpu_sibling_map[], so that we can tell sibling CPUs + * construct cpu_sibling_map, so that we can tell sibling CPUs * efficiently. */ for (cpu = 0; cpu < NR_CPUS; cpu++) { - cpus_clear(cpu_sibling_map[cpu]); - cpus_clear(cpu_core_map[cpu]); + cpus_clear(per_cpu(cpu_sibling_map, cpu)); + cpus_clear(per_cpu(cpu_core_map, cpu)); } - cpu_set(0, cpu_sibling_map[0]); - cpu_set(0, cpu_core_map[0]); + cpu_set(0, per_cpu(cpu_sibling_map, 0)); + cpu_set(0, per_cpu(cpu_core_map, 0)); smpboot_setup_io_apic(); @@ -1146,23 +1163,23 @@ void __init native_smp_prepare_boot_cpu(void) void remove_siblinginfo(int cpu) { int sibling; - struct cpuinfo_x86 *c = cpu_data; + struct cpuinfo_x86 *c = &cpu_data(cpu); - for_each_cpu_mask(sibling, cpu_core_map[cpu]) { - cpu_clear(cpu, cpu_core_map[sibling]); - /* + for_each_cpu_mask(sibling, per_cpu(cpu_core_map, cpu)) { + cpu_clear(cpu, per_cpu(cpu_core_map, sibling)); + /*/ * last thread sibling in this cpu core going down */ - if (cpus_weight(cpu_sibling_map[cpu]) == 1) - c[sibling].booted_cores--; + if (cpus_weight(per_cpu(cpu_sibling_map, cpu)) == 1) + cpu_data(sibling).booted_cores--; } - for_each_cpu_mask(sibling, cpu_sibling_map[cpu]) - cpu_clear(cpu, cpu_sibling_map[sibling]); - cpus_clear(cpu_sibling_map[cpu]); - cpus_clear(cpu_core_map[cpu]); - c[cpu].phys_proc_id = 0; - c[cpu].cpu_core_id = 0; + for_each_cpu_mask(sibling, per_cpu(cpu_sibling_map, cpu)) + cpu_clear(cpu, per_cpu(cpu_sibling_map, sibling)); + cpus_clear(per_cpu(cpu_sibling_map, cpu)); + cpus_clear(per_cpu(cpu_core_map, cpu)); + c->phys_proc_id = 0; + c->cpu_core_id = 0; cpu_clear(cpu, cpu_sibling_setup_map); } diff --git a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c index 720a7d1f8862..500670c93d81 100644 --- a/arch/x86/kernel/smpboot_64.c +++ b/arch/x86/kernel/smpboot_64.c @@ -65,7 +65,7 @@ int smp_num_siblings = 1; EXPORT_SYMBOL(smp_num_siblings); /* Last level cache ID of each logical CPU */ -u8 cpu_llc_id[NR_CPUS] __cpuinitdata = {[0 ... NR_CPUS-1] = BAD_APICID}; +DEFINE_PER_CPU(u8, cpu_llc_id) = BAD_APICID; /* Bitmask of currently online CPUs */ cpumask_t cpu_online_map __read_mostly; @@ -84,26 +84,26 @@ cpumask_t cpu_possible_map; EXPORT_SYMBOL(cpu_possible_map); /* Per CPU bogomips and other parameters */ -struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned; -EXPORT_SYMBOL(cpu_data); +DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info); +EXPORT_PER_CPU_SYMBOL(cpu_info); /* Set when the idlers are all forked */ int smp_threads_ready; /* representing HT siblings of each logical CPU */ -cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly; -EXPORT_SYMBOL(cpu_sibling_map); +DEFINE_PER_CPU(cpumask_t, cpu_sibling_map); +EXPORT_PER_CPU_SYMBOL(cpu_sibling_map); /* representing HT and core siblings of each logical CPU */ -cpumask_t cpu_core_map[NR_CPUS] __read_mostly; -EXPORT_SYMBOL(cpu_core_map); +DEFINE_PER_CPU(cpumask_t, cpu_core_map); +EXPORT_PER_CPU_SYMBOL(cpu_core_map); /* * Trampoline 80x86 program as an array. */ -extern unsigned char trampoline_data[]; -extern unsigned char trampoline_end[]; +extern const unsigned char trampoline_data[]; +extern const unsigned char trampoline_end[]; /* State of each CPU */ DEFINE_PER_CPU(int, cpu_state) = { 0 }; @@ -138,9 +138,10 @@ static unsigned long __cpuinit setup_trampoline(void) static void __cpuinit smp_store_cpu_info(int id) { - struct cpuinfo_x86 *c = cpu_data + id; + struct cpuinfo_x86 *c = &cpu_data(id); *c = boot_cpu_data; + c->cpu_index = id; identify_cpu(c); print_cpu_info(c); } @@ -237,13 +238,13 @@ void __cpuinit smp_callin(void) /* maps the cpu to the sched domain representing multi-core */ cpumask_t cpu_coregroup_map(int cpu) { - struct cpuinfo_x86 *c = cpu_data + cpu; + struct cpuinfo_x86 *c = &cpu_data(cpu); /* * For perf, we return last level cache shared map. * And for power savings, we return cpu_core_map */ if (sched_mc_power_savings || sched_smt_power_savings) - return cpu_core_map[cpu]; + return per_cpu(cpu_core_map, cpu); else return c->llc_shared_map; } @@ -254,61 +255,61 @@ static cpumask_t cpu_sibling_setup_map; static inline void set_cpu_sibling_map(int cpu) { int i; - struct cpuinfo_x86 *c = cpu_data; + struct cpuinfo_x86 *c = &cpu_data(cpu); cpu_set(cpu, cpu_sibling_setup_map); if (smp_num_siblings > 1) { for_each_cpu_mask(i, cpu_sibling_setup_map) { - if (c[cpu].phys_proc_id == c[i].phys_proc_id && - c[cpu].cpu_core_id == c[i].cpu_core_id) { - cpu_set(i, cpu_sibling_map[cpu]); - cpu_set(cpu, cpu_sibling_map[i]); - cpu_set(i, cpu_core_map[cpu]); - cpu_set(cpu, cpu_core_map[i]); - cpu_set(i, c[cpu].llc_shared_map); - cpu_set(cpu, c[i].llc_shared_map); + if (c->phys_proc_id == cpu_data(i).phys_proc_id && + c->cpu_core_id == cpu_data(i).cpu_core_id) { + cpu_set(i, per_cpu(cpu_sibling_map, cpu)); + cpu_set(cpu, per_cpu(cpu_sibling_map, i)); + cpu_set(i, per_cpu(cpu_core_map, cpu)); + cpu_set(cpu, per_cpu(cpu_core_map, i)); + cpu_set(i, c->llc_shared_map); + cpu_set(cpu, cpu_data(i).llc_shared_map); } } } else { - cpu_set(cpu, cpu_sibling_map[cpu]); + cpu_set(cpu, per_cpu(cpu_sibling_map, cpu)); } - cpu_set(cpu, c[cpu].llc_shared_map); + cpu_set(cpu, c->llc_shared_map); if (current_cpu_data.x86_max_cores == 1) { - cpu_core_map[cpu] = cpu_sibling_map[cpu]; - c[cpu].booted_cores = 1; + per_cpu(cpu_core_map, cpu) = per_cpu(cpu_sibling_map, cpu); + c->booted_cores = 1; return; } for_each_cpu_mask(i, cpu_sibling_setup_map) { - if (cpu_llc_id[cpu] != BAD_APICID && - cpu_llc_id[cpu] == cpu_llc_id[i]) { - cpu_set(i, c[cpu].llc_shared_map); - cpu_set(cpu, c[i].llc_shared_map); + if (per_cpu(cpu_llc_id, cpu) != BAD_APICID && + per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i)) { + cpu_set(i, c->llc_shared_map); + cpu_set(cpu, cpu_data(i).llc_shared_map); } - if (c[cpu].phys_proc_id == c[i].phys_proc_id) { - cpu_set(i, cpu_core_map[cpu]); - cpu_set(cpu, cpu_core_map[i]); + if (c->phys_proc_id == cpu_data(i).phys_proc_id) { + cpu_set(i, per_cpu(cpu_core_map, cpu)); + cpu_set(cpu, per_cpu(cpu_core_map, i)); /* * Does this new cpu bringup a new core? */ - if (cpus_weight(cpu_sibling_map[cpu]) == 1) { + if (cpus_weight(per_cpu(cpu_sibling_map, cpu)) == 1) { /* * for each core in package, increment * the booted_cores for this new cpu */ - if (first_cpu(cpu_sibling_map[i]) == i) - c[cpu].booted_cores++; + if (first_cpu(per_cpu(cpu_sibling_map, i)) == i) + c->booted_cores++; /* * increment the core count for all * the other cpus in this package */ if (i != cpu) - c[i].booted_cores++; - } else if (i != cpu && !c[cpu].booted_cores) - c[cpu].booted_cores = c[i].booted_cores; + cpu_data(i).booted_cores++; + } else if (i != cpu && !c->booted_cores) + c->booted_cores = cpu_data(i).booted_cores; } } } @@ -350,7 +351,7 @@ void __cpuinit start_secondary(void) /* * We need to hold call_lock, so there is no inconsistency * between the time smp_call_function() determines number of - * IPI receipients, and the time when the determination is made + * IPI recipients, and the time when the determination is made * for which cpus receive the IPI in genapic_flat.c. Holding this * lock helps us to not include this cpu in a currently in progress * smp_call_function(). @@ -387,7 +388,7 @@ static void inquire_remote_apic(int apicid) printk(KERN_INFO "Inquiring remote APIC #%d...\n", apicid); - for (i = 0; i < sizeof(regs) / sizeof(*regs); i++) { + for (i = 0; i < ARRAY_SIZE(regs); i++) { printk("... APIC #%d %s: ", apicid, names[i]); /* @@ -694,8 +695,7 @@ do_rest: clear_node_cpumask(cpu); /* was set by numa_add_cpu */ cpu_clear(cpu, cpu_present_map); cpu_clear(cpu, cpu_possible_map); - x86_cpu_to_apicid[cpu] = BAD_APICID; - x86_cpu_to_log_apicid[cpu] = BAD_APICID; + per_cpu(x86_cpu_to_apicid, cpu) = BAD_APICID; return -EIO; } @@ -735,8 +735,8 @@ static __init void disable_smp(void) phys_cpu_present_map = physid_mask_of_physid(boot_cpu_id); else phys_cpu_present_map = physid_mask_of_physid(0); - cpu_set(0, cpu_sibling_map[0]); - cpu_set(0, cpu_core_map[0]); + cpu_set(0, per_cpu(cpu_sibling_map, 0)); + cpu_set(0, per_cpu(cpu_core_map, 0)); } #ifdef CONFIG_HOTPLUG_CPU @@ -842,6 +842,26 @@ static int __init smp_sanity_check(unsigned max_cpus) } /* + * Copy apicid's found by MP_processor_info from initial array to the per cpu + * data area. The x86_cpu_to_apicid_init array is then expendable and the + * x86_cpu_to_apicid_ptr is zeroed indicating that the static array is no + * longer available. + */ +void __init smp_set_apicids(void) +{ + int cpu; + + for_each_cpu_mask(cpu, cpu_possible_map) { + if (per_cpu_offset(cpu)) + per_cpu(x86_cpu_to_apicid, cpu) = + x86_cpu_to_apicid_init[cpu]; + } + + /* indicate the static array will be going away soon */ + x86_cpu_to_apicid_ptr = NULL; +} + +/* * Prepare for SMP bootup. The MP table or ACPI has been read * earlier. Just do some sanity checking here and enable APIC mode. */ @@ -850,6 +870,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus) nmi_watchdog_default(); current_cpu_data = boot_cpu_data; current_thread_info()->cpu = 0; /* needed? */ + smp_set_apicids(); set_cpu_sibling_map(0); if (smp_sanity_check(max_cpus) < 0) { @@ -969,23 +990,23 @@ void __init smp_cpus_done(unsigned int max_cpus) static void remove_siblinginfo(int cpu) { int sibling; - struct cpuinfo_x86 *c = cpu_data; + struct cpuinfo_x86 *c = &cpu_data(cpu); - for_each_cpu_mask(sibling, cpu_core_map[cpu]) { - cpu_clear(cpu, cpu_core_map[sibling]); + for_each_cpu_mask(sibling, per_cpu(cpu_core_map, cpu)) { + cpu_clear(cpu, per_cpu(cpu_core_map, sibling)); /* * last thread sibling in this cpu core going down */ - if (cpus_weight(cpu_sibling_map[cpu]) == 1) - c[sibling].booted_cores--; + if (cpus_weight(per_cpu(cpu_sibling_map, cpu)) == 1) + cpu_data(sibling).booted_cores--; } - for_each_cpu_mask(sibling, cpu_sibling_map[cpu]) - cpu_clear(cpu, cpu_sibling_map[sibling]); - cpus_clear(cpu_sibling_map[cpu]); - cpus_clear(cpu_core_map[cpu]); - c[cpu].phys_proc_id = 0; - c[cpu].cpu_core_id = 0; + for_each_cpu_mask(sibling, per_cpu(cpu_sibling_map, cpu)) + cpu_clear(cpu, per_cpu(cpu_sibling_map, sibling)); + cpus_clear(per_cpu(cpu_sibling_map, cpu)); + cpus_clear(per_cpu(cpu_core_map, cpu)); + c->phys_proc_id = 0; + c->cpu_core_id = 0; cpu_clear(cpu, cpu_sibling_setup_map); } diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c index 413e527cdeb9..6fa6cf036c70 100644 --- a/arch/x86/kernel/stacktrace.c +++ b/arch/x86/kernel/stacktrace.c @@ -33,7 +33,7 @@ static void save_stack_address(void *data, unsigned long addr) trace->entries[trace->nr_entries++] = addr; } -static struct stacktrace_ops save_stack_ops = { +static const struct stacktrace_ops save_stack_ops = { .warning = save_stack_warning, .warning_symbol = save_stack_warning_symbol, .stack = save_stack_stack, diff --git a/arch/x86/kernel/summit_32.c b/arch/x86/kernel/summit_32.c index 91c7acc8d999..72f463401592 100644 --- a/arch/x86/kernel/summit_32.c +++ b/arch/x86/kernel/summit_32.c @@ -64,7 +64,7 @@ static int __init setup_pci_node_map_for_wpeg(int wpeg_num, int last_bus) switch (rio_devs[wpeg_num]->type){ case CompatWPEG: - /* The Compatability Winnipeg controls the 2 legacy buses, + /* The Compatibility Winnipeg controls the 2 legacy buses, * the 66MHz PCI bus [2 slots] and the 2 "extra" buses in case * a PCI-PCI bridge card is used in either slot: total 5 buses. */ diff --git a/arch/x86/kernel/suspend_64.c b/arch/x86/kernel/suspend_64.c index 573c0a6e0ac6..db284ef44d53 100644 --- a/arch/x86/kernel/suspend_64.c +++ b/arch/x86/kernel/suspend_64.c @@ -19,12 +19,6 @@ extern const void __nosave_begin, __nosave_end; struct saved_context saved_context; -unsigned long saved_context_eax, saved_context_ebx, saved_context_ecx, saved_context_edx; -unsigned long saved_context_esp, saved_context_ebp, saved_context_esi, saved_context_edi; -unsigned long saved_context_r08, saved_context_r09, saved_context_r10, saved_context_r11; -unsigned long saved_context_r12, saved_context_r13, saved_context_r14, saved_context_r15; -unsigned long saved_context_eflags; - void __save_processor_state(struct saved_context *ctxt) { kernel_fpu_begin(); @@ -32,9 +26,9 @@ void __save_processor_state(struct saved_context *ctxt) /* * descriptor tables */ - asm volatile ("sgdt %0" : "=m" (ctxt->gdt_limit)); - asm volatile ("sidt %0" : "=m" (ctxt->idt_limit)); - asm volatile ("str %0" : "=m" (ctxt->tr)); + store_gdt((struct desc_ptr *)&ctxt->gdt_limit); + store_idt((struct desc_ptr *)&ctxt->idt_limit); + store_tr(ctxt->tr); /* XMM0..XMM15 should be handled by kernel_fpu_begin(). */ /* @@ -91,8 +85,9 @@ void __restore_processor_state(struct saved_context *ctxt) * now restore the descriptor tables to their proper values * ltr is done i fix_processor_context(). */ - asm volatile ("lgdt %0" :: "m" (ctxt->gdt_limit)); - asm volatile ("lidt %0" :: "m" (ctxt->idt_limit)); + load_gdt((const struct desc_ptr *)&ctxt->gdt_limit); + load_idt((const struct desc_ptr *)&ctxt->idt_limit); + /* * segment registers @@ -123,7 +118,7 @@ void fix_processor_context(void) int cpu = smp_processor_id(); struct tss_struct *t = &per_cpu(init_tss, cpu); - set_tss_desc(cpu,t); /* This just modifies memory; should not be neccessary. But... This is neccessary, because 386 hardware has concept of busy TSS or some similar stupidity. */ + set_tss_desc(cpu,t); /* This just modifies memory; should not be necessary. But... This is necessary, because 386 hardware has concept of busy TSS or some similar stupidity. */ cpu_gdt(cpu)[GDT_ENTRY_TSS].type = 9; @@ -150,8 +145,22 @@ void fix_processor_context(void) /* Defined in arch/x86_64/kernel/suspend_asm.S */ extern int restore_image(void); +/* + * Address to jump to in the last phase of restore in order to get to the image + * kernel's text (this value is passed in the image header). + */ +unsigned long restore_jump_address; + +/* + * Value of the cr3 register from before the hibernation (this value is passed + * in the image header). + */ +unsigned long restore_cr3; + pgd_t *temp_level4_pgt; +void *relocated_restore_code; + static int res_phys_pud_init(pud_t *pud, unsigned long address, unsigned long end) { long i, j; @@ -175,7 +184,7 @@ static int res_phys_pud_init(pud_t *pud, unsigned long address, unsigned long en if (paddr >= end) break; - pe = _PAGE_NX | _PAGE_PSE | _KERNPG_TABLE | paddr; + pe = __PAGE_KERNEL_LARGE_EXEC | paddr; pe &= __supported_pte_mask; set_pmd(pmd, __pmd(pe)); } @@ -183,25 +192,42 @@ static int res_phys_pud_init(pud_t *pud, unsigned long address, unsigned long en return 0; } +static int res_kernel_text_pud_init(pud_t *pud, unsigned long start) +{ + pmd_t *pmd; + unsigned long paddr; + + pmd = (pmd_t *)get_safe_page(GFP_ATOMIC); + if (!pmd) + return -ENOMEM; + set_pud(pud + pud_index(start), __pud(__pa(pmd) | _KERNPG_TABLE)); + for (paddr = 0; paddr < KERNEL_TEXT_SIZE; pmd++, paddr += PMD_SIZE) { + unsigned long pe; + + pe = __PAGE_KERNEL_LARGE_EXEC | _PAGE_GLOBAL | paddr; + pe &= __supported_pte_mask; + set_pmd(pmd, __pmd(pe)); + } + + return 0; +} + static int set_up_temporary_mappings(void) { unsigned long start, end, next; + pud_t *pud; int error; temp_level4_pgt = (pgd_t *)get_safe_page(GFP_ATOMIC); if (!temp_level4_pgt) return -ENOMEM; - /* It is safe to reuse the original kernel mapping */ - set_pgd(temp_level4_pgt + pgd_index(__START_KERNEL_map), - init_level4_pgt[pgd_index(__START_KERNEL_map)]); - /* Set up the direct mapping from scratch */ start = (unsigned long)pfn_to_kaddr(0); end = (unsigned long)pfn_to_kaddr(end_pfn); for (; start < end; start = next) { - pud_t *pud = (pud_t *)get_safe_page(GFP_ATOMIC); + pud = (pud_t *)get_safe_page(GFP_ATOMIC); if (!pud) return -ENOMEM; next = start + PGDIR_SIZE; @@ -212,7 +238,17 @@ static int set_up_temporary_mappings(void) set_pgd(temp_level4_pgt + pgd_index(start), mk_kernel_pgd(__pa(pud))); } - return 0; + + /* Set up the kernel text mapping from scratch */ + pud = (pud_t *)get_safe_page(GFP_ATOMIC); + if (!pud) + return -ENOMEM; + error = res_kernel_text_pud_init(pud, __START_KERNEL_map); + if (!error) + set_pgd(temp_level4_pgt + pgd_index(__START_KERNEL_map), + __pgd(__pa(pud) | _PAGE_TABLE)); + + return error; } int swsusp_arch_resume(void) @@ -222,6 +258,13 @@ int swsusp_arch_resume(void) /* We have got enough memory and from now on we cannot recover */ if ((error = set_up_temporary_mappings())) return error; + + relocated_restore_code = (void *)get_safe_page(GFP_ATOMIC); + if (!relocated_restore_code) + return -ENOMEM; + memcpy(relocated_restore_code, &core_restore_code, + &restore_registers - &core_restore_code); + restore_image(); return 0; } @@ -236,4 +279,43 @@ int pfn_is_nosave(unsigned long pfn) unsigned long nosave_end_pfn = PAGE_ALIGN(__pa_symbol(&__nosave_end)) >> PAGE_SHIFT; return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn); } + +struct restore_data_record { + unsigned long jump_address; + unsigned long cr3; + unsigned long magic; +}; + +#define RESTORE_MAGIC 0x0123456789ABCDEFUL + +/** + * arch_hibernation_header_save - populate the architecture specific part + * of a hibernation image header + * @addr: address to save the data at + */ +int arch_hibernation_header_save(void *addr, unsigned int max_size) +{ + struct restore_data_record *rdr = addr; + + if (max_size < sizeof(struct restore_data_record)) + return -EOVERFLOW; + rdr->jump_address = restore_jump_address; + rdr->cr3 = restore_cr3; + rdr->magic = RESTORE_MAGIC; + return 0; +} + +/** + * arch_hibernation_header_restore - read the architecture specific data + * from the hibernation image header + * @addr: address to read the data from + */ +int arch_hibernation_header_restore(void *addr) +{ + struct restore_data_record *rdr = addr; + + restore_jump_address = rdr->jump_address; + restore_cr3 = rdr->cr3; + return (rdr->magic == RESTORE_MAGIC) ? 0 : -EINVAL; +} #endif /* CONFIG_HIBERNATION */ diff --git a/arch/x86/kernel/suspend_asm_64.S b/arch/x86/kernel/suspend_asm_64.S index 16d183f67bc1..72f952103e50 100644 --- a/arch/x86/kernel/suspend_asm_64.S +++ b/arch/x86/kernel/suspend_asm_64.S @@ -2,8 +2,8 @@ * * Distribute under GPLv2. * - * swsusp_arch_resume may not use any stack, nor any variable that is - * not "NoSave" during copying pages: + * swsusp_arch_resume must not use any stack or any nonlocal variables while + * copying pages: * * Its rewriting one kernel image with another. What is stack in "old" * image could very well be data page in "new" image, and overwriting @@ -17,24 +17,31 @@ #include <asm/asm-offsets.h> ENTRY(swsusp_arch_suspend) + movq $saved_context, %rax + movq %rsp, pt_regs_rsp(%rax) + movq %rbp, pt_regs_rbp(%rax) + movq %rsi, pt_regs_rsi(%rax) + movq %rdi, pt_regs_rdi(%rax) + movq %rbx, pt_regs_rbx(%rax) + movq %rcx, pt_regs_rcx(%rax) + movq %rdx, pt_regs_rdx(%rax) + movq %r8, pt_regs_r8(%rax) + movq %r9, pt_regs_r9(%rax) + movq %r10, pt_regs_r10(%rax) + movq %r11, pt_regs_r11(%rax) + movq %r12, pt_regs_r12(%rax) + movq %r13, pt_regs_r13(%rax) + movq %r14, pt_regs_r14(%rax) + movq %r15, pt_regs_r15(%rax) + pushfq + popq pt_regs_eflags(%rax) - movq %rsp, saved_context_esp(%rip) - movq %rax, saved_context_eax(%rip) - movq %rbx, saved_context_ebx(%rip) - movq %rcx, saved_context_ecx(%rip) - movq %rdx, saved_context_edx(%rip) - movq %rbp, saved_context_ebp(%rip) - movq %rsi, saved_context_esi(%rip) - movq %rdi, saved_context_edi(%rip) - movq %r8, saved_context_r08(%rip) - movq %r9, saved_context_r09(%rip) - movq %r10, saved_context_r10(%rip) - movq %r11, saved_context_r11(%rip) - movq %r12, saved_context_r12(%rip) - movq %r13, saved_context_r13(%rip) - movq %r14, saved_context_r14(%rip) - movq %r15, saved_context_r15(%rip) - pushfq ; popq saved_context_eflags(%rip) + /* save the address of restore_registers */ + movq $restore_registers, %rax + movq %rax, restore_jump_address(%rip) + /* save cr3 */ + movq %cr3, %rax + movq %rax, restore_cr3(%rip) call swsusp_save ret @@ -54,7 +61,17 @@ ENTRY(restore_image) movq %rcx, %cr3; movq %rax, %cr4; # turn PGE back on + /* prepare to jump to the image kernel */ + movq restore_jump_address(%rip), %rax + movq restore_cr3(%rip), %rbx + + /* prepare to copy image data to their original locations */ movq restore_pblist(%rip), %rdx + movq relocated_restore_code(%rip), %rcx + jmpq *%rcx + + /* code below has been relocated to a safe page */ +ENTRY(core_restore_code) loop: testq %rdx, %rdx jz done @@ -62,7 +79,7 @@ loop: /* get addresses from the pbe and copy the page */ movq pbe_address(%rdx), %rsi movq pbe_orig_address(%rdx), %rdi - movq $512, %rcx + movq $(PAGE_SIZE >> 3), %rcx rep movsq @@ -70,10 +87,22 @@ loop: movq pbe_next(%rdx), %rdx jmp loop done: + /* jump to the restore_registers address from the image header */ + jmpq *%rax + /* + * NOTE: This assumes that the boot kernel's text mapping covers the + * image kernel's page containing restore_registers and the address of + * this page is the same as in the image kernel's text mapping (it + * should always be true, because the text mapping is linear, starting + * from 0, and is supposed to cover the entire kernel text for every + * kernel). + * + * code below belongs to the image kernel + */ + +ENTRY(restore_registers) /* go back to the original page tables */ - movq $(init_level4_pgt - __START_KERNEL_map), %rax - addq phys_base(%rip), %rax - movq %rax, %cr3 + movq %rbx, %cr3 /* Flush TLB, including "global" things (vmalloc) */ movq mmu_cr4_features(%rip), %rax @@ -84,27 +113,29 @@ done: movq %rcx, %cr3 movq %rax, %cr4; # turn PGE back on - movl $24, %eax - movl %eax, %ds - - movq saved_context_esp(%rip), %rsp - movq saved_context_ebp(%rip), %rbp - /* Don't restore %rax, it must be 0 anyway */ - movq saved_context_ebx(%rip), %rbx - movq saved_context_ecx(%rip), %rcx - movq saved_context_edx(%rip), %rdx - movq saved_context_esi(%rip), %rsi - movq saved_context_edi(%rip), %rdi - movq saved_context_r08(%rip), %r8 - movq saved_context_r09(%rip), %r9 - movq saved_context_r10(%rip), %r10 - movq saved_context_r11(%rip), %r11 - movq saved_context_r12(%rip), %r12 - movq saved_context_r13(%rip), %r13 - movq saved_context_r14(%rip), %r14 - movq saved_context_r15(%rip), %r15 - pushq saved_context_eflags(%rip) ; popfq + /* We don't restore %rax, it must be 0 anyway */ + movq $saved_context, %rax + movq pt_regs_rsp(%rax), %rsp + movq pt_regs_rbp(%rax), %rbp + movq pt_regs_rsi(%rax), %rsi + movq pt_regs_rdi(%rax), %rdi + movq pt_regs_rbx(%rax), %rbx + movq pt_regs_rcx(%rax), %rcx + movq pt_regs_rdx(%rax), %rdx + movq pt_regs_r8(%rax), %r8 + movq pt_regs_r9(%rax), %r9 + movq pt_regs_r10(%rax), %r10 + movq pt_regs_r11(%rax), %r11 + movq pt_regs_r12(%rax), %r12 + movq pt_regs_r13(%rax), %r13 + movq pt_regs_r14(%rax), %r14 + movq pt_regs_r15(%rax), %r15 + pushq pt_regs_eflags(%rax) + popfq xorq %rax, %rax + /* tell the hibernation core that we've just restored the memory */ + movq %rax, in_suspend(%rip) + ret diff --git a/arch/x86/kernel/sys_i386_32.c b/arch/x86/kernel/sys_i386_32.c index f8bae9ba0324..a86d26f036e1 100644 --- a/arch/x86/kernel/sys_i386_32.c +++ b/arch/x86/kernel/sys_i386_32.c @@ -17,10 +17,10 @@ #include <linux/mman.h> #include <linux/file.h> #include <linux/utsname.h> +#include <linux/ipc.h> #include <asm/uaccess.h> #include <asm/unistd.h> -#include <asm/ipc.h> /* * sys_pipe() is the normal C calling standard for creating diff --git a/arch/x86/kernel/tce_64.c b/arch/x86/kernel/tce_64.c index e3f2569b2c44..9e540fee7009 100644 --- a/arch/x86/kernel/tce_64.c +++ b/arch/x86/kernel/tce_64.c @@ -40,9 +40,9 @@ static inline void flush_tce(void* tceaddr) { /* a single tce can't cross a cache line */ if (cpu_has_clflush) - asm volatile("clflush (%0)" :: "r" (tceaddr)); + clflush(tceaddr); else - asm volatile("wbinvd":::"memory"); + wbinvd(); } void tce_build(struct iommu_table *tbl, unsigned long index, diff --git a/arch/x86/kernel/topology.c b/arch/x86/kernel/topology.c index c25f23eb397c..8caa0b777466 100644 --- a/arch/x86/kernel/topology.c +++ b/arch/x86/kernel/topology.c @@ -44,15 +44,15 @@ int arch_register_cpu(int num) * Also certain PCI quirks require not to enable hotplug control * for all CPU's. */ - if (num && enable_cpu_hotplug) +#ifdef CONFIG_HOTPLUG_CPU + if (num) cpu_devices[num].cpu.hotpluggable = 1; +#endif return register_cpu(&cpu_devices[num].cpu, num); } #ifdef CONFIG_HOTPLUG_CPU -int enable_cpu_hotplug = 1; - void arch_unregister_cpu(int num) { return unregister_cpu(&cpu_devices[num].cpu); } diff --git a/arch/x86/kernel/trampoline_32.S b/arch/x86/kernel/trampoline_32.S index f62815f8d06a..9bcc1c6aca3d 100644 --- a/arch/x86/kernel/trampoline_32.S +++ b/arch/x86/kernel/trampoline_32.S @@ -36,11 +36,11 @@ #include <asm/segment.h> #include <asm/page.h> -.data - /* We can free up trampoline after bootup if cpu hotplug is not supported. */ #ifndef CONFIG_HOTPLUG_CPU .section ".init.data","aw",@progbits +#else +.section .rodata,"a",@progbits #endif .code16 diff --git a/arch/x86/kernel/trampoline_64.S b/arch/x86/kernel/trampoline_64.S index 607983b0d27b..e30b67c6a9f5 100644 --- a/arch/x86/kernel/trampoline_64.S +++ b/arch/x86/kernel/trampoline_64.S @@ -33,7 +33,12 @@ #include <asm/msr.h> #include <asm/segment.h> -.data +/* We can free up trampoline after bootup if cpu hotplug is not supported. */ +#ifndef CONFIG_HOTPLUG_CPU +.section .init.data, "aw", @progbits +#else +.section .rodata, "a", @progbits +#endif .code16 diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c index 05c27ecaf2a7..298d13ed3ab3 100644 --- a/arch/x86/kernel/traps_32.c +++ b/arch/x86/kernel/traps_32.c @@ -63,6 +63,9 @@ int panic_on_unrecovered_nmi; +DECLARE_BITMAP(used_vectors, NR_VECTORS); +EXPORT_SYMBOL_GPL(used_vectors); + asmlinkage int system_call(void); /* Do we ignore FPU interrupts ? */ @@ -112,7 +115,7 @@ struct stack_frame { static inline unsigned long print_context_stack(struct thread_info *tinfo, unsigned long *stack, unsigned long ebp, - struct stacktrace_ops *ops, void *data) + const struct stacktrace_ops *ops, void *data) { #ifdef CONFIG_FRAME_POINTER struct stack_frame *frame = (struct stack_frame *)ebp; @@ -149,7 +152,7 @@ static inline unsigned long print_context_stack(struct thread_info *tinfo, void dump_trace(struct task_struct *task, struct pt_regs *regs, unsigned long *stack, - struct stacktrace_ops *ops, void *data) + const struct stacktrace_ops *ops, void *data) { unsigned long ebp = 0; @@ -221,7 +224,7 @@ static void print_trace_address(void *data, unsigned long addr) touch_nmi_watchdog(); } -static struct stacktrace_ops print_trace_ops = { +static const struct stacktrace_ops print_trace_ops = { .warning = print_trace_warning, .warning_symbol = print_trace_warning_symbol, .stack = print_trace_stack, @@ -288,48 +291,24 @@ EXPORT_SYMBOL(dump_stack); void show_registers(struct pt_regs *regs) { int i; - int in_kernel = 1; - unsigned long esp; - unsigned short ss, gs; - - esp = (unsigned long) (®s->esp); - savesegment(ss, ss); - savesegment(gs, gs); - if (user_mode_vm(regs)) { - in_kernel = 0; - esp = regs->esp; - ss = regs->xss & 0xffff; - } + print_modules(); - printk(KERN_EMERG "CPU: %d\n" - KERN_EMERG "EIP: %04x:[<%08lx>] %s VLI\n" - KERN_EMERG "EFLAGS: %08lx (%s %.*s)\n", - smp_processor_id(), 0xffff & regs->xcs, regs->eip, - print_tainted(), regs->eflags, init_utsname()->release, - (int)strcspn(init_utsname()->version, " "), - init_utsname()->version); - print_symbol(KERN_EMERG "EIP is at %s\n", regs->eip); - printk(KERN_EMERG "eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n", - regs->eax, regs->ebx, regs->ecx, regs->edx); - printk(KERN_EMERG "esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n", - regs->esi, regs->edi, regs->ebp, esp); - printk(KERN_EMERG "ds: %04x es: %04x fs: %04x gs: %04x ss: %04x\n", - regs->xds & 0xffff, regs->xes & 0xffff, regs->xfs & 0xffff, gs, ss); + __show_registers(regs, 0); printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)", - TASK_COMM_LEN, current->comm, current->pid, + TASK_COMM_LEN, current->comm, task_pid_nr(current), current_thread_info(), current, task_thread_info(current)); /* * When in-kernel, we also print out the stack and code at the * time of the fault.. */ - if (in_kernel) { + if (!user_mode_vm(regs)) { u8 *eip; unsigned int code_prologue = code_bytes * 43 / 64; unsigned int code_len = code_bytes; unsigned char c; printk("\n" KERN_EMERG "Stack: "); - show_stack_log_lvl(NULL, regs, (unsigned long *)esp, KERN_EMERG); + show_stack_log_lvl(NULL, regs, ®s->esp, KERN_EMERG); printk(KERN_EMERG "Code: "); @@ -374,11 +353,11 @@ int is_valid_bugaddr(unsigned long eip) void die(const char * str, struct pt_regs * regs, long err) { static struct { - spinlock_t lock; + raw_spinlock_t lock; u32 lock_owner; int lock_owner_depth; } die = { - .lock = __SPIN_LOCK_UNLOCKED(die.lock), + .lock = __RAW_SPIN_LOCK_UNLOCKED, .lock_owner = -1, .lock_owner_depth = 0 }; @@ -389,40 +368,34 @@ void die(const char * str, struct pt_regs * regs, long err) if (die.lock_owner != raw_smp_processor_id()) { console_verbose(); - spin_lock_irqsave(&die.lock, flags); + __raw_spin_lock(&die.lock); + raw_local_save_flags(flags); die.lock_owner = smp_processor_id(); die.lock_owner_depth = 0; bust_spinlocks(1); } else - local_save_flags(flags); + raw_local_save_flags(flags); if (++die.lock_owner_depth < 3) { - int nl = 0; unsigned long esp; unsigned short ss; report_bug(regs->eip, regs); - printk(KERN_EMERG "%s: %04lx [#%d]\n", str, err & 0xffff, ++die_counter); + printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, + ++die_counter); #ifdef CONFIG_PREEMPT - printk(KERN_EMERG "PREEMPT "); - nl = 1; + printk("PREEMPT "); #endif #ifdef CONFIG_SMP - if (!nl) - printk(KERN_EMERG); printk("SMP "); - nl = 1; #endif #ifdef CONFIG_DEBUG_PAGEALLOC - if (!nl) - printk(KERN_EMERG); printk("DEBUG_PAGEALLOC"); - nl = 1; #endif - if (nl) - printk("\n"); + printk("\n"); + if (notify_die(DIE_OOPS, str, regs, err, current->thread.trap_no, SIGSEGV) != NOTIFY_STOP) { @@ -446,7 +419,8 @@ void die(const char * str, struct pt_regs * regs, long err) bust_spinlocks(0); die.lock_owner = -1; add_taint(TAINT_DIE); - spin_unlock_irqrestore(&die.lock, flags); + __raw_spin_unlock(&die.lock); + raw_local_irq_restore(flags); if (!regs) return; @@ -629,7 +603,7 @@ fastcall void __kprobes do_general_protection(struct pt_regs * regs, printk_ratelimit()) printk(KERN_INFO "%s[%d] general protection eip:%lx esp:%lx error:%lx\n", - current->comm, current->pid, + current->comm, task_pid_nr(current), regs->eip, regs->esp, error_code); force_sig(SIGSEGV, current); @@ -815,6 +789,8 @@ void restart_nmi(void) #ifdef CONFIG_KPROBES fastcall void __kprobes do_int3(struct pt_regs *regs, long error_code) { + trace_hardirqs_fixup(); + if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) == NOTIFY_STOP) return; @@ -1112,20 +1088,6 @@ asmlinkage void math_emulate(long arg) #endif /* CONFIG_MATH_EMULATION */ -#ifdef CONFIG_X86_F00F_BUG -void __init trap_init_f00f_bug(void) -{ - __set_fixmap(FIX_F00F_IDT, __pa(&idt_table), PAGE_KERNEL_RO); - - /* - * Update the IDT descriptor and reload the IDT so that - * it uses the read-only mapped virtual address. - */ - idt_descr.address = fix_to_virt(FIX_F00F_IDT); - load_idt(&idt_descr); -} -#endif - /* * This needs to use 'idt_table' rather than 'idt', and * thus use the _nonmapped_ version of the IDT, as the @@ -1163,6 +1125,8 @@ static void __init set_task_gate(unsigned int n, unsigned int gdt_entry) void __init trap_init(void) { + int i; + #ifdef CONFIG_EISA void __iomem *p = ioremap(0x0FFFD9, 4); if (readl(p) == 'E'+('I'<<8)+('S'<<16)+('A'<<24)) { @@ -1222,6 +1186,11 @@ void __init trap_init(void) set_system_gate(SYSCALL_VECTOR,&system_call); + /* Reserve all the builtin and the syscall vector. */ + for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++) + set_bit(i, used_vectors); + set_bit(SYSCALL_VECTOR, used_vectors); + /* * Should be a barrier for any external CPU state. */ diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c index bc7116acf8ff..4a6bd4965f56 100644 --- a/arch/x86/kernel/traps_64.c +++ b/arch/x86/kernel/traps_64.c @@ -201,7 +201,7 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, #define MSG(txt) ops->warning(data, txt) /* - * x86-64 can have upto three kernel stacks: + * x86-64 can have up to three kernel stacks: * process stack * interrupt stack * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack @@ -215,7 +215,7 @@ static inline int valid_stack_ptr(struct thread_info *tinfo, void *p) void dump_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long *stack, - struct stacktrace_ops *ops, void *data) + const struct stacktrace_ops *ops, void *data) { const unsigned cpu = get_cpu(); unsigned long *irqstack_end = (unsigned long*)cpu_pda(cpu)->irqstackptr; @@ -336,7 +336,7 @@ static void print_trace_address(void *data, unsigned long addr) printk_address(addr); } -static struct stacktrace_ops print_trace_ops = { +static const struct stacktrace_ops print_trace_ops = { .warning = print_trace_warning, .warning_symbol = print_trace_warning_symbol, .stack = print_trace_stack, @@ -462,7 +462,7 @@ void out_of_line_bug(void) EXPORT_SYMBOL(out_of_line_bug); #endif -static DEFINE_SPINLOCK(die_lock); +static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED; static int die_owner = -1; static unsigned int die_nest_count; @@ -474,13 +474,13 @@ unsigned __kprobes long oops_begin(void) oops_enter(); /* racy, but better than risking deadlock. */ - local_irq_save(flags); + raw_local_irq_save(flags); cpu = smp_processor_id(); - if (!spin_trylock(&die_lock)) { + if (!__raw_spin_trylock(&die_lock)) { if (cpu == die_owner) /* nested oops. should stop eventually */; else - spin_lock(&die_lock); + __raw_spin_lock(&die_lock); } die_nest_count++; die_owner = cpu; @@ -494,12 +494,10 @@ void __kprobes oops_end(unsigned long flags) die_owner = -1; bust_spinlocks(0); die_nest_count--; - if (die_nest_count) - /* We still own the lock */ - local_irq_restore(flags); - else + if (!die_nest_count) /* Nest count reaches zero, release the lock. */ - spin_unlock_irqrestore(&die_lock, flags); + __raw_spin_unlock(&die_lock); + raw_local_irq_restore(flags); if (panic_on_oops) panic("Fatal exception"); oops_exit(); @@ -809,6 +807,8 @@ asmlinkage __kprobes void default_do_nmi(struct pt_regs *regs) /* runs on IST stack. */ asmlinkage void __kprobes do_int3(struct pt_regs * regs, long error_code) { + trace_hardirqs_fixup(); + if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) == NOTIFY_STOP) { return; } diff --git a/arch/x86/kernel/tsc_32.c b/arch/x86/kernel/tsc_32.c index b85ad754f70e..9ebc0dab66b4 100644 --- a/arch/x86/kernel/tsc_32.c +++ b/arch/x86/kernel/tsc_32.c @@ -59,7 +59,7 @@ int check_tsc_unstable(void) } EXPORT_SYMBOL_GPL(check_tsc_unstable); -/* Accellerators for sched_clock() +/* Accelerators for sched_clock() * convert from cycles(64bits) => nanoseconds (64bits) * basic equation: * ns = cycles / (freq / ns_per_sec) @@ -74,7 +74,7 @@ EXPORT_SYMBOL_GPL(check_tsc_unstable); * And since SC is a constant power of two, we can convert the div * into a shift. * - * We can use khz divisor instead of mhz to keep a better percision, since + * We can use khz divisor instead of mhz to keep a better precision, since * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits. * (mathieu.desnoyers@polymtl.ca) * @@ -131,38 +131,43 @@ unsigned long native_calculate_cpu_khz(void) { unsigned long long start, end; unsigned long count; - u64 delta64; + u64 delta64 = (u64)ULLONG_MAX; int i; unsigned long flags; local_irq_save(flags); - /* run 3 times to ensure the cache is warm */ + /* run 3 times to ensure the cache is warm and to get an accurate reading */ for (i = 0; i < 3; i++) { mach_prepare_counter(); rdtscll(start); mach_countup(&count); rdtscll(end); - } - /* - * Error: ECTCNEVERSET - * The CTC wasn't reliable: we got a hit on the very first read, - * or the CPU was so fast/slow that the quotient wouldn't fit in - * 32 bits.. - */ - if (count <= 1) - goto err; - delta64 = end - start; + /* + * Error: ECTCNEVERSET + * The CTC wasn't reliable: we got a hit on the very first read, + * or the CPU was so fast/slow that the quotient wouldn't fit in + * 32 bits.. + */ + if (count <= 1) + continue; + + /* cpu freq too slow: */ + if ((end - start) <= CALIBRATE_TIME_MSEC) + continue; + + /* + * We want the minimum time of all runs in case one of them + * is inaccurate due to SMI or other delay + */ + delta64 = min(delta64, (end - start)); + } - /* cpu freq too fast: */ + /* cpu freq too fast (or every run was bad): */ if (delta64 > (1ULL<<32)) goto err; - /* cpu freq too slow: */ - if (delta64 <= CALIBRATE_TIME_MSEC) - goto err; - delta64 += CALIBRATE_TIME_MSEC/2; /* round for do_div */ do_div(delta64,CALIBRATE_TIME_MSEC); @@ -181,8 +186,8 @@ int recalibrate_cpu_khz(void) if (cpu_has_tsc) { cpu_khz = calculate_cpu_khz(); tsc_khz = cpu_khz; - cpu_data[0].loops_per_jiffy = - cpufreq_scale(cpu_data[0].loops_per_jiffy, + cpu_data(0).loops_per_jiffy = + cpufreq_scale(cpu_data(0).loops_per_jiffy, cpu_khz_old, cpu_khz); return 0; } else @@ -215,7 +220,7 @@ time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, void *data) return 0; } ref_freq = freq->old; - loops_per_jiffy_ref = cpu_data[freq->cpu].loops_per_jiffy; + loops_per_jiffy_ref = cpu_data(freq->cpu).loops_per_jiffy; cpu_khz_ref = cpu_khz; } @@ -223,7 +228,7 @@ time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, void *data) (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) || (val == CPUFREQ_RESUMECHANGE)) { if (!(freq->flags & CPUFREQ_CONST_LOOPS)) - cpu_data[freq->cpu].loops_per_jiffy = + cpu_data(freq->cpu).loops_per_jiffy = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new); @@ -349,10 +354,10 @@ __cpuinit int unsynchronized_tsc(void) static void __init check_geode_tsc_reliable(void) { - unsigned long val; + unsigned long res_low, res_high; - rdmsrl(MSR_GEODE_BUSCONT_CONF0, val); - if ((val & RTSC_SUSP)) + rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high); + if (res_low & RTSC_SUSP) clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY; } #else diff --git a/arch/x86/kernel/tsc_64.c b/arch/x86/kernel/tsc_64.c index 9f22e542c374..9c70af45b42b 100644 --- a/arch/x86/kernel/tsc_64.c +++ b/arch/x86/kernel/tsc_64.c @@ -73,13 +73,13 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, struct cpufreq_freqs *freq = data; unsigned long *lpj, dummy; - if (cpu_has(&cpu_data[freq->cpu], X86_FEATURE_CONSTANT_TSC)) + if (cpu_has(&cpu_data(freq->cpu), X86_FEATURE_CONSTANT_TSC)) return 0; lpj = &dummy; if (!(freq->flags & CPUFREQ_CONST_LOOPS)) #ifdef CONFIG_SMP - lpj = &cpu_data[freq->cpu].loops_per_jiffy; + lpj = &cpu_data(freq->cpu).loops_per_jiffy; #else lpj = &boot_cpu_data.loops_per_jiffy; #endif diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index 18673e0f193b..f02bad68abaa 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c @@ -134,21 +134,21 @@ static unsigned vmi_patch(u8 type, u16 clobbers, void *insns, unsigned long eip, unsigned len) { switch (type) { - case PARAVIRT_PATCH(irq_disable): + case PARAVIRT_PATCH(pv_irq_ops.irq_disable): return patch_internal(VMI_CALL_DisableInterrupts, len, insns, eip); - case PARAVIRT_PATCH(irq_enable): + case PARAVIRT_PATCH(pv_irq_ops.irq_enable): return patch_internal(VMI_CALL_EnableInterrupts, len, insns, eip); - case PARAVIRT_PATCH(restore_fl): + case PARAVIRT_PATCH(pv_irq_ops.restore_fl): return patch_internal(VMI_CALL_SetInterruptMask, len, insns, eip); - case PARAVIRT_PATCH(save_fl): + case PARAVIRT_PATCH(pv_irq_ops.save_fl): return patch_internal(VMI_CALL_GetInterruptMask, len, insns, eip); - case PARAVIRT_PATCH(iret): + case PARAVIRT_PATCH(pv_cpu_ops.iret): return patch_internal(VMI_CALL_IRET, len, insns, eip); - case PARAVIRT_PATCH(irq_enable_sysexit): + case PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit): return patch_internal(VMI_CALL_SYSEXIT, len, insns, eip); default: break; @@ -552,24 +552,22 @@ vmi_startup_ipi_hook(int phys_apicid, unsigned long start_eip, } #endif -static void vmi_set_lazy_mode(enum paravirt_lazy_mode mode) +static void vmi_enter_lazy_cpu(void) { - static DEFINE_PER_CPU(enum paravirt_lazy_mode, lazy_mode); - - if (!vmi_ops.set_lazy_mode) - return; + paravirt_enter_lazy_cpu(); + vmi_ops.set_lazy_mode(2); +} - /* Modes should never nest or overlap */ - BUG_ON(__get_cpu_var(lazy_mode) && !(mode == PARAVIRT_LAZY_NONE || - mode == PARAVIRT_LAZY_FLUSH)); +static void vmi_enter_lazy_mmu(void) +{ + paravirt_enter_lazy_mmu(); + vmi_ops.set_lazy_mode(1); +} - if (mode == PARAVIRT_LAZY_FLUSH) { - vmi_ops.set_lazy_mode(0); - vmi_ops.set_lazy_mode(__get_cpu_var(lazy_mode)); - } else { - vmi_ops.set_lazy_mode(mode); - __get_cpu_var(lazy_mode) = mode; - } +static void vmi_leave_lazy(void) +{ + paravirt_leave_lazy(paravirt_get_lazy_mode()); + vmi_ops.set_lazy_mode(0); } static inline int __init check_vmi_rom(struct vrom_header *rom) @@ -690,9 +688,9 @@ do { \ reloc = call_vrom_long_func(vmi_rom, get_reloc, \ VMI_CALL_##vmicall); \ if (rel->type == VMI_RELOCATION_CALL_REL) \ - paravirt_ops.opname = (void *)rel->eip; \ + opname = (void *)rel->eip; \ else if (rel->type == VMI_RELOCATION_NOP) \ - paravirt_ops.opname = (void *)vmi_nop; \ + opname = (void *)vmi_nop; \ else if (rel->type != VMI_RELOCATION_NONE) \ printk(KERN_WARNING "VMI: Unknown relocation " \ "type %d for " #vmicall"\n",\ @@ -712,7 +710,7 @@ do { \ VMI_CALL_##vmicall); \ BUG_ON(rel->type == VMI_RELOCATION_JUMP_REL); \ if (rel->type == VMI_RELOCATION_CALL_REL) { \ - paravirt_ops.opname = wrapper; \ + opname = wrapper; \ vmi_ops.cache = (void *)rel->eip; \ } \ } while (0) @@ -732,11 +730,11 @@ static inline int __init activate_vmi(void) } savesegment(cs, kernel_cs); - paravirt_ops.paravirt_enabled = 1; - paravirt_ops.kernel_rpl = kernel_cs & SEGMENT_RPL_MASK; + pv_info.paravirt_enabled = 1; + pv_info.kernel_rpl = kernel_cs & SEGMENT_RPL_MASK; + pv_info.name = "vmi"; - paravirt_ops.patch = vmi_patch; - paravirt_ops.name = "vmi"; + pv_init_ops.patch = vmi_patch; /* * Many of these operations are ABI compatible with VMI. @@ -754,26 +752,26 @@ static inline int __init activate_vmi(void) */ /* CPUID is special, so very special it gets wrapped like a present */ - para_wrap(cpuid, vmi_cpuid, cpuid, CPUID); - - para_fill(clts, CLTS); - para_fill(get_debugreg, GetDR); - para_fill(set_debugreg, SetDR); - para_fill(read_cr0, GetCR0); - para_fill(read_cr2, GetCR2); - para_fill(read_cr3, GetCR3); - para_fill(read_cr4, GetCR4); - para_fill(write_cr0, SetCR0); - para_fill(write_cr2, SetCR2); - para_fill(write_cr3, SetCR3); - para_fill(write_cr4, SetCR4); - para_fill(save_fl, GetInterruptMask); - para_fill(restore_fl, SetInterruptMask); - para_fill(irq_disable, DisableInterrupts); - para_fill(irq_enable, EnableInterrupts); - - para_fill(wbinvd, WBINVD); - para_fill(read_tsc, RDTSC); + para_wrap(pv_cpu_ops.cpuid, vmi_cpuid, cpuid, CPUID); + + para_fill(pv_cpu_ops.clts, CLTS); + para_fill(pv_cpu_ops.get_debugreg, GetDR); + para_fill(pv_cpu_ops.set_debugreg, SetDR); + para_fill(pv_cpu_ops.read_cr0, GetCR0); + para_fill(pv_mmu_ops.read_cr2, GetCR2); + para_fill(pv_mmu_ops.read_cr3, GetCR3); + para_fill(pv_cpu_ops.read_cr4, GetCR4); + para_fill(pv_cpu_ops.write_cr0, SetCR0); + para_fill(pv_mmu_ops.write_cr2, SetCR2); + para_fill(pv_mmu_ops.write_cr3, SetCR3); + para_fill(pv_cpu_ops.write_cr4, SetCR4); + para_fill(pv_irq_ops.save_fl, GetInterruptMask); + para_fill(pv_irq_ops.restore_fl, SetInterruptMask); + para_fill(pv_irq_ops.irq_disable, DisableInterrupts); + para_fill(pv_irq_ops.irq_enable, EnableInterrupts); + + para_fill(pv_cpu_ops.wbinvd, WBINVD); + para_fill(pv_cpu_ops.read_tsc, RDTSC); /* The following we emulate with trap and emulate for now */ /* paravirt_ops.read_msr = vmi_rdmsr */ @@ -781,29 +779,38 @@ static inline int __init activate_vmi(void) /* paravirt_ops.rdpmc = vmi_rdpmc */ /* TR interface doesn't pass TR value, wrap */ - para_wrap(load_tr_desc, vmi_set_tr, set_tr, SetTR); + para_wrap(pv_cpu_ops.load_tr_desc, vmi_set_tr, set_tr, SetTR); /* LDT is special, too */ - para_wrap(set_ldt, vmi_set_ldt, _set_ldt, SetLDT); - - para_fill(load_gdt, SetGDT); - para_fill(load_idt, SetIDT); - para_fill(store_gdt, GetGDT); - para_fill(store_idt, GetIDT); - para_fill(store_tr, GetTR); - paravirt_ops.load_tls = vmi_load_tls; - para_fill(write_ldt_entry, WriteLDTEntry); - para_fill(write_gdt_entry, WriteGDTEntry); - para_fill(write_idt_entry, WriteIDTEntry); - para_wrap(load_esp0, vmi_load_esp0, set_kernel_stack, UpdateKernelStack); - para_fill(set_iopl_mask, SetIOPLMask); - para_fill(io_delay, IODelay); - para_wrap(set_lazy_mode, vmi_set_lazy_mode, set_lazy_mode, SetLazyMode); + para_wrap(pv_cpu_ops.set_ldt, vmi_set_ldt, _set_ldt, SetLDT); + + para_fill(pv_cpu_ops.load_gdt, SetGDT); + para_fill(pv_cpu_ops.load_idt, SetIDT); + para_fill(pv_cpu_ops.store_gdt, GetGDT); + para_fill(pv_cpu_ops.store_idt, GetIDT); + para_fill(pv_cpu_ops.store_tr, GetTR); + pv_cpu_ops.load_tls = vmi_load_tls; + para_fill(pv_cpu_ops.write_ldt_entry, WriteLDTEntry); + para_fill(pv_cpu_ops.write_gdt_entry, WriteGDTEntry); + para_fill(pv_cpu_ops.write_idt_entry, WriteIDTEntry); + para_wrap(pv_cpu_ops.load_esp0, vmi_load_esp0, set_kernel_stack, UpdateKernelStack); + para_fill(pv_cpu_ops.set_iopl_mask, SetIOPLMask); + para_fill(pv_cpu_ops.io_delay, IODelay); + + para_wrap(pv_cpu_ops.lazy_mode.enter, vmi_enter_lazy_cpu, + set_lazy_mode, SetLazyMode); + para_wrap(pv_cpu_ops.lazy_mode.leave, vmi_leave_lazy, + set_lazy_mode, SetLazyMode); + + para_wrap(pv_mmu_ops.lazy_mode.enter, vmi_enter_lazy_mmu, + set_lazy_mode, SetLazyMode); + para_wrap(pv_mmu_ops.lazy_mode.leave, vmi_leave_lazy, + set_lazy_mode, SetLazyMode); /* user and kernel flush are just handled with different flags to FlushTLB */ - para_wrap(flush_tlb_user, vmi_flush_tlb_user, _flush_tlb, FlushTLB); - para_wrap(flush_tlb_kernel, vmi_flush_tlb_kernel, _flush_tlb, FlushTLB); - para_fill(flush_tlb_single, InvalPage); + para_wrap(pv_mmu_ops.flush_tlb_user, vmi_flush_tlb_user, _flush_tlb, FlushTLB); + para_wrap(pv_mmu_ops.flush_tlb_kernel, vmi_flush_tlb_kernel, _flush_tlb, FlushTLB); + para_fill(pv_mmu_ops.flush_tlb_single, InvalPage); /* * Until a standard flag format can be agreed on, we need to @@ -819,41 +826,41 @@ static inline int __init activate_vmi(void) #endif if (vmi_ops.set_pte) { - paravirt_ops.set_pte = vmi_set_pte; - paravirt_ops.set_pte_at = vmi_set_pte_at; - paravirt_ops.set_pmd = vmi_set_pmd; + pv_mmu_ops.set_pte = vmi_set_pte; + pv_mmu_ops.set_pte_at = vmi_set_pte_at; + pv_mmu_ops.set_pmd = vmi_set_pmd; #ifdef CONFIG_X86_PAE - paravirt_ops.set_pte_atomic = vmi_set_pte_atomic; - paravirt_ops.set_pte_present = vmi_set_pte_present; - paravirt_ops.set_pud = vmi_set_pud; - paravirt_ops.pte_clear = vmi_pte_clear; - paravirt_ops.pmd_clear = vmi_pmd_clear; + pv_mmu_ops.set_pte_atomic = vmi_set_pte_atomic; + pv_mmu_ops.set_pte_present = vmi_set_pte_present; + pv_mmu_ops.set_pud = vmi_set_pud; + pv_mmu_ops.pte_clear = vmi_pte_clear; + pv_mmu_ops.pmd_clear = vmi_pmd_clear; #endif } if (vmi_ops.update_pte) { - paravirt_ops.pte_update = vmi_update_pte; - paravirt_ops.pte_update_defer = vmi_update_pte_defer; + pv_mmu_ops.pte_update = vmi_update_pte; + pv_mmu_ops.pte_update_defer = vmi_update_pte_defer; } vmi_ops.allocate_page = vmi_get_function(VMI_CALL_AllocatePage); if (vmi_ops.allocate_page) { - paravirt_ops.alloc_pt = vmi_allocate_pt; - paravirt_ops.alloc_pd = vmi_allocate_pd; - paravirt_ops.alloc_pd_clone = vmi_allocate_pd_clone; + pv_mmu_ops.alloc_pt = vmi_allocate_pt; + pv_mmu_ops.alloc_pd = vmi_allocate_pd; + pv_mmu_ops.alloc_pd_clone = vmi_allocate_pd_clone; } vmi_ops.release_page = vmi_get_function(VMI_CALL_ReleasePage); if (vmi_ops.release_page) { - paravirt_ops.release_pt = vmi_release_pt; - paravirt_ops.release_pd = vmi_release_pd; + pv_mmu_ops.release_pt = vmi_release_pt; + pv_mmu_ops.release_pd = vmi_release_pd; } /* Set linear is needed in all cases */ vmi_ops.set_linear_mapping = vmi_get_function(VMI_CALL_SetLinearMapping); #ifdef CONFIG_HIGHPTE if (vmi_ops.set_linear_mapping) - paravirt_ops.kmap_atomic_pte = vmi_kmap_atomic_pte; + pv_mmu_ops.kmap_atomic_pte = vmi_kmap_atomic_pte; #endif /* @@ -863,17 +870,17 @@ static inline int __init activate_vmi(void) * the backend. They are performance critical anyway, so requiring * a patch is not a big problem. */ - paravirt_ops.irq_enable_sysexit = (void *)0xfeedbab0; - paravirt_ops.iret = (void *)0xbadbab0; + pv_cpu_ops.irq_enable_sysexit = (void *)0xfeedbab0; + pv_cpu_ops.iret = (void *)0xbadbab0; #ifdef CONFIG_SMP - para_wrap(startup_ipi_hook, vmi_startup_ipi_hook, set_initial_ap_state, SetInitialAPState); + para_wrap(pv_apic_ops.startup_ipi_hook, vmi_startup_ipi_hook, set_initial_ap_state, SetInitialAPState); #endif #ifdef CONFIG_X86_LOCAL_APIC - para_fill(apic_read, APICRead); - para_fill(apic_write, APICWrite); - para_fill(apic_write_atomic, APICWrite); + para_fill(pv_apic_ops.apic_read, APICRead); + para_fill(pv_apic_ops.apic_write, APICWrite); + para_fill(pv_apic_ops.apic_write_atomic, APICWrite); #endif /* @@ -891,15 +898,15 @@ static inline int __init activate_vmi(void) vmi_timer_ops.set_alarm = vmi_get_function(VMI_CALL_SetAlarm); vmi_timer_ops.cancel_alarm = vmi_get_function(VMI_CALL_CancelAlarm); - paravirt_ops.time_init = vmi_time_init; - paravirt_ops.get_wallclock = vmi_get_wallclock; - paravirt_ops.set_wallclock = vmi_set_wallclock; + pv_time_ops.time_init = vmi_time_init; + pv_time_ops.get_wallclock = vmi_get_wallclock; + pv_time_ops.set_wallclock = vmi_set_wallclock; #ifdef CONFIG_X86_LOCAL_APIC - paravirt_ops.setup_boot_clock = vmi_time_bsp_init; - paravirt_ops.setup_secondary_clock = vmi_time_ap_init; + pv_apic_ops.setup_boot_clock = vmi_time_bsp_init; + pv_apic_ops.setup_secondary_clock = vmi_time_ap_init; #endif - paravirt_ops.sched_clock = vmi_sched_clock; - paravirt_ops.get_cpu_khz = vmi_cpu_khz; + pv_time_ops.sched_clock = vmi_sched_clock; + pv_time_ops.get_cpu_khz = vmi_cpu_khz; /* We have true wallclock functions; disable CMOS clock sync */ no_sync_cmos_clock = 1; @@ -908,7 +915,7 @@ static inline int __init activate_vmi(void) disable_vmi_timer = 1; } - para_fill(safe_halt, Halt); + para_fill(pv_irq_ops.safe_halt, Halt); /* * Alternative instruction rewriting doesn't happen soon enough diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index 93847d848157..ad4005c6d4a1 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c @@ -48,12 +48,12 @@ ({unsigned long v; \ extern char __vsyscall_0; \ asm("" : "=r" (v) : "0" (x)); \ - ((v - VSYSCALL_FIRST_PAGE) + __pa_symbol(&__vsyscall_0)); }) + ((v - VSYSCALL_START) + __pa_symbol(&__vsyscall_0)); }) /* * vsyscall_gtod_data contains data that is : * - readonly from vsyscalls - * - writen by timer interrupt or systcl (/proc/sys/kernel/vsyscall64) + * - written by timer interrupt or systcl (/proc/sys/kernel/vsyscall64) * Try to keep this structure as small as possible to avoid cache line ping pongs */ int __vgetcpu_mode __section_vgetcpu_mode; @@ -64,6 +64,16 @@ struct vsyscall_gtod_data __vsyscall_gtod_data __section_vsyscall_gtod_data = .sysctl_enabled = 1, }; +void update_vsyscall_tz(void) +{ + unsigned long flags; + + write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags); + /* sys_tz has changed */ + vsyscall_gtod_data.sys_tz = sys_tz; + write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); +} + void update_vsyscall(struct timespec *wall_time, struct clocksource *clock) { unsigned long flags; @@ -77,8 +87,6 @@ void update_vsyscall(struct timespec *wall_time, struct clocksource *clock) vsyscall_gtod_data.clock.shift = clock->shift; vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec; vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec; - vsyscall_gtod_data.sys_tz = sys_tz; - vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec; vsyscall_gtod_data.wall_to_monotonic = wall_to_monotonic; write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); } @@ -164,7 +172,7 @@ time_t __vsyscall(1) vtime(time_t *t) if (unlikely(!__vsyscall_gtod_data.sysctl_enabled)) return time_syscall(t); - vgettimeofday(&tv, 0); + vgettimeofday(&tv, NULL); result = tv.tv_sec; if (t) *t = result; @@ -258,18 +266,10 @@ out: return ret; } -static int vsyscall_sysctl_nostrat(ctl_table *t, int __user *name, int nlen, - void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen) -{ - return -ENOSYS; -} - static ctl_table kernel_table2[] = { - { .ctl_name = 99, .procname = "vsyscall64", + { .procname = "vsyscall64", .data = &vsyscall_gtod_data.sysctl_enabled, .maxlen = sizeof(int), .mode = 0644, - .strategy = vsyscall_sysctl_nostrat, .proc_handler = vsyscall_sysctl_change }, {} }; @@ -289,9 +289,9 @@ static void __cpuinit vsyscall_set_cpu(int cpu) unsigned long *d; unsigned long node = 0; #ifdef CONFIG_NUMA - node = cpu_to_node[cpu]; + node = cpu_to_node(cpu); #endif - if (cpu_has(&cpu_data[cpu], X86_FEATURE_RDTSCP)) + if (cpu_has(&cpu_data(cpu), X86_FEATURE_RDTSCP)) write_rdtscp_aux((node << 12) | cpu); /* Store cpu number in limit so that it can be loaded quickly diff --git a/arch/x86/lguest/Kconfig b/arch/x86/lguest/Kconfig new file mode 100644 index 000000000000..c4dffbeea5e1 --- /dev/null +++ b/arch/x86/lguest/Kconfig @@ -0,0 +1,14 @@ +config LGUEST_GUEST + bool "Lguest guest support" + select PARAVIRT + depends on !X86_PAE + select VIRTIO + select VIRTIO_RING + select VIRTIO_CONSOLE + help + Lguest is a tiny in-kernel hypervisor. Selecting this will + allow your kernel to boot under lguest. This option will increase + your kernel size by about 6k. If in doubt, say N. + + If you say Y here, make sure you say Y (or M) to the virtio block + and net drivers which lguest needs. diff --git a/arch/x86/lguest/Makefile b/arch/x86/lguest/Makefile new file mode 100644 index 000000000000..27f0c9ed7f60 --- /dev/null +++ b/arch/x86/lguest/Makefile @@ -0,0 +1 @@ +obj-y := i386_head.o boot.o diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c new file mode 100644 index 000000000000..92c56117eae5 --- /dev/null +++ b/arch/x86/lguest/boot.c @@ -0,0 +1,1071 @@ +/*P:010 + * A hypervisor allows multiple Operating Systems to run on a single machine. + * To quote David Wheeler: "Any problem in computer science can be solved with + * another layer of indirection." + * + * We keep things simple in two ways. First, we start with a normal Linux + * kernel and insert a module (lg.ko) which allows us to run other Linux + * kernels the same way we'd run processes. We call the first kernel the Host, + * and the others the Guests. The program which sets up and configures Guests + * (such as the example in Documentation/lguest/lguest.c) is called the + * Launcher. + * + * Secondly, we only run specially modified Guests, not normal kernels. When + * you set CONFIG_LGUEST to 'y' or 'm', this automatically sets + * CONFIG_LGUEST_GUEST=y, which compiles this file into the kernel so it knows + * how to be a Guest. This means that you can use the same kernel you boot + * normally (ie. as a Host) as a Guest. + * + * These Guests know that they cannot do privileged operations, such as disable + * interrupts, and that they have to ask the Host to do such things explicitly. + * This file consists of all the replacements for such low-level native + * hardware operations: these special Guest versions call the Host. + * + * So how does the kernel know it's a Guest? The Guest starts at a special + * entry point marked with a magic string, which sets up a few things then + * calls here. We replace the native functions various "paravirt" structures + * with our Guest versions, then boot like normal. :*/ + +/* + * Copyright (C) 2006, Rusty Russell <rusty@rustcorp.com.au> IBM Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +#include <linux/kernel.h> +#include <linux/start_kernel.h> +#include <linux/string.h> +#include <linux/console.h> +#include <linux/screen_info.h> +#include <linux/irq.h> +#include <linux/interrupt.h> +#include <linux/clocksource.h> +#include <linux/clockchips.h> +#include <linux/lguest.h> +#include <linux/lguest_launcher.h> +#include <linux/virtio_console.h> +#include <linux/pm.h> +#include <asm/paravirt.h> +#include <asm/param.h> +#include <asm/page.h> +#include <asm/pgtable.h> +#include <asm/desc.h> +#include <asm/setup.h> +#include <asm/e820.h> +#include <asm/mce.h> +#include <asm/io.h> +#include <asm/i387.h> + +/*G:010 Welcome to the Guest! + * + * The Guest in our tale is a simple creature: identical to the Host but + * behaving in simplified but equivalent ways. In particular, the Guest is the + * same kernel as the Host (or at least, built from the same source code). :*/ + +/* Declarations for definitions in lguest_guest.S */ +extern char lguest_noirq_start[], lguest_noirq_end[]; +extern const char lgstart_cli[], lgend_cli[]; +extern const char lgstart_sti[], lgend_sti[]; +extern const char lgstart_popf[], lgend_popf[]; +extern const char lgstart_pushf[], lgend_pushf[]; +extern const char lgstart_iret[], lgend_iret[]; +extern void lguest_iret(void); + +struct lguest_data lguest_data = { + .hcall_status = { [0 ... LHCALL_RING_SIZE-1] = 0xFF }, + .noirq_start = (u32)lguest_noirq_start, + .noirq_end = (u32)lguest_noirq_end, + .kernel_address = PAGE_OFFSET, + .blocked_interrupts = { 1 }, /* Block timer interrupts */ + .syscall_vec = SYSCALL_VECTOR, +}; +static cycle_t clock_base; + +/*G:037 async_hcall() is pretty simple: I'm quite proud of it really. We have a + * ring buffer of stored hypercalls which the Host will run though next time we + * do a normal hypercall. Each entry in the ring has 4 slots for the hypercall + * arguments, and a "hcall_status" word which is 0 if the call is ready to go, + * and 255 once the Host has finished with it. + * + * If we come around to a slot which hasn't been finished, then the table is + * full and we just make the hypercall directly. This has the nice side + * effect of causing the Host to run all the stored calls in the ring buffer + * which empties it for next time! */ +static void async_hcall(unsigned long call, unsigned long arg1, + unsigned long arg2, unsigned long arg3) +{ + /* Note: This code assumes we're uniprocessor. */ + static unsigned int next_call; + unsigned long flags; + + /* Disable interrupts if not already disabled: we don't want an + * interrupt handler making a hypercall while we're already doing + * one! */ + local_irq_save(flags); + if (lguest_data.hcall_status[next_call] != 0xFF) { + /* Table full, so do normal hcall which will flush table. */ + hcall(call, arg1, arg2, arg3); + } else { + lguest_data.hcalls[next_call].arg0 = call; + lguest_data.hcalls[next_call].arg1 = arg1; + lguest_data.hcalls[next_call].arg2 = arg2; + lguest_data.hcalls[next_call].arg3 = arg3; + /* Arguments must all be written before we mark it to go */ + wmb(); + lguest_data.hcall_status[next_call] = 0; + if (++next_call == LHCALL_RING_SIZE) + next_call = 0; + } + local_irq_restore(flags); +} + +/*G:035 Notice the lazy_hcall() above, rather than hcall(). This is our first + * real optimization trick! + * + * When lazy_mode is set, it means we're allowed to defer all hypercalls and do + * them as a batch when lazy_mode is eventually turned off. Because hypercalls + * are reasonably expensive, batching them up makes sense. For example, a + * large munmap might update dozens of page table entries: that code calls + * paravirt_enter_lazy_mmu(), does the dozen updates, then calls + * lguest_leave_lazy_mode(). + * + * So, when we're in lazy mode, we call async_hcall() to store the call for + * future processing. */ +static void lazy_hcall(unsigned long call, + unsigned long arg1, + unsigned long arg2, + unsigned long arg3) +{ + if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) + hcall(call, arg1, arg2, arg3); + else + async_hcall(call, arg1, arg2, arg3); +} + +/* When lazy mode is turned off reset the per-cpu lazy mode variable and then + * issue a hypercall to flush any stored calls. */ +static void lguest_leave_lazy_mode(void) +{ + paravirt_leave_lazy(paravirt_get_lazy_mode()); + hcall(LHCALL_FLUSH_ASYNC, 0, 0, 0); +} + +/*G:033 + * After that diversion we return to our first native-instruction + * replacements: four functions for interrupt control. + * + * The simplest way of implementing these would be to have "turn interrupts + * off" and "turn interrupts on" hypercalls. Unfortunately, this is too slow: + * these are by far the most commonly called functions of those we override. + * + * So instead we keep an "irq_enabled" field inside our "struct lguest_data", + * which the Guest can update with a single instruction. The Host knows to + * check there when it wants to deliver an interrupt. + */ + +/* save_flags() is expected to return the processor state (ie. "eflags"). The + * eflags word contains all kind of stuff, but in practice Linux only cares + * about the interrupt flag. Our "save_flags()" just returns that. */ +static unsigned long save_fl(void) +{ + return lguest_data.irq_enabled; +} + +/* restore_flags() just sets the flags back to the value given. */ +static void restore_fl(unsigned long flags) +{ + lguest_data.irq_enabled = flags; +} + +/* Interrupts go off... */ +static void irq_disable(void) +{ + lguest_data.irq_enabled = 0; +} + +/* Interrupts go on... */ +static void irq_enable(void) +{ + lguest_data.irq_enabled = X86_EFLAGS_IF; +} +/*:*/ +/*M:003 Note that we don't check for outstanding interrupts when we re-enable + * them (or when we unmask an interrupt). This seems to work for the moment, + * since interrupts are rare and we'll just get the interrupt on the next timer + * tick, but when we turn on CONFIG_NO_HZ, we should revisit this. One way + * would be to put the "irq_enabled" field in a page by itself, and have the + * Host write-protect it when an interrupt comes in when irqs are disabled. + * There will then be a page fault as soon as interrupts are re-enabled. :*/ + +/*G:034 + * The Interrupt Descriptor Table (IDT). + * + * The IDT tells the processor what to do when an interrupt comes in. Each + * entry in the table is a 64-bit descriptor: this holds the privilege level, + * address of the handler, and... well, who cares? The Guest just asks the + * Host to make the change anyway, because the Host controls the real IDT. + */ +static void lguest_write_idt_entry(struct desc_struct *dt, + int entrynum, u32 low, u32 high) +{ + /* Keep the local copy up to date. */ + write_dt_entry(dt, entrynum, low, high); + /* Tell Host about this new entry. */ + hcall(LHCALL_LOAD_IDT_ENTRY, entrynum, low, high); +} + +/* Changing to a different IDT is very rare: we keep the IDT up-to-date every + * time it is written, so we can simply loop through all entries and tell the + * Host about them. */ +static void lguest_load_idt(const struct Xgt_desc_struct *desc) +{ + unsigned int i; + struct desc_struct *idt = (void *)desc->address; + + for (i = 0; i < (desc->size+1)/8; i++) + hcall(LHCALL_LOAD_IDT_ENTRY, i, idt[i].a, idt[i].b); +} + +/* + * The Global Descriptor Table. + * + * The Intel architecture defines another table, called the Global Descriptor + * Table (GDT). You tell the CPU where it is (and its size) using the "lgdt" + * instruction, and then several other instructions refer to entries in the + * table. There are three entries which the Switcher needs, so the Host simply + * controls the entire thing and the Guest asks it to make changes using the + * LOAD_GDT hypercall. + * + * This is the opposite of the IDT code where we have a LOAD_IDT_ENTRY + * hypercall and use that repeatedly to load a new IDT. I don't think it + * really matters, but wouldn't it be nice if they were the same? + */ +static void lguest_load_gdt(const struct Xgt_desc_struct *desc) +{ + BUG_ON((desc->size+1)/8 != GDT_ENTRIES); + hcall(LHCALL_LOAD_GDT, __pa(desc->address), GDT_ENTRIES, 0); +} + +/* For a single GDT entry which changes, we do the lazy thing: alter our GDT, + * then tell the Host to reload the entire thing. This operation is so rare + * that this naive implementation is reasonable. */ +static void lguest_write_gdt_entry(struct desc_struct *dt, + int entrynum, u32 low, u32 high) +{ + write_dt_entry(dt, entrynum, low, high); + hcall(LHCALL_LOAD_GDT, __pa(dt), GDT_ENTRIES, 0); +} + +/* OK, I lied. There are three "thread local storage" GDT entries which change + * on every context switch (these three entries are how glibc implements + * __thread variables). So we have a hypercall specifically for this case. */ +static void lguest_load_tls(struct thread_struct *t, unsigned int cpu) +{ + /* There's one problem which normal hardware doesn't have: the Host + * can't handle us removing entries we're currently using. So we clear + * the GS register here: if it's needed it'll be reloaded anyway. */ + loadsegment(gs, 0); + lazy_hcall(LHCALL_LOAD_TLS, __pa(&t->tls_array), cpu, 0); +} + +/*G:038 That's enough excitement for now, back to ploughing through each of + * the different pv_ops structures (we're about 1/3 of the way through). + * + * This is the Local Descriptor Table, another weird Intel thingy. Linux only + * uses this for some strange applications like Wine. We don't do anything + * here, so they'll get an informative and friendly Segmentation Fault. */ +static void lguest_set_ldt(const void *addr, unsigned entries) +{ +} + +/* This loads a GDT entry into the "Task Register": that entry points to a + * structure called the Task State Segment. Some comments scattered though the + * kernel code indicate that this used for task switching in ages past, along + * with blood sacrifice and astrology. + * + * Now there's nothing interesting in here that we don't get told elsewhere. + * But the native version uses the "ltr" instruction, which makes the Host + * complain to the Guest about a Segmentation Fault and it'll oops. So we + * override the native version with a do-nothing version. */ +static void lguest_load_tr_desc(void) +{ +} + +/* The "cpuid" instruction is a way of querying both the CPU identity + * (manufacturer, model, etc) and its features. It was introduced before the + * Pentium in 1993 and keeps getting extended by both Intel and AMD. As you + * might imagine, after a decade and a half this treatment, it is now a giant + * ball of hair. Its entry in the current Intel manual runs to 28 pages. + * + * This instruction even it has its own Wikipedia entry. The Wikipedia entry + * has been translated into 4 languages. I am not making this up! + * + * We could get funky here and identify ourselves as "GenuineLguest", but + * instead we just use the real "cpuid" instruction. Then I pretty much turned + * off feature bits until the Guest booted. (Don't say that: you'll damage + * lguest sales!) Shut up, inner voice! (Hey, just pointing out that this is + * hardly future proof.) Noone's listening! They don't like you anyway, + * parenthetic weirdo! + * + * Replacing the cpuid so we can turn features off is great for the kernel, but + * anyone (including userspace) can just use the raw "cpuid" instruction and + * the Host won't even notice since it isn't privileged. So we try not to get + * too worked up about it. */ +static void lguest_cpuid(unsigned int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx) +{ + int function = *eax; + + native_cpuid(eax, ebx, ecx, edx); + switch (function) { + case 1: /* Basic feature request. */ + /* We only allow kernel to see SSE3, CMPXCHG16B and SSSE3 */ + *ecx &= 0x00002201; + /* SSE, SSE2, FXSR, MMX, CMOV, CMPXCHG8B, FPU. */ + *edx &= 0x07808101; + /* The Host can do a nice optimization if it knows that the + * kernel mappings (addresses above 0xC0000000 or whatever + * PAGE_OFFSET is set to) haven't changed. But Linux calls + * flush_tlb_user() for both user and kernel mappings unless + * the Page Global Enable (PGE) feature bit is set. */ + *edx |= 0x00002000; + break; + case 0x80000000: + /* Futureproof this a little: if they ask how much extended + * processor information there is, limit it to known fields. */ + if (*eax > 0x80000008) + *eax = 0x80000008; + break; + } +} + +/* Intel has four control registers, imaginatively named cr0, cr2, cr3 and cr4. + * I assume there's a cr1, but it hasn't bothered us yet, so we'll not bother + * it. The Host needs to know when the Guest wants to change them, so we have + * a whole series of functions like read_cr0() and write_cr0(). + * + * We start with cr0. cr0 allows you to turn on and off all kinds of basic + * features, but Linux only really cares about one: the horrifically-named Task + * Switched (TS) bit at bit 3 (ie. 8) + * + * What does the TS bit do? Well, it causes the CPU to trap (interrupt 7) if + * the floating point unit is used. Which allows us to restore FPU state + * lazily after a task switch, and Linux uses that gratefully, but wouldn't a + * name like "FPUTRAP bit" be a little less cryptic? + * + * We store cr0 (and cr3) locally, because the Host never changes it. The + * Guest sometimes wants to read it and we'd prefer not to bother the Host + * unnecessarily. */ +static unsigned long current_cr0, current_cr3; +static void lguest_write_cr0(unsigned long val) +{ + lazy_hcall(LHCALL_TS, val & X86_CR0_TS, 0, 0); + current_cr0 = val; +} + +static unsigned long lguest_read_cr0(void) +{ + return current_cr0; +} + +/* Intel provided a special instruction to clear the TS bit for people too cool + * to use write_cr0() to do it. This "clts" instruction is faster, because all + * the vowels have been optimized out. */ +static void lguest_clts(void) +{ + lazy_hcall(LHCALL_TS, 0, 0, 0); + current_cr0 &= ~X86_CR0_TS; +} + +/* cr2 is the virtual address of the last page fault, which the Guest only ever + * reads. The Host kindly writes this into our "struct lguest_data", so we + * just read it out of there. */ +static unsigned long lguest_read_cr2(void) +{ + return lguest_data.cr2; +} + +/* cr3 is the current toplevel pagetable page: the principle is the same as + * cr0. Keep a local copy, and tell the Host when it changes. */ +static void lguest_write_cr3(unsigned long cr3) +{ + lazy_hcall(LHCALL_NEW_PGTABLE, cr3, 0, 0); + current_cr3 = cr3; +} + +static unsigned long lguest_read_cr3(void) +{ + return current_cr3; +} + +/* cr4 is used to enable and disable PGE, but we don't care. */ +static unsigned long lguest_read_cr4(void) +{ + return 0; +} + +static void lguest_write_cr4(unsigned long val) +{ +} + +/* + * Page Table Handling. + * + * Now would be a good time to take a rest and grab a coffee or similarly + * relaxing stimulant. The easy parts are behind us, and the trek gradually + * winds uphill from here. + * + * Quick refresher: memory is divided into "pages" of 4096 bytes each. The CPU + * maps virtual addresses to physical addresses using "page tables". We could + * use one huge index of 1 million entries: each address is 4 bytes, so that's + * 1024 pages just to hold the page tables. But since most virtual addresses + * are unused, we use a two level index which saves space. The cr3 register + * contains the physical address of the top level "page directory" page, which + * contains physical addresses of up to 1024 second-level pages. Each of these + * second level pages contains up to 1024 physical addresses of actual pages, + * or Page Table Entries (PTEs). + * + * Here's a diagram, where arrows indicate physical addresses: + * + * cr3 ---> +---------+ + * | --------->+---------+ + * | | | PADDR1 | + * Top-level | | PADDR2 | + * (PMD) page | | | + * | | Lower-level | + * | | (PTE) page | + * | | | | + * .... .... + * + * So to convert a virtual address to a physical address, we look up the top + * level, which points us to the second level, which gives us the physical + * address of that page. If the top level entry was not present, or the second + * level entry was not present, then the virtual address is invalid (we + * say "the page was not mapped"). + * + * Put another way, a 32-bit virtual address is divided up like so: + * + * 1 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + * |<---- 10 bits ---->|<---- 10 bits ---->|<------ 12 bits ------>| + * Index into top Index into second Offset within page + * page directory page pagetable page + * + * The kernel spends a lot of time changing both the top-level page directory + * and lower-level pagetable pages. The Guest doesn't know physical addresses, + * so while it maintains these page tables exactly like normal, it also needs + * to keep the Host informed whenever it makes a change: the Host will create + * the real page tables based on the Guests'. + */ + +/* The Guest calls this to set a second-level entry (pte), ie. to map a page + * into a process' address space. We set the entry then tell the Host the + * toplevel and address this corresponds to. The Guest uses one pagetable per + * process, so we need to tell the Host which one we're changing (mm->pgd). */ +static void lguest_set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pteval) +{ + *ptep = pteval; + lazy_hcall(LHCALL_SET_PTE, __pa(mm->pgd), addr, pteval.pte_low); +} + +/* The Guest calls this to set a top-level entry. Again, we set the entry then + * tell the Host which top-level page we changed, and the index of the entry we + * changed. */ +static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval) +{ + *pmdp = pmdval; + lazy_hcall(LHCALL_SET_PMD, __pa(pmdp)&PAGE_MASK, + (__pa(pmdp)&(PAGE_SIZE-1))/4, 0); +} + +/* There are a couple of legacy places where the kernel sets a PTE, but we + * don't know the top level any more. This is useless for us, since we don't + * know which pagetable is changing or what address, so we just tell the Host + * to forget all of them. Fortunately, this is very rare. + * + * ... except in early boot when the kernel sets up the initial pagetables, + * which makes booting astonishingly slow. So we don't even tell the Host + * anything changed until we've done the first page table switch. */ +static void lguest_set_pte(pte_t *ptep, pte_t pteval) +{ + *ptep = pteval; + /* Don't bother with hypercall before initial setup. */ + if (current_cr3) + lazy_hcall(LHCALL_FLUSH_TLB, 1, 0, 0); +} + +/* Unfortunately for Lguest, the pv_mmu_ops for page tables were based on + * native page table operations. On native hardware you can set a new page + * table entry whenever you want, but if you want to remove one you have to do + * a TLB flush (a TLB is a little cache of page table entries kept by the CPU). + * + * So the lguest_set_pte_at() and lguest_set_pmd() functions above are only + * called when a valid entry is written, not when it's removed (ie. marked not + * present). Instead, this is where we come when the Guest wants to remove a + * page table entry: we tell the Host to set that entry to 0 (ie. the present + * bit is zero). */ +static void lguest_flush_tlb_single(unsigned long addr) +{ + /* Simply set it to zero: if it was not, it will fault back in. */ + lazy_hcall(LHCALL_SET_PTE, current_cr3, addr, 0); +} + +/* This is what happens after the Guest has removed a large number of entries. + * This tells the Host that any of the page table entries for userspace might + * have changed, ie. virtual addresses below PAGE_OFFSET. */ +static void lguest_flush_tlb_user(void) +{ + lazy_hcall(LHCALL_FLUSH_TLB, 0, 0, 0); +} + +/* This is called when the kernel page tables have changed. That's not very + * common (unless the Guest is using highmem, which makes the Guest extremely + * slow), so it's worth separating this from the user flushing above. */ +static void lguest_flush_tlb_kernel(void) +{ + lazy_hcall(LHCALL_FLUSH_TLB, 1, 0, 0); +} + +/* + * The Unadvanced Programmable Interrupt Controller. + * + * This is an attempt to implement the simplest possible interrupt controller. + * I spent some time looking though routines like set_irq_chip_and_handler, + * set_irq_chip_and_handler_name, set_irq_chip_data and set_phasers_to_stun and + * I *think* this is as simple as it gets. + * + * We can tell the Host what interrupts we want blocked ready for using the + * lguest_data.interrupts bitmap, so disabling (aka "masking") them is as + * simple as setting a bit. We don't actually "ack" interrupts as such, we + * just mask and unmask them. I wonder if we should be cleverer? + */ +static void disable_lguest_irq(unsigned int irq) +{ + set_bit(irq, lguest_data.blocked_interrupts); +} + +static void enable_lguest_irq(unsigned int irq) +{ + clear_bit(irq, lguest_data.blocked_interrupts); +} + +/* This structure describes the lguest IRQ controller. */ +static struct irq_chip lguest_irq_controller = { + .name = "lguest", + .mask = disable_lguest_irq, + .mask_ack = disable_lguest_irq, + .unmask = enable_lguest_irq, +}; + +/* This sets up the Interrupt Descriptor Table (IDT) entry for each hardware + * interrupt (except 128, which is used for system calls), and then tells the + * Linux infrastructure that each interrupt is controlled by our level-based + * lguest interrupt controller. */ +static void __init lguest_init_IRQ(void) +{ + unsigned int i; + + for (i = 0; i < LGUEST_IRQS; i++) { + int vector = FIRST_EXTERNAL_VECTOR + i; + if (vector != SYSCALL_VECTOR) { + set_intr_gate(vector, interrupt[i]); + set_irq_chip_and_handler(i, &lguest_irq_controller, + handle_level_irq); + } + } + /* This call is required to set up for 4k stacks, where we have + * separate stacks for hard and soft interrupts. */ + irq_ctx_init(smp_processor_id()); +} + +/* + * Time. + * + * It would be far better for everyone if the Guest had its own clock, but + * until then the Host gives us the time on every interrupt. + */ +static unsigned long lguest_get_wallclock(void) +{ + return lguest_data.time.tv_sec; +} + +static cycle_t lguest_clock_read(void) +{ + unsigned long sec, nsec; + + /* If the Host tells the TSC speed, we can trust that. */ + if (lguest_data.tsc_khz) + return native_read_tsc(); + + /* If we can't use the TSC, we read the time value written by the Host. + * Since it's in two parts (seconds and nanoseconds), we risk reading + * it just as it's changing from 99 & 0.999999999 to 100 and 0, and + * getting 99 and 0. As Linux tends to come apart under the stress of + * time travel, we must be careful: */ + do { + /* First we read the seconds part. */ + sec = lguest_data.time.tv_sec; + /* This read memory barrier tells the compiler and the CPU that + * this can't be reordered: we have to complete the above + * before going on. */ + rmb(); + /* Now we read the nanoseconds part. */ + nsec = lguest_data.time.tv_nsec; + /* Make sure we've done that. */ + rmb(); + /* Now if the seconds part has changed, try again. */ + } while (unlikely(lguest_data.time.tv_sec != sec)); + + /* Our non-TSC clock is in real nanoseconds. */ + return sec*1000000000ULL + nsec; +} + +/* This is what we tell the kernel is our clocksource. */ +static struct clocksource lguest_clock = { + .name = "lguest", + .rating = 400, + .read = lguest_clock_read, + .mask = CLOCKSOURCE_MASK(64), + .mult = 1 << 22, + .shift = 22, + .flags = CLOCK_SOURCE_IS_CONTINUOUS, +}; + +/* The "scheduler clock" is just our real clock, adjusted to start at zero */ +static unsigned long long lguest_sched_clock(void) +{ + return cyc2ns(&lguest_clock, lguest_clock_read() - clock_base); +} + +/* We also need a "struct clock_event_device": Linux asks us to set it to go + * off some time in the future. Actually, James Morris figured all this out, I + * just applied the patch. */ +static int lguest_clockevent_set_next_event(unsigned long delta, + struct clock_event_device *evt) +{ + if (delta < LG_CLOCK_MIN_DELTA) { + if (printk_ratelimit()) + printk(KERN_DEBUG "%s: small delta %lu ns\n", + __FUNCTION__, delta); + return -ETIME; + } + hcall(LHCALL_SET_CLOCKEVENT, delta, 0, 0); + return 0; +} + +static void lguest_clockevent_set_mode(enum clock_event_mode mode, + struct clock_event_device *evt) +{ + switch (mode) { + case CLOCK_EVT_MODE_UNUSED: + case CLOCK_EVT_MODE_SHUTDOWN: + /* A 0 argument shuts the clock down. */ + hcall(LHCALL_SET_CLOCKEVENT, 0, 0, 0); + break; + case CLOCK_EVT_MODE_ONESHOT: + /* This is what we expect. */ + break; + case CLOCK_EVT_MODE_PERIODIC: + BUG(); + case CLOCK_EVT_MODE_RESUME: + break; + } +} + +/* This describes our primitive timer chip. */ +static struct clock_event_device lguest_clockevent = { + .name = "lguest", + .features = CLOCK_EVT_FEAT_ONESHOT, + .set_next_event = lguest_clockevent_set_next_event, + .set_mode = lguest_clockevent_set_mode, + .rating = INT_MAX, + .mult = 1, + .shift = 0, + .min_delta_ns = LG_CLOCK_MIN_DELTA, + .max_delta_ns = LG_CLOCK_MAX_DELTA, +}; + +/* This is the Guest timer interrupt handler (hardware interrupt 0). We just + * call the clockevent infrastructure and it does whatever needs doing. */ +static void lguest_time_irq(unsigned int irq, struct irq_desc *desc) +{ + unsigned long flags; + + /* Don't interrupt us while this is running. */ + local_irq_save(flags); + lguest_clockevent.event_handler(&lguest_clockevent); + local_irq_restore(flags); +} + +/* At some point in the boot process, we get asked to set up our timing + * infrastructure. The kernel doesn't expect timer interrupts before this, but + * we cleverly initialized the "blocked_interrupts" field of "struct + * lguest_data" so that timer interrupts were blocked until now. */ +static void lguest_time_init(void) +{ + /* Set up the timer interrupt (0) to go to our simple timer routine */ + set_irq_handler(0, lguest_time_irq); + + /* Our clock structure looks like arch/x86/kernel/tsc_32.c if we can + * use the TSC, otherwise it's a dumb nanosecond-resolution clock. + * Either way, the "rating" is set so high that it's always chosen over + * any other clocksource. */ + if (lguest_data.tsc_khz) + lguest_clock.mult = clocksource_khz2mult(lguest_data.tsc_khz, + lguest_clock.shift); + clock_base = lguest_clock_read(); + clocksource_register(&lguest_clock); + + /* Now we've set up our clock, we can use it as the scheduler clock */ + pv_time_ops.sched_clock = lguest_sched_clock; + + /* We can't set cpumask in the initializer: damn C limitations! Set it + * here and register our timer device. */ + lguest_clockevent.cpumask = cpumask_of_cpu(0); + clockevents_register_device(&lguest_clockevent); + + /* Finally, we unblock the timer interrupt. */ + enable_lguest_irq(0); +} + +/* + * Miscellaneous bits and pieces. + * + * Here is an oddball collection of functions which the Guest needs for things + * to work. They're pretty simple. + */ + +/* The Guest needs to tell the Host what stack it expects traps to use. For + * native hardware, this is part of the Task State Segment mentioned above in + * lguest_load_tr_desc(), but to help hypervisors there's this special call. + * + * We tell the Host the segment we want to use (__KERNEL_DS is the kernel data + * segment), the privilege level (we're privilege level 1, the Host is 0 and + * will not tolerate us trying to use that), the stack pointer, and the number + * of pages in the stack. */ +static void lguest_load_esp0(struct tss_struct *tss, + struct thread_struct *thread) +{ + lazy_hcall(LHCALL_SET_STACK, __KERNEL_DS|0x1, thread->esp0, + THREAD_SIZE/PAGE_SIZE); +} + +/* Let's just say, I wouldn't do debugging under a Guest. */ +static void lguest_set_debugreg(int regno, unsigned long value) +{ + /* FIXME: Implement */ +} + +/* There are times when the kernel wants to make sure that no memory writes are + * caught in the cache (that they've all reached real hardware devices). This + * doesn't matter for the Guest which has virtual hardware. + * + * On the Pentium 4 and above, cpuid() indicates that the Cache Line Flush + * (clflush) instruction is available and the kernel uses that. Otherwise, it + * uses the older "Write Back and Invalidate Cache" (wbinvd) instruction. + * Unlike clflush, wbinvd can only be run at privilege level 0. So we can + * ignore clflush, but replace wbinvd. + */ +static void lguest_wbinvd(void) +{ +} + +/* If the Guest expects to have an Advanced Programmable Interrupt Controller, + * we play dumb by ignoring writes and returning 0 for reads. So it's no + * longer Programmable nor Controlling anything, and I don't think 8 lines of + * code qualifies for Advanced. It will also never interrupt anything. It + * does, however, allow us to get through the Linux boot code. */ +#ifdef CONFIG_X86_LOCAL_APIC +static void lguest_apic_write(unsigned long reg, unsigned long v) +{ +} + +static unsigned long lguest_apic_read(unsigned long reg) +{ + return 0; +} +#endif + +/* STOP! Until an interrupt comes in. */ +static void lguest_safe_halt(void) +{ + hcall(LHCALL_HALT, 0, 0, 0); +} + +/* Perhaps CRASH isn't the best name for this hypercall, but we use it to get a + * message out when we're crashing as well as elegant termination like powering + * off. + * + * Note that the Host always prefers that the Guest speak in physical addresses + * rather than virtual addresses, so we use __pa() here. */ +static void lguest_power_off(void) +{ + hcall(LHCALL_CRASH, __pa("Power down"), 0, 0); +} + +/* + * Panicing. + * + * Don't. But if you did, this is what happens. + */ +static int lguest_panic(struct notifier_block *nb, unsigned long l, void *p) +{ + hcall(LHCALL_CRASH, __pa(p), 0, 0); + /* The hcall won't return, but to keep gcc happy, we're "done". */ + return NOTIFY_DONE; +} + +static struct notifier_block paniced = { + .notifier_call = lguest_panic +}; + +/* Setting up memory is fairly easy. */ +static __init char *lguest_memory_setup(void) +{ + /* We do this here and not earlier because lockcheck barfs if we do it + * before start_kernel() */ + atomic_notifier_chain_register(&panic_notifier_list, &paniced); + + /* The Linux bootloader header contains an "e820" memory map: the + * Launcher populated the first entry with our memory limit. */ + add_memory_region(boot_params.e820_map[0].addr, + boot_params.e820_map[0].size, + boot_params.e820_map[0].type); + + /* This string is for the boot messages. */ + return "LGUEST"; +} + +/* We will eventually use the virtio console device to produce console output, + * but before that is set up we use LHCALL_NOTIFY on normal memory to produce + * console output. */ +static __init int early_put_chars(u32 vtermno, const char *buf, int count) +{ + char scratch[17]; + unsigned int len = count; + + /* We use a nul-terminated string, so we have to make a copy. Icky, + * huh? */ + if (len > sizeof(scratch) - 1) + len = sizeof(scratch) - 1; + scratch[len] = '\0'; + memcpy(scratch, buf, len); + hcall(LHCALL_NOTIFY, __pa(scratch), 0, 0); + + /* This routine returns the number of bytes actually written. */ + return len; +} + +/*G:050 + * Patching (Powerfully Placating Performance Pedants) + * + * We have already seen that pv_ops structures let us replace simple + * native instructions with calls to the appropriate back end all throughout + * the kernel. This allows the same kernel to run as a Guest and as a native + * kernel, but it's slow because of all the indirect branches. + * + * Remember that David Wheeler quote about "Any problem in computer science can + * be solved with another layer of indirection"? The rest of that quote is + * "... But that usually will create another problem." This is the first of + * those problems. + * + * Our current solution is to allow the paravirt back end to optionally patch + * over the indirect calls to replace them with something more efficient. We + * patch the four most commonly called functions: disable interrupts, enable + * interrupts, restore interrupts and save interrupts. We usually have 6 or 10 + * bytes to patch into: the Guest versions of these operations are small enough + * that we can fit comfortably. + * + * First we need assembly templates of each of the patchable Guest operations, + * and these are in lguest_asm.S. */ + +/*G:060 We construct a table from the assembler templates: */ +static const struct lguest_insns +{ + const char *start, *end; +} lguest_insns[] = { + [PARAVIRT_PATCH(pv_irq_ops.irq_disable)] = { lgstart_cli, lgend_cli }, + [PARAVIRT_PATCH(pv_irq_ops.irq_enable)] = { lgstart_sti, lgend_sti }, + [PARAVIRT_PATCH(pv_irq_ops.restore_fl)] = { lgstart_popf, lgend_popf }, + [PARAVIRT_PATCH(pv_irq_ops.save_fl)] = { lgstart_pushf, lgend_pushf }, +}; + +/* Now our patch routine is fairly simple (based on the native one in + * paravirt.c). If we have a replacement, we copy it in and return how much of + * the available space we used. */ +static unsigned lguest_patch(u8 type, u16 clobber, void *ibuf, + unsigned long addr, unsigned len) +{ + unsigned int insn_len; + + /* Don't do anything special if we don't have a replacement */ + if (type >= ARRAY_SIZE(lguest_insns) || !lguest_insns[type].start) + return paravirt_patch_default(type, clobber, ibuf, addr, len); + + insn_len = lguest_insns[type].end - lguest_insns[type].start; + + /* Similarly if we can't fit replacement (shouldn't happen, but let's + * be thorough). */ + if (len < insn_len) + return paravirt_patch_default(type, clobber, ibuf, addr, len); + + /* Copy in our instructions. */ + memcpy(ibuf, lguest_insns[type].start, insn_len); + return insn_len; +} + +/*G:030 Once we get to lguest_init(), we know we're a Guest. The pv_ops + * structures in the kernel provide points for (almost) every routine we have + * to override to avoid privileged instructions. */ +__init void lguest_init(void) +{ + /* We're under lguest, paravirt is enabled, and we're running at + * privilege level 1, not 0 as normal. */ + pv_info.name = "lguest"; + pv_info.paravirt_enabled = 1; + pv_info.kernel_rpl = 1; + + /* We set up all the lguest overrides for sensitive operations. These + * are detailed with the operations themselves. */ + + /* interrupt-related operations */ + pv_irq_ops.init_IRQ = lguest_init_IRQ; + pv_irq_ops.save_fl = save_fl; + pv_irq_ops.restore_fl = restore_fl; + pv_irq_ops.irq_disable = irq_disable; + pv_irq_ops.irq_enable = irq_enable; + pv_irq_ops.safe_halt = lguest_safe_halt; + + /* init-time operations */ + pv_init_ops.memory_setup = lguest_memory_setup; + pv_init_ops.patch = lguest_patch; + + /* Intercepts of various cpu instructions */ + pv_cpu_ops.load_gdt = lguest_load_gdt; + pv_cpu_ops.cpuid = lguest_cpuid; + pv_cpu_ops.load_idt = lguest_load_idt; + pv_cpu_ops.iret = lguest_iret; + pv_cpu_ops.load_esp0 = lguest_load_esp0; + pv_cpu_ops.load_tr_desc = lguest_load_tr_desc; + pv_cpu_ops.set_ldt = lguest_set_ldt; + pv_cpu_ops.load_tls = lguest_load_tls; + pv_cpu_ops.set_debugreg = lguest_set_debugreg; + pv_cpu_ops.clts = lguest_clts; + pv_cpu_ops.read_cr0 = lguest_read_cr0; + pv_cpu_ops.write_cr0 = lguest_write_cr0; + pv_cpu_ops.read_cr4 = lguest_read_cr4; + pv_cpu_ops.write_cr4 = lguest_write_cr4; + pv_cpu_ops.write_gdt_entry = lguest_write_gdt_entry; + pv_cpu_ops.write_idt_entry = lguest_write_idt_entry; + pv_cpu_ops.wbinvd = lguest_wbinvd; + pv_cpu_ops.lazy_mode.enter = paravirt_enter_lazy_cpu; + pv_cpu_ops.lazy_mode.leave = lguest_leave_lazy_mode; + + /* pagetable management */ + pv_mmu_ops.write_cr3 = lguest_write_cr3; + pv_mmu_ops.flush_tlb_user = lguest_flush_tlb_user; + pv_mmu_ops.flush_tlb_single = lguest_flush_tlb_single; + pv_mmu_ops.flush_tlb_kernel = lguest_flush_tlb_kernel; + pv_mmu_ops.set_pte = lguest_set_pte; + pv_mmu_ops.set_pte_at = lguest_set_pte_at; + pv_mmu_ops.set_pmd = lguest_set_pmd; + pv_mmu_ops.read_cr2 = lguest_read_cr2; + pv_mmu_ops.read_cr3 = lguest_read_cr3; + pv_mmu_ops.lazy_mode.enter = paravirt_enter_lazy_mmu; + pv_mmu_ops.lazy_mode.leave = lguest_leave_lazy_mode; + +#ifdef CONFIG_X86_LOCAL_APIC + /* apic read/write intercepts */ + pv_apic_ops.apic_write = lguest_apic_write; + pv_apic_ops.apic_write_atomic = lguest_apic_write; + pv_apic_ops.apic_read = lguest_apic_read; +#endif + + /* time operations */ + pv_time_ops.get_wallclock = lguest_get_wallclock; + pv_time_ops.time_init = lguest_time_init; + + /* Now is a good time to look at the implementations of these functions + * before returning to the rest of lguest_init(). */ + + /*G:070 Now we've seen all the paravirt_ops, we return to + * lguest_init() where the rest of the fairly chaotic boot setup + * occurs. */ + + /* The native boot code sets up initial page tables immediately after + * the kernel itself, and sets init_pg_tables_end so they're not + * clobbered. The Launcher places our initial pagetables somewhere at + * the top of our physical memory, so we don't need extra space: set + * init_pg_tables_end to the end of the kernel. */ + init_pg_tables_end = __pa(pg0); + + /* Load the %fs segment register (the per-cpu segment register) with + * the normal data segment to get through booting. */ + asm volatile ("mov %0, %%fs" : : "r" (__KERNEL_DS) : "memory"); + + /* The Host uses the top of the Guest's virtual address space for the + * Host<->Guest Switcher, and it tells us how big that is in + * lguest_data.reserve_mem, set up on the LGUEST_INIT hypercall. */ + reserve_top_address(lguest_data.reserve_mem); + + /* If we don't initialize the lock dependency checker now, it crashes + * paravirt_disable_iospace. */ + lockdep_init(); + + /* The IDE code spends about 3 seconds probing for disks: if we reserve + * all the I/O ports up front it can't get them and so doesn't probe. + * Other device drivers are similar (but less severe). This cuts the + * kernel boot time on my machine from 4.1 seconds to 0.45 seconds. */ + paravirt_disable_iospace(); + + /* This is messy CPU setup stuff which the native boot code does before + * start_kernel, so we have to do, too: */ + cpu_detect(&new_cpu_data); + /* head.S usually sets up the first capability word, so do it here. */ + new_cpu_data.x86_capability[0] = cpuid_edx(1); + + /* Math is always hard! */ + new_cpu_data.hard_math = 1; + +#ifdef CONFIG_X86_MCE + mce_disabled = 1; +#endif +#ifdef CONFIG_ACPI + acpi_disabled = 1; + acpi_ht = 0; +#endif + + /* We set the perferred console to "hvc". This is the "hypervisor + * virtual console" driver written by the PowerPC people, which we also + * adapted for lguest's use. */ + add_preferred_console("hvc", 0, NULL); + + /* Register our very early console. */ + virtio_cons_early_init(early_put_chars); + + /* Last of all, we set the power management poweroff hook to point to + * the Guest routine to power off. */ + pm_power_off = lguest_power_off; + + /* Now we're set up, call start_kernel() in init/main.c and we proceed + * to boot as normal. It never returns. */ + start_kernel(); +} +/* + * This marks the end of stage II of our journey, The Guest. + * + * It is now time for us to explore the layer of virtual drivers and complete + * our understanding of the Guest in "make Drivers". + */ diff --git a/arch/x86/lguest/i386_head.S b/arch/x86/lguest/i386_head.S new file mode 100644 index 000000000000..95b6fbcded63 --- /dev/null +++ b/arch/x86/lguest/i386_head.S @@ -0,0 +1,117 @@ +#include <linux/linkage.h> +#include <linux/lguest.h> +#include <asm/lguest_hcall.h> +#include <asm/asm-offsets.h> +#include <asm/thread_info.h> +#include <asm/processor-flags.h> + +/*G:020 This is where we begin: head.S notes that the boot header's platform + * type field is "1" (lguest), so calls us here. + * + * WARNING: be very careful here! We're running at addresses equal to physical + * addesses (around 0), not above PAGE_OFFSET as most code expectes + * (eg. 0xC0000000). Jumps are relative, so they're OK, but we can't touch any + * data. + * + * The .section line puts this code in .init.text so it will be discarded after + * boot. */ +.section .init.text, "ax", @progbits +ENTRY(lguest_entry) + /* We make the "initialization" hypercall now to tell the Host about + * us, and also find out where it put our page tables. */ + movl $LHCALL_LGUEST_INIT, %eax + movl $lguest_data - __PAGE_OFFSET, %edx + int $LGUEST_TRAP_ENTRY + + /* The Host put the toplevel pagetable in lguest_data.pgdir. The movsl + * instruction uses %esi implicitly as the source for the copy we' + * about to do. */ + movl lguest_data - __PAGE_OFFSET + LGUEST_DATA_pgdir, %esi + + /* Copy first 32 entries of page directory to __PAGE_OFFSET entries. + * This means the first 128M of kernel memory will be mapped at + * PAGE_OFFSET where the kernel expects to run. This will get it far + * enough through boot to switch to its own pagetables. */ + movl $32, %ecx + movl %esi, %edi + addl $((__PAGE_OFFSET >> 22) * 4), %edi + rep + movsl + + /* Set up the initial stack so we can run C code. */ + movl $(init_thread_union+THREAD_SIZE),%esp + + /* Jumps are relative, and we're running __PAGE_OFFSET too low at the + * moment. */ + jmp lguest_init+__PAGE_OFFSET + +/*G:055 We create a macro which puts the assembler code between lgstart_ and + * lgend_ markers. These templates are put in the .text section: they can't be + * discarded after boot as we may need to patch modules, too. */ +.text +#define LGUEST_PATCH(name, insns...) \ + lgstart_##name: insns; lgend_##name:; \ + .globl lgstart_##name; .globl lgend_##name + +LGUEST_PATCH(cli, movl $0, lguest_data+LGUEST_DATA_irq_enabled) +LGUEST_PATCH(sti, movl $X86_EFLAGS_IF, lguest_data+LGUEST_DATA_irq_enabled) +LGUEST_PATCH(popf, movl %eax, lguest_data+LGUEST_DATA_irq_enabled) +LGUEST_PATCH(pushf, movl lguest_data+LGUEST_DATA_irq_enabled, %eax) +/*:*/ + +/* These demark the EIP range where host should never deliver interrupts. */ +.global lguest_noirq_start +.global lguest_noirq_end + +/*M:004 When the Host reflects a trap or injects an interrupt into the Guest, + * it sets the eflags interrupt bit on the stack based on + * lguest_data.irq_enabled, so the Guest iret logic does the right thing when + * restoring it. However, when the Host sets the Guest up for direct traps, + * such as system calls, the processor is the one to push eflags onto the + * stack, and the interrupt bit will be 1 (in reality, interrupts are always + * enabled in the Guest). + * + * This turns out to be harmless: the only trap which should happen under Linux + * with interrupts disabled is Page Fault (due to our lazy mapping of vmalloc + * regions), which has to be reflected through the Host anyway. If another + * trap *does* go off when interrupts are disabled, the Guest will panic, and + * we'll never get to this iret! :*/ + +/*G:045 There is one final paravirt_op that the Guest implements, and glancing + * at it you can see why I left it to last. It's *cool*! It's in *assembler*! + * + * The "iret" instruction is used to return from an interrupt or trap. The + * stack looks like this: + * old address + * old code segment & privilege level + * old processor flags ("eflags") + * + * The "iret" instruction pops those values off the stack and restores them all + * at once. The only problem is that eflags includes the Interrupt Flag which + * the Guest can't change: the CPU will simply ignore it when we do an "iret". + * So we have to copy eflags from the stack to lguest_data.irq_enabled before + * we do the "iret". + * + * There are two problems with this: firstly, we need to use a register to do + * the copy and secondly, the whole thing needs to be atomic. The first + * problem is easy to solve: push %eax on the stack so we can use it, and then + * restore it at the end just before the real "iret". + * + * The second is harder: copying eflags to lguest_data.irq_enabled will turn + * interrupts on before we're finished, so we could be interrupted before we + * return to userspace or wherever. Our solution to this is to surround the + * code with lguest_noirq_start: and lguest_noirq_end: labels. We tell the + * Host that it is *never* to interrupt us there, even if interrupts seem to be + * enabled. */ +ENTRY(lguest_iret) + pushl %eax + movl 12(%esp), %eax +lguest_noirq_start: + /* Note the %ss: segment prefix here. Normal data accesses use the + * "ds" segment, but that will have already been restored for whatever + * we're returning to (such as userspace): we can't trust it. The %ss: + * prefix makes sure we use the stack segment, which is still valid. */ + movl %eax,%ss:lguest_data+LGUEST_DATA_irq_enabled + popl %eax + iret +lguest_noirq_end: diff --git a/arch/x86/lib/bitstr_64.c b/arch/x86/lib/bitstr_64.c index 24676609a6ac..7445caf1b5de 100644 --- a/arch/x86/lib/bitstr_64.c +++ b/arch/x86/lib/bitstr_64.c @@ -14,7 +14,7 @@ find_next_zero_string(unsigned long *bitmap, long start, long nbits, int len) /* could test bitsliced, but it's hardly worth it */ end = n+len; - if (end >= nbits) + if (end > nbits) return -1; for (i = n+1; i < end; i++) { if (test_bit(i, bitmap)) { diff --git a/arch/x86/lib/delay_32.c b/arch/x86/lib/delay_32.c index f6edb11364df..aad9d95469dc 100644 --- a/arch/x86/lib/delay_32.c +++ b/arch/x86/lib/delay_32.c @@ -12,6 +12,7 @@ #include <linux/module.h> #include <linux/sched.h> +#include <linux/preempt.h> #include <linux/delay.h> #include <asm/processor.h> @@ -42,11 +43,13 @@ static void delay_tsc(unsigned long loops) { unsigned long bclock, now; + preempt_disable(); /* TSC's are per-cpu */ rdtscl(bclock); do { rep_nop(); rdtscl(now); } while ((now-bclock) < loops); + preempt_enable(); } /* @@ -82,7 +85,7 @@ inline void __const_udelay(unsigned long xloops) __asm__("mull %0" :"=d" (xloops), "=&a" (d0) :"1" (xloops), "0" - (cpu_data[raw_smp_processor_id()].loops_per_jiffy * (HZ/4))); + (cpu_data(raw_smp_processor_id()).loops_per_jiffy * (HZ/4))); __delay(++xloops); } diff --git a/arch/x86/lib/delay_64.c b/arch/x86/lib/delay_64.c index 2dbebd308347..45cdd3fbd91c 100644 --- a/arch/x86/lib/delay_64.c +++ b/arch/x86/lib/delay_64.c @@ -10,7 +10,9 @@ #include <linux/module.h> #include <linux/sched.h> +#include <linux/preempt.h> #include <linux/delay.h> + #include <asm/delay.h> #include <asm/msr.h> @@ -27,20 +29,22 @@ int read_current_timer(unsigned long *timer_value) void __delay(unsigned long loops) { unsigned bclock, now; - + + preempt_disable(); /* TSC's are pre-cpu */ rdtscl(bclock); - do - { + do { rep_nop(); rdtscl(now); } - while((now-bclock) < loops); + while ((now-bclock) < loops); + preempt_enable(); } EXPORT_SYMBOL(__delay); inline void __const_udelay(unsigned long xloops) { - __delay(((xloops * HZ * cpu_data[raw_smp_processor_id()].loops_per_jiffy) >> 32) + 1); + __delay(((xloops * HZ * + cpu_data(raw_smp_processor_id()).loops_per_jiffy) >> 32) + 1); } EXPORT_SYMBOL(__const_udelay); diff --git a/arch/x86/lib/msr-on-cpu.c b/arch/x86/lib/msr-on-cpu.c index 7767962f25d3..57d043fa893e 100644 --- a/arch/x86/lib/msr-on-cpu.c +++ b/arch/x86/lib/msr-on-cpu.c @@ -26,27 +26,18 @@ static void __rdmsr_safe_on_cpu(void *info) static int _rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h, int safe) { int err = 0; - preempt_disable(); - if (smp_processor_id() == cpu) - if (safe) - err = rdmsr_safe(msr_no, l, h); - else - rdmsr(msr_no, *l, *h); - else { - struct msr_info rv; - - rv.msr_no = msr_no; - if (safe) { - smp_call_function_single(cpu, __rdmsr_safe_on_cpu, - &rv, 0, 1); - err = rv.err; - } else { - smp_call_function_single(cpu, __rdmsr_on_cpu, &rv, 0, 1); - } - *l = rv.l; - *h = rv.h; + struct msr_info rv; + + rv.msr_no = msr_no; + if (safe) { + smp_call_function_single(cpu, __rdmsr_safe_on_cpu, &rv, 0, 1); + err = rv.err; + } else { + smp_call_function_single(cpu, __rdmsr_on_cpu, &rv, 0, 1); } - preempt_enable(); + *l = rv.l; + *h = rv.h; + return err; } @@ -67,27 +58,18 @@ static void __wrmsr_safe_on_cpu(void *info) static int _wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h, int safe) { int err = 0; - preempt_disable(); - if (smp_processor_id() == cpu) - if (safe) - err = wrmsr_safe(msr_no, l, h); - else - wrmsr(msr_no, l, h); - else { - struct msr_info rv; - - rv.msr_no = msr_no; - rv.l = l; - rv.h = h; - if (safe) { - smp_call_function_single(cpu, __wrmsr_safe_on_cpu, - &rv, 0, 1); - err = rv.err; - } else { - smp_call_function_single(cpu, __wrmsr_on_cpu, &rv, 0, 1); - } + struct msr_info rv; + + rv.msr_no = msr_no; + rv.l = l; + rv.h = h; + if (safe) { + smp_call_function_single(cpu, __wrmsr_safe_on_cpu, &rv, 0, 1); + err = rv.err; + } else { + smp_call_function_single(cpu, __wrmsr_on_cpu, &rv, 0, 1); } - preempt_enable(); + return err; } diff --git a/arch/x86/lib/rwlock_64.S b/arch/x86/lib/rwlock_64.S index 0cde1f807314..05ea55f71405 100644 --- a/arch/x86/lib/rwlock_64.S +++ b/arch/x86/lib/rwlock_64.S @@ -2,7 +2,7 @@ #include <linux/linkage.h> #include <asm/rwlock.h> -#include <asm/alternative-asm.i> +#include <asm/alternative-asm.h> #include <asm/dwarf2.h> /* rdi: pointer to rwlock_t */ diff --git a/arch/x86/lib/semaphore_32.S b/arch/x86/lib/semaphore_32.S index c01eb39c0b43..444fba400983 100644 --- a/arch/x86/lib/semaphore_32.S +++ b/arch/x86/lib/semaphore_32.S @@ -15,8 +15,8 @@ #include <linux/linkage.h> #include <asm/rwlock.h> -#include <asm/alternative-asm.i> -#include <asm/frame.i> +#include <asm/alternative-asm.h> +#include <asm/frame.h> #include <asm/dwarf2.h> /* diff --git a/arch/x86/lib/string_32.c b/arch/x86/lib/string_32.c index 2c773fefa3dd..c2c0504a3071 100644 --- a/arch/x86/lib/string_32.c +++ b/arch/x86/lib/string_32.c @@ -160,26 +160,6 @@ char *strchr(const char * s, int c) EXPORT_SYMBOL(strchr); #endif -#ifdef __HAVE_ARCH_STRRCHR -char *strrchr(const char * s, int c) -{ - int d0, d1; - char * res; - asm volatile( "movb %%al,%%ah\n" - "1:\tlodsb\n\t" - "cmpb %%ah,%%al\n\t" - "jne 2f\n\t" - "leal -1(%%esi),%0\n" - "2:\ttestb %%al,%%al\n\t" - "jne 1b" - :"=g" (res), "=&S" (d0), "=&a" (d1) - :"0" (0),"1" (s),"2" (c) - :"memory"); - return res; -} -EXPORT_SYMBOL(strrchr); -#endif - #ifdef __HAVE_ARCH_STRLEN size_t strlen(const char * s) { diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c index 9f38b12b4af1..8bab2b2efaff 100644 --- a/arch/x86/lib/usercopy_32.c +++ b/arch/x86/lib/usercopy_32.c @@ -748,7 +748,7 @@ survive: retval = get_user_pages(current, current->mm, (unsigned long )to, 1, 1, 0, &pg, NULL); - if (retval == -ENOMEM && is_init(current)) { + if (retval == -ENOMEM && is_global_init(current)) { up_read(¤t->mm->mmap_sem); congestion_wait(WRITE, HZ/50); goto survive; diff --git a/arch/x86/mach-default/setup.c b/arch/x86/mach-default/setup.c index 7f635c7a2381..0c28a071824c 100644 --- a/arch/x86/mach-default/setup.c +++ b/arch/x86/mach-default/setup.c @@ -35,7 +35,11 @@ void __init pre_intr_init_hook(void) /* * IRQ2 is cascade interrupt to second interrupt controller */ -static struct irqaction irq2 = { no_action, 0, CPU_MASK_NONE, "cascade", NULL, NULL}; +static struct irqaction irq2 = { + .handler = no_action, + .mask = CPU_MASK_NONE, + .name = "cascade", +}; /** * intr_init_hook - post gate setup interrupt initialisation @@ -104,7 +108,7 @@ void __init time_init_hook(void) * mca_nmi_hook - hook into MCA specific NMI chain * * Description: - * The MCA (Microchannel Arcitecture) has an NMI chain for NMI sources + * The MCA (Microchannel Architecture) has an NMI chain for NMI sources * along the MCA bus. Use this to hook into that chain if you will need * it. **/ @@ -127,7 +131,7 @@ static __init int no_ipi_broadcast(char *str) return 1; } -__setup("no_ipi_broadcast", no_ipi_broadcast); +__setup("no_ipi_broadcast=", no_ipi_broadcast); static int __init print_ipi_mode(void) { @@ -159,16 +163,18 @@ char * __init machine_specific_memory_setup(void) * Otherwise fake a memory map; one section from 0k->640k, * the next section from 1mb->appropriate_mem_k */ - sanitize_e820_map(E820_MAP, &E820_MAP_NR); - if (copy_e820_map(E820_MAP, E820_MAP_NR) < 0) { + sanitize_e820_map(boot_params.e820_map, &boot_params.e820_entries); + if (copy_e820_map(boot_params.e820_map, boot_params.e820_entries) + < 0) { unsigned long mem_size; /* compare results from other methods and take the greater */ - if (ALT_MEM_K < EXT_MEM_K) { - mem_size = EXT_MEM_K; + if (boot_params.alt_mem_k + < boot_params.screen_info.ext_mem_k) { + mem_size = boot_params.screen_info.ext_mem_k; who = "BIOS-88"; } else { - mem_size = ALT_MEM_K; + mem_size = boot_params.alt_mem_k; who = "BIOS-e801"; } diff --git a/arch/x86/mach-es7000/es7000plat.c b/arch/x86/mach-es7000/es7000plat.c index ab99072d3f9a..f5d6f7d8b86e 100644 --- a/arch/x86/mach-es7000/es7000plat.c +++ b/arch/x86/mach-es7000/es7000plat.c @@ -46,11 +46,11 @@ * ES7000 Globals */ -volatile unsigned long *psai = NULL; -struct mip_reg *mip_reg; -struct mip_reg *host_reg; -int mip_port; -unsigned long mip_addr, host_addr; +static volatile unsigned long *psai = NULL; +static struct mip_reg *mip_reg; +static struct mip_reg *host_reg; +static int mip_port; +static unsigned long mip_addr, host_addr; /* * GSI override for ES7000 platforms. @@ -288,28 +288,8 @@ es7000_start_cpu(int cpu, unsigned long eip) } -int -es7000_stop_cpu(int cpu) -{ - int startup; - - if (psai == NULL) - return -1; - - startup= (0x1000000 | cpu); - - while ((*psai & 0xff00ffff) != startup) - ; - - startup = (*psai & 0xff0000) >> 16; - *psai &= 0xffffff; - - return 0; - -} - void __init -es7000_sw_apic() +es7000_sw_apic(void) { if (es7000_plat) { int mip_status; diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index 8685208d8512..1af0cc7648f0 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c @@ -1,5 +1,5 @@ /* - * Default generic APIC driver. This handles upto 8 CPUs. + * Default generic APIC driver. This handles up to 8 CPUs. */ #define APIC_DEFINITION 1 #include <linux/threads.h> diff --git a/arch/x86/mach-generic/probe.c b/arch/x86/mach-generic/probe.c index 74f3da634423..f410d3cb5659 100644 --- a/arch/x86/mach-generic/probe.c +++ b/arch/x86/mach-generic/probe.c @@ -22,7 +22,7 @@ extern struct genapic apic_default; struct genapic *genapic = &apic_default; -struct genapic *apic_probe[] __initdata = { +static struct genapic *apic_probe[] __initdata = { &apic_summit, &apic_bigsmp, &apic_es7000, @@ -56,7 +56,7 @@ void __init generic_bigsmp_probe(void) /* * This routine is used to switch to bigsmp mode when * - There is no apic= option specified by the user - * - generic_apic_probe() has choosen apic_default as the sub_arch + * - generic_apic_probe() has chosen apic_default as the sub_arch * - we find more than 8 CPUs in acpi LAPIC listing with xAPIC support */ diff --git a/arch/x86/mach-visws/setup.c b/arch/x86/mach-visws/setup.c index 1f81f10e03a0..de4c9dbd086f 100644 --- a/arch/x86/mach-visws/setup.c +++ b/arch/x86/mach-visws/setup.c @@ -152,7 +152,7 @@ char * __init machine_specific_memory_setup(void) { long long gfx_mem_size = 8 * MB; - mem_size = ALT_MEM_K; + mem_size = boot_params.alt_mem_k; if (!mem_size) { printk(KERN_WARNING "Bootloader didn't set memory size, upgrade it !\n"); diff --git a/arch/x86/mach-voyager/setup.c b/arch/x86/mach-voyager/setup.c index 2b55694e6400..3bef977cb29b 100644 --- a/arch/x86/mach-voyager/setup.c +++ b/arch/x86/mach-voyager/setup.c @@ -18,7 +18,11 @@ void __init pre_intr_init_hook(void) /* * IRQ2 is cascade interrupt to second interrupt controller */ -static struct irqaction irq2 = { no_action, 0, CPU_MASK_NONE, "cascade", NULL, NULL}; +static struct irqaction irq2 = { + .handler = no_action, + .mask = CPU_MASK_NONE, + .name = "cascade", +}; void __init intr_init_hook(void) { @@ -83,7 +87,7 @@ char * __init machine_specific_memory_setup(void) if(inb(catbase) != VOYAGER_DINO) { printk(KERN_ERR "Voyager: Failed to get DINO for L4, setting tom to EXT_MEM_K\n"); - tom = (EXT_MEM_K)<<10; + tom = (boot_params.screen_info.ext_mem_k)<<10; } who = "Voyager-TOM"; add_memory_region(0, 0x9f000, E820_RAM); @@ -104,16 +108,18 @@ char * __init machine_specific_memory_setup(void) * Otherwise fake a memory map; one section from 0k->640k, * the next section from 1mb->appropriate_mem_k */ - sanitize_e820_map(E820_MAP, &E820_MAP_NR); - if (copy_e820_map(E820_MAP, E820_MAP_NR) < 0) { + sanitize_e820_map(boot_params.e820_map, &boot_params.e820_entries); + if (copy_e820_map(boot_params.e820_map, boot_params.e820_entries) + < 0) { unsigned long mem_size; /* compare results from other methods and take the greater */ - if (ALT_MEM_K < EXT_MEM_K) { - mem_size = EXT_MEM_K; + if (boot_params.alt_mem_k + < boot_params.screen_info.ext_mem_k) { + mem_size = boot_params.screen_info.ext_mem_k; who = "BIOS-88"; } else { - mem_size = ALT_MEM_K; + mem_size = boot_params.alt_mem_k; who = "BIOS-e801"; } diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c index b87f8548e75a..69371434b0cf 100644 --- a/arch/x86/mach-voyager/voyager_smp.c +++ b/arch/x86/mach-voyager/voyager_smp.c @@ -29,15 +29,15 @@ #include <asm/arch_hooks.h> /* TLB state -- visible externally, indexed physically */ -DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate) ____cacheline_aligned = { &init_mm, 0 }; +DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = { &init_mm, 0 }; /* CPU IRQ affinity -- set to all ones initially */ static unsigned long cpu_irq_affinity[NR_CPUS] __cacheline_aligned = { [0 ... NR_CPUS-1] = ~0UL }; /* per CPU data structure (for /proc/cpuinfo et al), visible externally * indexed physically */ -struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned; -EXPORT_SYMBOL(cpu_data); +DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info); +EXPORT_PER_CPU_SYMBOL(cpu_info); /* physical ID of the CPU used to boot the system */ unsigned char boot_cpu_id; @@ -389,7 +389,7 @@ find_smp_config(void) /* The boot CPU must be extended */ voyager_extended_vic_processors = 1<<boot_cpu_id; - /* initially, all of the first 8 cpu's can boot */ + /* initially, all of the first 8 CPUs can boot */ voyager_allowed_boot_processors = 0xff; /* set up everything for just this CPU, we can alter * this as we start the other CPUs later */ @@ -430,7 +430,7 @@ find_smp_config(void) void __init smp_store_cpu_info(int id) { - struct cpuinfo_x86 *c=&cpu_data[id]; + struct cpuinfo_x86 *c = &cpu_data(id); *c = boot_cpu_data; @@ -442,8 +442,8 @@ static __u32 __init setup_trampoline(void) { /* these two are global symbols in trampoline.S */ - extern __u8 trampoline_end[]; - extern __u8 trampoline_data[]; + extern const __u8 trampoline_end[]; + extern const __u8 trampoline_data[]; memcpy((__u8 *)trampoline_base, trampoline_data, trampoline_end - trampoline_data); @@ -634,7 +634,7 @@ do_boot_cpu(__u8 cpu) cpu, smp_processor_id())); printk("CPU%d: ", cpu); - print_cpu_info(&cpu_data[cpu]); + print_cpu_info(&cpu_data(cpu)); wmb(); cpu_set(cpu, cpu_callout_map); cpu_set(cpu, cpu_present_map); @@ -683,7 +683,7 @@ smp_boot_cpus(void) */ smp_store_cpu_info(boot_cpu_id); printk("CPU%d: ", boot_cpu_id); - print_cpu_info(&cpu_data[boot_cpu_id]); + print_cpu_info(&cpu_data(boot_cpu_id)); if(is_cpu_quad()) { /* booting on a Quad CPU */ @@ -714,7 +714,7 @@ smp_boot_cpus(void) unsigned long bogosum = 0; for (i = 0; i < NR_CPUS; i++) if (cpu_isset(i, cpu_online_map)) - bogosum += cpu_data[i].loops_per_jiffy; + bogosum += cpu_data(i).loops_per_jiffy; printk(KERN_INFO "Total of %d processors activated (%lu.%02lu BogoMIPS).\n", cpucount+1, bogosum/(500000/HZ), @@ -1010,7 +1010,7 @@ static struct call_data_struct * call_data; /* execute a thread on a new CPU. The function to be called must be * previously set up. This is used to schedule a function for - * execution on all CPU's - set up the function then broadcast a + * execution on all CPUs - set up the function then broadcast a * function_interrupt CPI to come here on each CPU */ static void smp_call_function_interrupt(void) @@ -1037,6 +1037,7 @@ smp_call_function_interrupt(void) */ irq_enter(); (*func)(info); + __get_cpu_var(irq_stat).irq_call_count++; irq_exit(); if (wait) { mb(); @@ -1094,7 +1095,7 @@ voyager_smp_call_function_mask (cpumask_t cpumask, * CPI here. We don't use this actually for counting so losing * ticks doesn't matter * - * FIXME: For those CPU's which actually have a local APIC, we could + * FIXME: For those CPUs which actually have a local APIC, we could * try to use it to trigger this interrupt instead of having to * broadcast the timer tick. Unfortunately, all my pentium DYADs have * no local APIC, so I can't do this @@ -1286,7 +1287,7 @@ smp_local_timer_interrupt(void) /* * We take the 'long' return path, and there every subsystem - * grabs the apropriate locks (kernel lock/ irq lock). + * grabs the appropriate locks (kernel lock/ irq lock). * * we might want to decouple profiling from the 'long path', * and do the profiling totally in assembly. @@ -1758,7 +1759,7 @@ set_vic_irq_affinity(unsigned int irq, cpumask_t mask) real_mask = cpus_addr(mask)[0] & voyager_extended_vic_processors; if(cpus_addr(mask)[0] == 0) - /* can't have no cpu's to accept the interrupt -- extremely + /* can't have no CPUs to accept the interrupt -- extremely * bad things will happen */ return; @@ -1790,7 +1791,7 @@ set_vic_irq_affinity(unsigned int irq, cpumask_t mask) } /* this is magic, we now have the correct affinity maps, so * enable the interrupt. This will send an enable CPI to - * those cpu's who need to enable it in their local masks, + * those CPUs who need to enable it in their local masks, * causing them to correct for the new affinity . If the * interrupt is currently globally disabled, it will simply be * disabled again as it comes in (voyager lazy disable). If diff --git a/arch/x86/mach-voyager/voyager_thread.c b/arch/x86/mach-voyager/voyager_thread.c index f9d595338159..50f9366c411e 100644 --- a/arch/x86/mach-voyager/voyager_thread.c +++ b/arch/x86/mach-voyager/voyager_thread.c @@ -64,7 +64,7 @@ check_from_kernel(void) { if(voyager_status.switch_off) { - /* FIXME: This should be configureable via proc */ + /* FIXME: This should be configurable via proc */ execute("umask 600; echo 0 > /etc/initrunlvl; kill -HUP 1"); } else if(voyager_status.power_fail) { VDEBUG(("Voyager daemon detected AC power failure\n")); diff --git a/arch/x86/math-emu/Makefile b/arch/x86/math-emu/Makefile index 9c943fa6ce6b..9b0c63b60302 100644 --- a/arch/x86/math-emu/Makefile +++ b/arch/x86/math-emu/Makefile @@ -5,8 +5,7 @@ #DEBUG = -DDEBUGGING DEBUG = PARANOID = -DPARANOID -CFLAGS := $(CFLAGS) $(PARANOID) $(DEBUG) -fno-builtin $(MATH_EMULATION) - +EXTRA_CFLAGS := $(PARANOID) $(DEBUG) -fno-builtin $(MATH_EMULATION) EXTRA_AFLAGS := $(PARANOID) # From 'C' language sources: diff --git a/arch/x86/mm/boot_ioremap_32.c b/arch/x86/mm/boot_ioremap_32.c index 4de95a17a7d4..f14da2a53ece 100644 --- a/arch/x86/mm/boot_ioremap_32.c +++ b/arch/x86/mm/boot_ioremap_32.c @@ -10,7 +10,7 @@ /* * We need to use the 2-level pagetable functions, but CONFIG_X86_PAE - * keeps that from happenning. If anyone has a better way, I'm listening. + * keeps that from happening. If anyone has a better way, I'm listening. * * boot_pte_t is defined only if this all works correctly */ diff --git a/arch/x86/mm/discontig_32.c b/arch/x86/mm/discontig_32.c index 860e912a3fbb..13a474d3c6e9 100644 --- a/arch/x86/mm/discontig_32.c +++ b/arch/x86/mm/discontig_32.c @@ -40,7 +40,7 @@ struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; EXPORT_SYMBOL(node_data); -bootmem_data_t node0_bdata; +static bootmem_data_t node0_bdata; /* * numa interface - we expect the numa architecture specific code to have @@ -103,14 +103,14 @@ extern unsigned long highend_pfn, highstart_pfn; #define LARGE_PAGE_BYTES (PTRS_PER_PTE * PAGE_SIZE) -unsigned long node_remap_start_pfn[MAX_NUMNODES]; +static unsigned long node_remap_start_pfn[MAX_NUMNODES]; unsigned long node_remap_size[MAX_NUMNODES]; -unsigned long node_remap_offset[MAX_NUMNODES]; -void *node_remap_start_vaddr[MAX_NUMNODES]; +static unsigned long node_remap_offset[MAX_NUMNODES]; +static void *node_remap_start_vaddr[MAX_NUMNODES]; void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags); -void *node_remap_end_vaddr[MAX_NUMNODES]; -void *node_remap_alloc_vaddr[MAX_NUMNODES]; +static void *node_remap_end_vaddr[MAX_NUMNODES]; +static void *node_remap_alloc_vaddr[MAX_NUMNODES]; static unsigned long kva_start_pfn; static unsigned long kva_pages; /* @@ -273,7 +273,7 @@ unsigned long __init setup_memory(void) * When mapping a NUMA machine we allocate the node_mem_map arrays * from node local memory. They are then mapped directly into KVA * between zone normal and vmalloc space. Calculate the size of - * this space and use it to adjust the boundry between ZONE_NORMAL + * this space and use it to adjust the boundary between ZONE_NORMAL * and ZONE_HIGHMEM. */ find_max_pfn(); @@ -288,8 +288,9 @@ unsigned long __init setup_memory(void) #ifdef CONFIG_BLK_DEV_INITRD /* Numa kva area is below the initrd */ - if (LOADER_TYPE && INITRD_START) - kva_start_pfn = PFN_DOWN(INITRD_START) - kva_pages; + if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) + kva_start_pfn = PFN_DOWN(boot_params.hdr.ramdisk_image) + - kva_pages; #endif kva_start_pfn -= kva_start_pfn & (PTRS_PER_PTE-1); @@ -403,7 +404,7 @@ void __init set_highmem_pages_init(int bad_ppro) } #ifdef CONFIG_MEMORY_HOTPLUG -int paddr_to_nid(u64 addr) +static int paddr_to_nid(u64 addr) { int nid; unsigned long pfn = PFN_DOWN(addr); diff --git a/arch/x86/mm/fault_32.c b/arch/x86/mm/fault_32.c index fcb38e7f3543..a2273d44aa27 100644 --- a/arch/x86/mm/fault_32.c +++ b/arch/x86/mm/fault_32.c @@ -25,6 +25,7 @@ #include <linux/kprobes.h> #include <linux/uaccess.h> #include <linux/kdebug.h> +#include <linux/kprobes.h> #include <asm/system.h> #include <asm/desc.h> @@ -32,33 +33,27 @@ extern void die(const char *,struct pt_regs *,long); -static ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain); - -int register_page_fault_notifier(struct notifier_block *nb) +#ifdef CONFIG_KPROBES +static inline int notify_page_fault(struct pt_regs *regs) { - vmalloc_sync_all(); - return atomic_notifier_chain_register(¬ify_page_fault_chain, nb); -} -EXPORT_SYMBOL_GPL(register_page_fault_notifier); + int ret = 0; + + /* kprobe_running() needs smp_processor_id() */ + if (!user_mode_vm(regs)) { + preempt_disable(); + if (kprobe_running() && kprobe_fault_handler(regs, 14)) + ret = 1; + preempt_enable(); + } -int unregister_page_fault_notifier(struct notifier_block *nb) -{ - return atomic_notifier_chain_unregister(¬ify_page_fault_chain, nb); + return ret; } -EXPORT_SYMBOL_GPL(unregister_page_fault_notifier); - -static inline int notify_page_fault(struct pt_regs *regs, long err) +#else +static inline int notify_page_fault(struct pt_regs *regs) { - struct die_args args = { - .regs = regs, - .str = "page fault", - .err = err, - .trapnr = 14, - .signr = SIGSEGV - }; - return atomic_notifier_call_chain(¬ify_page_fault_chain, - DIE_PAGE_FAULT, &args); + return 0; } +#endif /* * Return EIP plus the CS segment base. The segment limit is also @@ -110,7 +105,7 @@ static inline unsigned long get_segment_eip(struct pt_regs *regs, LDT and other horrors are only used in user space. */ if (seg & (1<<2)) { /* Must lock the LDT while reading it. */ - down(¤t->mm->context.sem); + mutex_lock(¤t->mm->context.lock); desc = current->mm->context.ldt; desc = (void *)desc + (seg & ~7); } else { @@ -123,7 +118,7 @@ static inline unsigned long get_segment_eip(struct pt_regs *regs, base = get_desc_base((unsigned long *)desc); if (seg & (1<<2)) { - up(¤t->mm->context.sem); + mutex_unlock(¤t->mm->context.lock); } else put_cpu(); @@ -308,6 +303,11 @@ fastcall void __kprobes do_page_fault(struct pt_regs *regs, int write, si_code; int fault; + /* + * We can fault from pretty much anywhere, with unknown IRQ state. + */ + trace_hardirqs_fixup(); + /* get the address */ address = read_cr2(); @@ -331,7 +331,7 @@ fastcall void __kprobes do_page_fault(struct pt_regs *regs, if (unlikely(address >= TASK_SIZE)) { if (!(error_code & 0x0000000d) && vmalloc_fault(address) >= 0) return; - if (notify_page_fault(regs, error_code) == NOTIFY_STOP) + if (notify_page_fault(regs)) return; /* * Don't take the mm semaphore here. If we fixup a prefetch @@ -340,7 +340,7 @@ fastcall void __kprobes do_page_fault(struct pt_regs *regs, goto bad_area_nosemaphore; } - if (notify_page_fault(regs, error_code) == NOTIFY_STOP) + if (notify_page_fault(regs)) return; /* It's safe to allow irq's after cr2 has been saved and the vmalloc @@ -359,7 +359,7 @@ fastcall void __kprobes do_page_fault(struct pt_regs *regs, /* When running in the kernel we expect faults to occur only to * addresses in user space. All other faults represent errors in the - * kernel and should generate an OOPS. Unfortunatly, in the case of an + * kernel and should generate an OOPS. Unfortunately, in the case of an * erroneous fault occurring in a code path which already holds mmap_sem * we will deadlock attempting to validate the fault against the * address space. Luckily the kernel only validly references user @@ -367,7 +367,7 @@ fastcall void __kprobes do_page_fault(struct pt_regs *regs, * exceptions table. * * As the vast majority of faults will be valid we will only perform - * the source reference check when there is a possibilty of a deadlock. + * the source reference check when there is a possibility of a deadlock. * Attempt to lock the address space, if we cannot we then validate the * source. If this is invalid we can skip the address space check, * thus avoiding the deadlock. @@ -476,8 +476,8 @@ bad_area_nosemaphore: printk_ratelimit()) { printk("%s%s[%d]: segfault at %08lx eip %08lx " "esp %08lx error %lx\n", - tsk->pid > 1 ? KERN_INFO : KERN_EMERG, - tsk->comm, tsk->pid, address, regs->eip, + task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG, + tsk->comm, task_pid_nr(tsk), address, regs->eip, regs->esp, error_code); } tsk->thread.cr2 = address; @@ -544,23 +544,22 @@ no_context: printk(KERN_ALERT "BUG: unable to handle kernel paging" " request"); printk(" at virtual address %08lx\n",address); - printk(KERN_ALERT " printing eip:\n"); - printk("%08lx\n", regs->eip); + printk(KERN_ALERT "printing eip: %08lx ", regs->eip); page = read_cr3(); page = ((__typeof__(page) *) __va(page))[address >> PGDIR_SHIFT]; #ifdef CONFIG_X86_PAE - printk(KERN_ALERT "*pdpt = %016Lx\n", page); + printk("*pdpt = %016Lx ", page); if ((page >> PAGE_SHIFT) < max_low_pfn && page & _PAGE_PRESENT) { page &= PAGE_MASK; page = ((__typeof__(page) *) __va(page))[(address >> PMD_SHIFT) & (PTRS_PER_PMD - 1)]; - printk(KERN_ALERT "*pde = %016Lx\n", page); + printk(KERN_CONT "*pde = %016Lx ", page); page &= ~_PAGE_NX; } #else - printk(KERN_ALERT "*pde = %08lx\n", page); + printk("*pde = %08lx ", page); #endif /* @@ -570,12 +569,15 @@ no_context: * it's allocated already. */ if ((page >> PAGE_SHIFT) < max_low_pfn - && (page & _PAGE_PRESENT)) { + && (page & _PAGE_PRESENT) + && !(page & _PAGE_PSE)) { page &= PAGE_MASK; page = ((__typeof__(page) *) __va(page))[(address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)]; - printk(KERN_ALERT "*pte = %0*Lx\n", sizeof(page)*2, (u64)page); + printk("*pte = %0*Lx ", sizeof(page)*2, (u64)page); } + + printk("\n"); } tsk->thread.cr2 = address; @@ -591,14 +593,14 @@ no_context: */ out_of_memory: up_read(&mm->mmap_sem); - if (is_init(tsk)) { + if (is_global_init(tsk)) { yield(); down_read(&mm->mmap_sem); goto survive; } printk("VM: killing process %s\n", tsk->comm); if (error_code & 4) - do_exit(SIGKILL); + do_group_exit(SIGKILL); goto no_context; do_sigbus: diff --git a/arch/x86/mm/fault_64.c b/arch/x86/mm/fault_64.c index 54816adb8e93..0e26230669ca 100644 --- a/arch/x86/mm/fault_64.c +++ b/arch/x86/mm/fault_64.c @@ -25,6 +25,7 @@ #include <linux/kprobes.h> #include <linux/uaccess.h> #include <linux/kdebug.h> +#include <linux/kprobes.h> #include <asm/system.h> #include <asm/pgalloc.h> @@ -40,34 +41,27 @@ #define PF_RSVD (1<<3) #define PF_INSTR (1<<4) -static ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain); - -/* Hook to register for page fault notifications */ -int register_page_fault_notifier(struct notifier_block *nb) +#ifdef CONFIG_KPROBES +static inline int notify_page_fault(struct pt_regs *regs) { - vmalloc_sync_all(); - return atomic_notifier_chain_register(¬ify_page_fault_chain, nb); -} -EXPORT_SYMBOL_GPL(register_page_fault_notifier); + int ret = 0; + + /* kprobe_running() needs smp_processor_id() */ + if (!user_mode(regs)) { + preempt_disable(); + if (kprobe_running() && kprobe_fault_handler(regs, 14)) + ret = 1; + preempt_enable(); + } -int unregister_page_fault_notifier(struct notifier_block *nb) -{ - return atomic_notifier_chain_unregister(¬ify_page_fault_chain, nb); + return ret; } -EXPORT_SYMBOL_GPL(unregister_page_fault_notifier); - -static inline int notify_page_fault(struct pt_regs *regs, long err) +#else +static inline int notify_page_fault(struct pt_regs *regs) { - struct die_args args = { - .regs = regs, - .str = "page fault", - .err = err, - .trapnr = 14, - .signr = SIGSEGV - }; - return atomic_notifier_call_chain(¬ify_page_fault_chain, - DIE_PAGE_FAULT, &args); + return 0; } +#endif /* Sometimes the CPU reports invalid exceptions on prefetch. Check that here and ignore. @@ -175,7 +169,7 @@ void dump_pagetable(unsigned long address) pmd = pmd_offset(pud, address); if (bad_address(pmd)) goto bad; printk("PMD %lx ", pmd_val(*pmd)); - if (!pmd_present(*pmd)) goto ret; + if (!pmd_present(*pmd) || pmd_large(*pmd)) goto ret; pte = pte_offset_kernel(pmd, address); if (bad_address(pte)) goto bad; @@ -291,7 +285,6 @@ static int vmalloc_fault(unsigned long address) return 0; } -static int page_fault_trace; int show_unhandled_signals = 1; /* @@ -311,6 +304,11 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, unsigned long flags; siginfo_t info; + /* + * We can fault from pretty much anywhere, with unknown IRQ state. + */ + trace_hardirqs_fixup(); + tsk = current; mm = tsk->mm; prefetchw(&mm->mmap_sem); @@ -345,7 +343,7 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, if (vmalloc_fault(address) >= 0) return; } - if (notify_page_fault(regs, error_code) == NOTIFY_STOP) + if (notify_page_fault(regs)) return; /* * Don't take the mm semaphore here. If we fixup a prefetch @@ -354,16 +352,12 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, goto bad_area_nosemaphore; } - if (notify_page_fault(regs, error_code) == NOTIFY_STOP) + if (notify_page_fault(regs)) return; if (likely(regs->eflags & X86_EFLAGS_IF)) local_irq_enable(); - if (unlikely(page_fault_trace)) - printk("pagefault rip:%lx rsp:%lx cs:%lu ss:%lu address %lx error %lx\n", - regs->rip,regs->rsp,regs->cs,regs->ss,address,error_code); - if (unlikely(error_code & PF_RSVD)) pgtable_bad(address, regs, error_code); @@ -384,7 +378,7 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, again: /* When running in the kernel we expect faults to occur only to * addresses in user space. All other faults represent errors in the - * kernel and should generate an OOPS. Unfortunatly, in the case of an + * kernel and should generate an OOPS. Unfortunately, in the case of an * erroneous fault occurring in a code path which already holds mmap_sem * we will deadlock attempting to validate the fault against the * address space. Luckily the kernel only validly references user @@ -392,7 +386,7 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, * exceptions table. * * As the vast majority of faults will be valid we will only perform - * the source reference check when there is a possibilty of a deadlock. + * the source reference check when there is a possibility of a deadlock. * Attempt to lock the address space, if we cannot we then validate the * source. If this is invalid we can skip the address space check, * thus avoiding the deadlock. @@ -494,7 +488,7 @@ bad_area_nosemaphore: if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) && printk_ratelimit()) { printk( - "%s%s[%d]: segfault at %016lx rip %016lx rsp %016lx error %lx\n", + "%s%s[%d]: segfault at %lx rip %lx rsp %lx error %lx\n", tsk->pid > 1 ? KERN_INFO : KERN_EMERG, tsk->comm, tsk->pid, address, regs->rip, regs->rsp, error_code); @@ -560,7 +554,7 @@ no_context: */ out_of_memory: up_read(&mm->mmap_sem); - if (is_init(current)) { + if (is_global_init(current)) { yield(); goto again; } @@ -627,10 +621,3 @@ void vmalloc_sync_all(void) BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) == (__START_KERNEL & PGDIR_MASK))); } - -static int __init enable_pagefaulttrace(char *str) -{ - page_fault_trace = 1; - return 1; -} -__setup("pagefaulttrace", enable_pagefaulttrace); diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 730a5b177b1f..c7d19471261d 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -85,13 +85,20 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd) static pte_t * __init one_page_table_init(pmd_t *pmd) { if (!(pmd_val(*pmd) & _PAGE_PRESENT)) { - pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); + pte_t *page_table = NULL; + +#ifdef CONFIG_DEBUG_PAGEALLOC + page_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE); +#endif + if (!page_table) + page_table = + (pte_t *)alloc_bootmem_low_pages(PAGE_SIZE); paravirt_alloc_pt(&init_mm, __pa(page_table) >> PAGE_SHIFT); set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE)); BUG_ON(page_table != pte_offset_kernel(pmd, 0)); } - + return pte_offset_kernel(pmd, 0); } @@ -735,35 +742,18 @@ int arch_add_memory(int nid, u64 start, u64 size) return __add_pages(zone, start_pfn, nr_pages); } -int remove_memory(u64 start, u64 size) -{ - return -EINVAL; -} -EXPORT_SYMBOL_GPL(remove_memory); #endif struct kmem_cache *pmd_cache; void __init pgtable_cache_init(void) { - size_t pgd_size = PTRS_PER_PGD*sizeof(pgd_t); - - if (PTRS_PER_PMD > 1) { + if (PTRS_PER_PMD > 1) pmd_cache = kmem_cache_create("pmd", - PTRS_PER_PMD*sizeof(pmd_t), - PTRS_PER_PMD*sizeof(pmd_t), - SLAB_PANIC, - pmd_ctor); - if (!SHARED_KERNEL_PMD) { - /* If we're in PAE mode and have a non-shared - kernel pmd, then the pgd size must be a - page size. This is because the pgd_list - links through the page structure, so there - can only be one pgd per page for this to - work. */ - pgd_size = PAGE_SIZE; - } - } + PTRS_PER_PMD*sizeof(pmd_t), + PTRS_PER_PMD*sizeof(pmd_t), + SLAB_PANIC, + pmd_ctor); } /* diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 458893b376f8..a7308b2cd058 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -474,12 +474,6 @@ error: } EXPORT_SYMBOL_GPL(arch_add_memory); -int remove_memory(u64 start, u64 size) -{ - return -EINVAL; -} -EXPORT_SYMBOL_GPL(remove_memory); - #if !defined(CONFIG_ACPI_NUMA) && defined(CONFIG_NUMA) int memory_add_physaddr_to_nid(u64 start) { @@ -734,12 +728,6 @@ int in_gate_area_no_task(unsigned long addr) return (addr >= VSYSCALL_START) && (addr < VSYSCALL_END); } -void * __init alloc_bootmem_high_node(pg_data_t *pgdat, unsigned long size) -{ - return __alloc_bootmem_core(pgdat->bdata, size, - SMP_CACHE_BYTES, (4UL*1024*1024*1024), 0); -} - const char *arch_vma_name(struct vm_area_struct *vma) { if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso) @@ -748,3 +736,48 @@ const char *arch_vma_name(struct vm_area_struct *vma) return "[vsyscall]"; return NULL; } + +#ifdef CONFIG_SPARSEMEM_VMEMMAP +/* + * Initialise the sparsemem vmemmap using huge-pages at the PMD level. + */ +int __meminit vmemmap_populate(struct page *start_page, + unsigned long size, int node) +{ + unsigned long addr = (unsigned long)start_page; + unsigned long end = (unsigned long)(start_page + size); + unsigned long next; + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + + for (; addr < end; addr = next) { + next = pmd_addr_end(addr, end); + + pgd = vmemmap_pgd_populate(addr, node); + if (!pgd) + return -ENOMEM; + pud = vmemmap_pud_populate(pgd, addr, node); + if (!pud) + return -ENOMEM; + + pmd = pmd_offset(pud, addr); + if (pmd_none(*pmd)) { + pte_t entry; + void *p = vmemmap_alloc_block(PMD_SIZE, node); + if (!p) + return -ENOMEM; + + entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL); + mk_pte_huge(entry); + set_pmd(pmd, __pmd(pte_val(entry))); + + printk(KERN_DEBUG " [%lx-%lx] PMD ->%p on node %d\n", + addr, addr + PMD_SIZE - 1, p, node); + } else + vmemmap_verify((pte_t *)pmd, node, addr, next); + } + + return 0; +} +#endif diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index 6da235522269..3d6926ba8995 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c @@ -166,7 +166,7 @@ early_node_mem(int nodeid, unsigned long start, unsigned long end, return __va(mem); ptr = __alloc_bootmem_nopanic(size, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)); - if (ptr == 0) { + if (ptr == NULL) { printk(KERN_ERR "Cannot find %lu bytes in node %d\n", size, nodeid); return NULL; @@ -261,7 +261,7 @@ void __init numa_init_array(void) We round robin the existing nodes. */ rr = first_node(node_online_map); for (i = 0; i < NR_CPUS; i++) { - if (cpu_to_node[i] != NUMA_NO_NODE) + if (cpu_to_node(i) != NUMA_NO_NODE) continue; numa_set_node(i, rr); rr = next_node(rr, node_online_map); @@ -543,7 +543,7 @@ __cpuinit void numa_add_cpu(int cpu) void __cpuinit numa_set_node(int cpu, int node) { cpu_pda(cpu)->nodenumber = node; - cpu_to_node[cpu] = node; + cpu_to_node(cpu) = node; } unsigned long __init numa_free_all_bootmem(void) @@ -612,7 +612,7 @@ void __init init_cpu_to_node(void) { int i; for (i = 0; i < NR_CPUS; i++) { - u8 apicid = x86_cpu_to_apicid[i]; + u8 apicid = x86_cpu_to_apicid_init[i]; if (apicid == BAD_APICID) continue; if (apicid_to_node[apicid] == NUMA_NO_NODE) diff --git a/arch/x86/mm/pageattr_32.c b/arch/x86/mm/pageattr_32.c index 4241a74d16c8..260073c07600 100644 --- a/arch/x86/mm/pageattr_32.c +++ b/arch/x86/mm/pageattr_32.c @@ -70,10 +70,10 @@ static struct page *split_large_page(unsigned long address, pgprot_t prot, static void cache_flush_page(struct page *p) { - unsigned long adr = (unsigned long)page_address(p); + void *adr = page_address(p); int i; for (i = 0; i < PAGE_SIZE; i += boot_cpu_data.x86_clflush_size) - asm volatile("clflush (%0)" :: "r" (adr + i)); + clflush(adr+i); } static void flush_kernel_map(void *arg) diff --git a/arch/x86/mm/pageattr_64.c b/arch/x86/mm/pageattr_64.c index 10b9809ce821..c40afbaaf93d 100644 --- a/arch/x86/mm/pageattr_64.c +++ b/arch/x86/mm/pageattr_64.c @@ -61,11 +61,11 @@ static struct page *split_large_page(unsigned long address, pgprot_t prot, return base; } -static void cache_flush_page(void *adr) +void clflush_cache_range(void *adr, int size) { int i; - for (i = 0; i < PAGE_SIZE; i += boot_cpu_data.x86_clflush_size) - asm volatile("clflush (%0)" :: "r" (adr + i)); + for (i = 0; i < size; i += boot_cpu_data.x86_clflush_size) + clflush(adr+i); } static void flush_kernel_map(void *arg) @@ -80,7 +80,7 @@ static void flush_kernel_map(void *arg) asm volatile("wbinvd" ::: "memory"); else list_for_each_entry(pg, l, lru) { void *adr = page_address(pg); - cache_flush_page(adr); + clflush_cache_range(adr, PAGE_SIZE); } __flush_tlb_all(); } @@ -148,6 +148,7 @@ __change_page_attr(unsigned long address, unsigned long pfn, pgprot_t prot, split = split_large_page(address, prot, ref_prot2); if (!split) return -ENOMEM; + pgprot_val(ref_prot2) &= ~_PAGE_NX; set_pte(kpte, mk_pte(split, ref_prot2)); kpte_page = split; } @@ -229,9 +230,14 @@ void global_flush_tlb(void) struct page *pg, *next; struct list_head l; - down_read(&init_mm.mmap_sem); + /* + * Write-protect the semaphore, to exclude two contexts + * doing a list_replace_init() call in parallel and to + * exclude new additions to the deferred_pages list: + */ + down_write(&init_mm.mmap_sem); list_replace_init(&deferred_pages, &l); - up_read(&init_mm.mmap_sem); + up_write(&init_mm.mmap_sem); flush_map(&l); diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c index 01437c46baae..be61a1d845a4 100644 --- a/arch/x86/mm/pgtable_32.c +++ b/arch/x86/mm/pgtable_32.c @@ -6,6 +6,7 @@ #include <linux/kernel.h> #include <linux/errno.h> #include <linux/mm.h> +#include <linux/nmi.h> #include <linux/swap.h> #include <linux/smp.h> #include <linux/highmem.h> @@ -39,6 +40,8 @@ void show_mem(void) for_each_online_pgdat(pgdat) { pgdat_resize_lock(pgdat, &flags); for (i = 0; i < pgdat->node_spanned_pages; ++i) { + if (unlikely(i % MAX_ORDER_NR_PAGES == 0)) + touch_nmi_watchdog(); page = pgdat_page_nr(pgdat, i); total++; if (PageHighMem(page)) @@ -97,8 +100,7 @@ static void set_pte_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags) } pte = pte_offset_kernel(pmd, vaddr); if (pgprot_val(flags)) - /* <pfn,flags> stored as-is, to permit clearing entries */ - set_pte(pte, pfn_pte(pfn, flags)); + set_pte_present(&init_mm, vaddr, pte, pfn_pte(pfn, flags)); else pte_clear(&init_mm, vaddr, pte); @@ -193,7 +195,7 @@ struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) return pte; } -void pmd_ctor(void *pmd, struct kmem_cache *cache, unsigned long flags) +void pmd_ctor(struct kmem_cache *cache, void *pmd) { memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t)); } diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index acdf03e19146..ea85172fc0cc 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c @@ -218,7 +218,7 @@ static inline int save_add_info(void) {return 0;} /* * Update nodes_add and decide if to include add are in the zone. * Both SPARSE and RESERVE need nodes_add infomation. - * This code supports one contigious hot add area per node. + * This code supports one contiguous hot add area per node. */ static int reserve_hotadd(int node, unsigned long start, unsigned long end) { @@ -431,9 +431,9 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end) setup_node_bootmem(i, nodes[i].start, nodes[i].end); for (i = 0; i < NR_CPUS; i++) { - if (cpu_to_node[i] == NUMA_NO_NODE) + if (cpu_to_node(i) == NUMA_NO_NODE) continue; - if (!node_isset(cpu_to_node[i], node_possible_map)) + if (!node_isset(cpu_to_node(i), node_possible_map)) numa_set_node(i, NUMA_NO_NODE); } numa_init_array(); diff --git a/arch/x86/oprofile/Kconfig b/arch/x86/oprofile/Kconfig deleted file mode 100644 index d8a84088471a..000000000000 --- a/arch/x86/oprofile/Kconfig +++ /dev/null @@ -1,17 +0,0 @@ -config PROFILING - bool "Profiling support (EXPERIMENTAL)" - help - Say Y here to enable the extended profiling support mechanisms used - by profilers such as OProfile. - - -config OPROFILE - tristate "OProfile system profiling (EXPERIMENTAL)" - depends on PROFILING - help - OProfile is a profiling system capable of profiling the - whole system, include the kernel, kernel modules, libraries, - and applications. - - If unsure, say N. - diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c index c049ce414f01..0ed046a187f7 100644 --- a/arch/x86/oprofile/backtrace.c +++ b/arch/x86/oprofile/backtrace.c @@ -13,25 +13,45 @@ #include <linux/mm.h> #include <asm/ptrace.h> #include <asm/uaccess.h> +#include <asm/stacktrace.h> -struct frame_head { - struct frame_head * ebp; - unsigned long ret; -} __attribute__((packed)); +static void backtrace_warning_symbol(void *data, char *msg, + unsigned long symbol) +{ + /* Ignore warnings */ +} -static struct frame_head * -dump_kernel_backtrace(struct frame_head * head) +static void backtrace_warning(void *data, char *msg) { - oprofile_add_trace(head->ret); + /* Ignore warnings */ +} - /* frame pointers should strictly progress back up the stack - * (towards higher addresses) */ - if (head >= head->ebp) - return NULL; +static int backtrace_stack(void *data, char *name) +{ + /* Yes, we want all stacks */ + return 0; +} + +static void backtrace_address(void *data, unsigned long addr) +{ + unsigned int *depth = data; - return head->ebp; + if ((*depth)--) + oprofile_add_trace(addr); } +static struct stacktrace_ops backtrace_ops = { + .warning = backtrace_warning, + .warning_symbol = backtrace_warning_symbol, + .stack = backtrace_stack, + .address = backtrace_address, +}; + +struct frame_head { + struct frame_head *ebp; + unsigned long ret; +} __attribute__((packed)); + static struct frame_head * dump_user_backtrace(struct frame_head * head) { @@ -53,72 +73,16 @@ dump_user_backtrace(struct frame_head * head) return bufhead[0].ebp; } -/* - * | | /\ Higher addresses - * | | - * --------------- stack base (address of current_thread_info) - * | thread info | - * . . - * | stack | - * --------------- saved regs->ebp value if valid (frame_head address) - * . . - * --------------- saved regs->rsp value if x86_64 - * | | - * --------------- struct pt_regs * stored on stack if 32-bit - * | | - * . . - * | | - * --------------- %esp - * | | - * | | \/ Lower addresses - * - * Thus, regs (or regs->rsp for x86_64) <-> stack base restricts the - * valid(ish) ebp values. Note: (1) for x86_64, NMI and several other - * exceptions use special stacks, maintained by the interrupt stack table - * (IST). These stacks are set up in trap_init() in - * arch/x86_64/kernel/traps.c. Thus, for x86_64, regs now does not point - * to the kernel stack; instead, it points to some location on the NMI - * stack. On the other hand, regs->rsp is the stack pointer saved when the - * NMI occurred. (2) For 32-bit, regs->esp is not valid because the - * processor does not save %esp on the kernel stack when interrupts occur - * in the kernel mode. - */ -#ifdef CONFIG_FRAME_POINTER -static int valid_kernel_stack(struct frame_head * head, struct pt_regs * regs) -{ - unsigned long headaddr = (unsigned long)head; -#ifdef CONFIG_X86_64 - unsigned long stack = (unsigned long)regs->rsp; -#else - unsigned long stack = (unsigned long)regs; -#endif - unsigned long stack_base = (stack & ~(THREAD_SIZE - 1)) + THREAD_SIZE; - - return headaddr > stack && headaddr < stack_base; -} -#else -/* without fp, it's just junk */ -static int valid_kernel_stack(struct frame_head * head, struct pt_regs * regs) -{ - return 0; -} -#endif - - void x86_backtrace(struct pt_regs * const regs, unsigned int depth) { - struct frame_head *head; - -#ifdef CONFIG_X86_64 - head = (struct frame_head *)regs->rbp; -#else - head = (struct frame_head *)regs->ebp; -#endif + struct frame_head *head = (struct frame_head *)frame_pointer(regs); + unsigned long stack = stack_pointer(regs); if (!user_mode_vm(regs)) { - while (depth-- && valid_kernel_stack(head, regs)) - head = dump_kernel_backtrace(head); + if (depth) + dump_trace(NULL, regs, (unsigned long *)stack, + &backtrace_ops, &depth); return; } diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 11b7a51566a8..2d0eeac7251f 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -269,7 +269,6 @@ static void nmi_cpu_shutdown(void * dummy) apic_write(APIC_LVTPC, saved_lvtpc[cpu]); apic_write(APIC_LVTERR, v); nmi_restore_registers(msrs); - model->shutdown(msrs); } @@ -278,6 +277,7 @@ static void nmi_shutdown(void) nmi_enabled = 0; on_each_cpu(nmi_cpu_shutdown, NULL, 0, 1); unregister_die_notifier(&profile_exceptions_nb); + model->shutdown(cpu_msrs); free_msrs(); } diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c index 47925927b12f..56b4757a1f47 100644 --- a/arch/x86/oprofile/op_model_p4.c +++ b/arch/x86/oprofile/op_model_p4.c @@ -379,7 +379,7 @@ static unsigned int get_stagger(void) { #ifdef CONFIG_SMP int cpu = smp_processor_id(); - return (cpu != first_cpu(cpu_sibling_map[cpu])); + return (cpu != first_cpu(per_cpu(cpu_sibling_map, cpu))); #endif return 0; } diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h index abb1aa95b979..45b605fa71d0 100644 --- a/arch/x86/oprofile/op_x86_model.h +++ b/arch/x86/oprofile/op_x86_model.h @@ -29,7 +29,7 @@ struct op_msrs { struct pt_regs; /* The model vtable abstracts the differences between - * various x86 CPU model's perfctr support. + * various x86 CPU models' perfctr support. */ struct op_x86_model_spec { unsigned int const num_counters; diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index 2d71bbc411d2..f4386990b150 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -289,6 +289,22 @@ static struct dmi_system_id __devinitdata pciprobe_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant BL685c G1"), }, }, + { + .callback = set_bf_sort, + .ident = "HP ProLiant DL385 G2", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "HP"), + DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant DL385 G2"), + }, + }, + { + .callback = set_bf_sort, + .ident = "HP ProLiant DL585 G2", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "HP"), + DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant DL585 G2"), + }, + }, #ifdef __i386__ { .callback = assign_all_busses, diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c index c52150fdf82b..88d8f5c0ecb5 100644 --- a/arch/x86/pci/irq.c +++ b/arch/x86/pci/irq.c @@ -169,7 +169,7 @@ void eisa_set_level_irq(unsigned int irq) } /* - * Common IRQ routing practice: nybbles in config space, + * Common IRQ routing practice: nibbles in config space, * offset by some magic constant. */ static unsigned int read_config_nybble(struct pci_dev *router, unsigned offset, unsigned nr) @@ -585,7 +585,7 @@ static __init int via_router_probe(struct irq_router *r, /* FIXME: We should move some of the quirk fixup stuff here */ /* - * work arounds for some buggy BIOSes + * workarounds for some buggy BIOSes */ if (device == PCI_DEVICE_ID_VIA_82C586_0) { switch(router->device) { diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile index 8d03de029d9b..e7bff0fbac23 100644 --- a/arch/x86/vdso/Makefile +++ b/arch/x86/vdso/Makefile @@ -13,29 +13,36 @@ vobjs := $(foreach F,$(vobjs-y),$(obj)/$F) $(obj)/vdso.o: $(obj)/vdso.so -targets += vdso.so vdso.lds $(vobjs-y) vdso-syms.o +targets += vdso.so vdso.so.dbg vdso.lds $(vobjs-y) vdso-syms.o # The DSO images are built using a special linker script. quiet_cmd_syscall = SYSCALL $@ cmd_syscall = $(CC) -m elf_x86_64 -nostdlib $(SYSCFLAGS_$(@F)) \ -Wl,-T,$(filter-out FORCE,$^) -o $@ -export CPPFLAGS_vdso.lds += -P -C -U$(ARCH) +export CPPFLAGS_vdso.lds += -P -C vdso-flags = -fPIC -shared -Wl,-soname=linux-vdso.so.1 \ $(call ld-option, -Wl$(comma)--hash-style=sysv) \ -Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096 SYSCFLAGS_vdso.so = $(vdso-flags) +SYSCFLAGS_vdso.so.dbg = $(vdso-flags) $(obj)/vdso.o: $(src)/vdso.S $(obj)/vdso.so $(obj)/vdso.so: $(src)/vdso.lds $(vobjs) FORCE + +$(obj)/vdso.so.dbg: $(src)/vdso.lds $(vobjs) FORCE $(call if_changed,syscall) +$(obj)/%.so: OBJCOPYFLAGS := -S +$(obj)/%.so: $(obj)/%.so.dbg FORCE + $(call if_changed,objcopy) + CFL := $(PROFILING) -mcmodel=small -fPIC -g0 -O2 -fasynchronous-unwind-tables -m64 -$(obj)/vclock_gettime.o: CFLAGS = $(CFL) -$(obj)/vgetcpu.o: CFLAGS = $(CFL) +$(obj)/vclock_gettime.o: KBUILD_CFLAGS = $(CFL) +$(obj)/vgetcpu.o: KBUILD_CFLAGS = $(CFL) # We also create a special relocatable object that should mirror the symbol # table and layout of the linked DSO. With ld -R we can then refer to @@ -47,3 +54,11 @@ $(obj)/built-in.o: ld_flags += -R $(obj)/vdso-syms.o SYSCFLAGS_vdso-syms.o = -r -d $(obj)/vdso-syms.o: $(src)/vdso.lds $(vobjs) FORCE $(call if_changed,syscall) + +quiet_cmd_vdso_install = INSTALL $@ + cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@ +vdso.so: + @mkdir -p $(MODLIB)/vdso + $(call cmd,vdso_install) + +vdso_install: vdso.so diff --git a/arch/x86/vdso/vdso.lds.S b/arch/x86/vdso/vdso.lds.S index b9a60e665d08..667d3245d972 100644 --- a/arch/x86/vdso/vdso.lds.S +++ b/arch/x86/vdso/vdso.lds.S @@ -26,13 +26,16 @@ SECTIONS is insufficient, ld -shared will barf. Just increase it here. */ . = VDSO_PRELINK + VDSO_TEXT_OFFSET; - .text : { *(.text) } :text - .text.ptr : { *(.text.ptr) } :text - . = VDSO_PRELINK + 0x900; - .data : { *(.data) } :text - .bss : { *(.bss) } :text + .text : { *(.text*) } :text + .rodata : { *(.rodata*) } :text + .data : { + *(.data*) + *(.sdata*) + *(.bss*) + *(.dynbss*) + } :text - .altinstructions : { *(.altinstructions) } :text + .altinstructions : { *(.altinstructions) } :text .altinstr_replacement : { *(.altinstr_replacement) } :text .note : { *(.note.*) } :text :note @@ -42,7 +45,6 @@ SECTIONS .useless : { *(.got.plt) *(.got) *(.gnu.linkonce.d.*) - *(.dynbss) *(.gnu.linkonce.b.*) } :text } diff --git a/arch/x86/vdso/vvar.c b/arch/x86/vdso/vvar.c index 6fc22219a472..1b7e703684f9 100644 --- a/arch/x86/vdso/vvar.c +++ b/arch/x86/vdso/vvar.c @@ -8,5 +8,5 @@ #include <asm/timex.h> #include <asm/vgtod.h> -#define VEXTERN(x) typeof (__ ## x) *vdso_ ## x = (void *)VMAGIC; +#define VEXTERN(x) typeof (__ ## x) *const vdso_ ## x = (void *)VMAGIC; #include "vextern.h" diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index 9df99e1885a4..fbfa55ce0d55 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig @@ -3,8 +3,9 @@ # config XEN - bool "Enable support for Xen hypervisor" - depends on PARAVIRT && X86_CMPXCHG && X86_TSC && !NEED_MULTIPLE_NODES + bool "Xen guest support" + select PARAVIRT + depends on X86_CMPXCHG && X86_TSC && !NEED_MULTIPLE_NODES && !(X86_VISWS || X86_VOYAGER) help This is the Linux Xen port. Enabling this will allow the kernel to boot in a paravirtualized environment under the diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index f01bfcd4bdee..94c39aaf695f 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -25,7 +25,6 @@ #include <linux/mm.h> #include <linux/page-flags.h> #include <linux/highmem.h> -#include <linux/smp.h> #include <xen/interface/xen.h> #include <xen/interface/physdev.h> @@ -52,11 +51,25 @@ EXPORT_SYMBOL_GPL(hypercall_page); -DEFINE_PER_CPU(enum paravirt_lazy_mode, xen_lazy_mode); - DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu); DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info); -DEFINE_PER_CPU(unsigned long, xen_cr3); + +/* + * Note about cr3 (pagetable base) values: + * + * xen_cr3 contains the current logical cr3 value; it contains the + * last set cr3. This may not be the current effective cr3, because + * its update may be being lazily deferred. However, a vcpu looking + * at its own cr3 can use this value knowing that it everything will + * be self-consistent. + * + * xen_current_cr3 contains the actual vcpu cr3; it is set once the + * hypercall to set the vcpu cr3 is complete (so it may be a little + * out of date, but it will never be set early). If one vcpu is + * looking at another vcpu's cr3 value, it should use this variable. + */ +DEFINE_PER_CPU(unsigned long, xen_cr3); /* cr3 stored as physaddr */ +DEFINE_PER_CPU(unsigned long, xen_current_cr3); /* actual vcpu cr3 */ struct start_info *xen_start_info; EXPORT_SYMBOL_GPL(xen_start_info); @@ -100,7 +113,7 @@ static void __init xen_vcpu_setup(int cpu) info.mfn = virt_to_mfn(vcpup); info.offset = offset_in_page(vcpup); - printk(KERN_DEBUG "trying to map vcpu_info %d at %p, mfn %x, offset %d\n", + printk(KERN_DEBUG "trying to map vcpu_info %d at %p, mfn %llx, offset %d\n", cpu, vcpup, info.mfn, info.offset); /* Check to see if the hypervisor will put the vcpu_info @@ -124,7 +137,7 @@ static void __init xen_vcpu_setup(int cpu) static void __init xen_banner(void) { printk(KERN_INFO "Booting paravirtualized kernel on %s\n", - paravirt_ops.name); + pv_info.name); printk(KERN_INFO "Hypervisor signature: %s\n", xen_start_info->magic); } @@ -249,29 +262,10 @@ static void xen_halt(void) xen_safe_halt(); } -static void xen_set_lazy_mode(enum paravirt_lazy_mode mode) +static void xen_leave_lazy(void) { - BUG_ON(preemptible()); - - switch (mode) { - case PARAVIRT_LAZY_NONE: - BUG_ON(x86_read_percpu(xen_lazy_mode) == PARAVIRT_LAZY_NONE); - break; - - case PARAVIRT_LAZY_MMU: - case PARAVIRT_LAZY_CPU: - BUG_ON(x86_read_percpu(xen_lazy_mode) != PARAVIRT_LAZY_NONE); - break; - - case PARAVIRT_LAZY_FLUSH: - /* flush if necessary, but don't change state */ - if (x86_read_percpu(xen_lazy_mode) != PARAVIRT_LAZY_NONE) - xen_mc_flush(); - return; - } - + paravirt_leave_lazy(paravirt_get_lazy_mode()); xen_mc_flush(); - x86_write_percpu(xen_lazy_mode, mode); } static unsigned long xen_store_tr(void) @@ -358,7 +352,7 @@ static void xen_load_tls(struct thread_struct *t, unsigned int cpu) * loaded properly. This will go away as soon as Xen has been * modified to not save/restore %gs for normal hypercalls. */ - if (xen_get_lazy_mode() == PARAVIRT_LAZY_CPU) + if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) loadsegment(gs, 0); } @@ -632,32 +626,36 @@ static unsigned long xen_read_cr3(void) return x86_read_percpu(xen_cr3); } +static void set_current_cr3(void *v) +{ + x86_write_percpu(xen_current_cr3, (unsigned long)v); +} + static void xen_write_cr3(unsigned long cr3) { + struct mmuext_op *op; + struct multicall_space mcs; + unsigned long mfn = pfn_to_mfn(PFN_DOWN(cr3)); + BUG_ON(preemptible()); - if (cr3 == x86_read_percpu(xen_cr3)) { - /* just a simple tlb flush */ - xen_flush_tlb(); - return; - } + mcs = xen_mc_entry(sizeof(*op)); /* disables interrupts */ + /* Update while interrupts are disabled, so its atomic with + respect to ipis */ x86_write_percpu(xen_cr3, cr3); + op = mcs.args; + op->cmd = MMUEXT_NEW_BASEPTR; + op->arg1.mfn = mfn; - { - struct mmuext_op *op; - struct multicall_space mcs = xen_mc_entry(sizeof(*op)); - unsigned long mfn = pfn_to_mfn(PFN_DOWN(cr3)); - - op = mcs.args; - op->cmd = MMUEXT_NEW_BASEPTR; - op->arg1.mfn = mfn; + MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); - MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); + /* Update xen_update_cr3 once the batch has actually + been submitted. */ + xen_mc_callback(set_current_cr3, (void *)cr3); - xen_mc_issue(PARAVIRT_LAZY_CPU); - } + xen_mc_issue(PARAVIRT_LAZY_CPU); /* interrupts restored */ } /* Early in boot, while setting up the initial pagetable, assume @@ -668,6 +666,15 @@ static __init void xen_alloc_pt_init(struct mm_struct *mm, u32 pfn) make_lowmem_page_readonly(__va(PFN_PHYS(pfn))); } +static void pin_pagetable_pfn(unsigned level, unsigned long pfn) +{ + struct mmuext_op op; + op.cmd = level; + op.arg1.mfn = pfn_to_mfn(pfn); + if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF)) + BUG(); +} + /* This needs to make sure the new pte page is pinned iff its being attached to a pinned pagetable. */ static void xen_alloc_pt(struct mm_struct *mm, u32 pfn) @@ -677,9 +684,10 @@ static void xen_alloc_pt(struct mm_struct *mm, u32 pfn) if (PagePinned(virt_to_page(mm->pgd))) { SetPagePinned(page); - if (!PageHighMem(page)) + if (!PageHighMem(page)) { make_lowmem_page_readonly(__va(PFN_PHYS(pfn))); - else + pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn); + } else /* make sure there are no stray mappings of this page */ kmap_flush_unused(); @@ -692,8 +700,10 @@ static void xen_release_pt(u32 pfn) struct page *page = pfn_to_page(pfn); if (PagePinned(page)) { - if (!PageHighMem(page)) + if (!PageHighMem(page)) { + pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn); make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); + } } } @@ -738,7 +748,7 @@ static __init void xen_pagetable_setup_start(pgd_t *base) pgd_t *xen_pgd = (pgd_t *)xen_start_info->pt_base; /* special set_pte for pagetable initialization */ - paravirt_ops.set_pte = xen_set_pte_init; + pv_mmu_ops.set_pte = xen_set_pte_init; init_mm.pgd = base; /* @@ -785,8 +795,8 @@ static __init void xen_pagetable_setup_done(pgd_t *base) { /* This will work as long as patching hasn't happened yet (which it hasn't) */ - paravirt_ops.alloc_pt = xen_alloc_pt; - paravirt_ops.set_pte = xen_set_pte; + pv_mmu_ops.alloc_pt = xen_alloc_pt; + pv_mmu_ops.set_pte = xen_set_pte; if (!xen_feature(XENFEAT_auto_translated_physmap)) { /* @@ -808,15 +818,15 @@ static __init void xen_pagetable_setup_done(pgd_t *base) /* Actually pin the pagetable down, but we can't set PG_pinned yet because the page structures don't exist yet. */ { - struct mmuext_op op; + unsigned level; + #ifdef CONFIG_X86_PAE - op.cmd = MMUEXT_PIN_L3_TABLE; + level = MMUEXT_PIN_L3_TABLE; #else - op.cmd = MMUEXT_PIN_L3_TABLE; + level = MMUEXT_PIN_L2_TABLE; #endif - op.arg1.mfn = pfn_to_mfn(PFN_DOWN(__pa(base))); - if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF)) - BUG(); + + pin_pagetable_pfn(level, PFN_DOWN(__pa(base))); } } @@ -833,12 +843,12 @@ void __init xen_setup_vcpu_info_placement(void) if (have_vcpu_info_placement) { printk(KERN_INFO "Xen: using vcpu_info placement\n"); - paravirt_ops.save_fl = xen_save_fl_direct; - paravirt_ops.restore_fl = xen_restore_fl_direct; - paravirt_ops.irq_disable = xen_irq_disable_direct; - paravirt_ops.irq_enable = xen_irq_enable_direct; - paravirt_ops.read_cr2 = xen_read_cr2_direct; - paravirt_ops.iret = xen_iret_direct; + pv_irq_ops.save_fl = xen_save_fl_direct; + pv_irq_ops.restore_fl = xen_restore_fl_direct; + pv_irq_ops.irq_disable = xen_irq_disable_direct; + pv_irq_ops.irq_enable = xen_irq_enable_direct; + pv_mmu_ops.read_cr2 = xen_read_cr2_direct; + pv_cpu_ops.iret = xen_iret_direct; } } @@ -850,8 +860,8 @@ static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf, start = end = reloc = NULL; -#define SITE(x) \ - case PARAVIRT_PATCH(x): \ +#define SITE(op, x) \ + case PARAVIRT_PATCH(op.x): \ if (have_vcpu_info_placement) { \ start = (char *)xen_##x##_direct; \ end = xen_##x##_direct_end; \ @@ -860,10 +870,10 @@ static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf, goto patch_site switch (type) { - SITE(irq_enable); - SITE(irq_disable); - SITE(save_fl); - SITE(restore_fl); + SITE(pv_irq_ops, irq_enable); + SITE(pv_irq_ops, irq_disable); + SITE(pv_irq_ops, save_fl); + SITE(pv_irq_ops, restore_fl); #undef SITE patch_site: @@ -895,26 +905,32 @@ static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf, return ret; } -static const struct paravirt_ops xen_paravirt_ops __initdata = { +static const struct pv_info xen_info __initdata = { .paravirt_enabled = 1, .shared_kernel_pmd = 0, .name = "Xen", - .banner = xen_banner, +}; +static const struct pv_init_ops xen_init_ops __initdata = { .patch = xen_patch, + .banner = xen_banner, .memory_setup = xen_memory_setup, .arch_setup = xen_arch_setup, - .init_IRQ = xen_init_IRQ, .post_allocator_init = xen_mark_init_mm_pinned, +}; +static const struct pv_time_ops xen_time_ops __initdata = { .time_init = xen_time_init, + .set_wallclock = xen_set_wallclock, .get_wallclock = xen_get_wallclock, .get_cpu_khz = xen_cpu_khz, .sched_clock = xen_sched_clock, +}; +static const struct pv_cpu_ops xen_cpu_ops __initdata = { .cpuid = xen_cpuid, .set_debugreg = xen_set_debugreg, @@ -925,22 +941,10 @@ static const struct paravirt_ops xen_paravirt_ops __initdata = { .read_cr0 = native_read_cr0, .write_cr0 = native_write_cr0, - .read_cr2 = xen_read_cr2, - .write_cr2 = xen_write_cr2, - - .read_cr3 = xen_read_cr3, - .write_cr3 = xen_write_cr3, - .read_cr4 = native_read_cr4, .read_cr4_safe = native_read_cr4_safe, .write_cr4 = xen_write_cr4, - .save_fl = xen_save_fl, - .restore_fl = xen_restore_fl, - .irq_disable = xen_irq_disable, - .irq_enable = xen_irq_enable, - .safe_halt = xen_safe_halt, - .halt = xen_halt, .wbinvd = native_wbinvd, .read_msr = native_read_msr_safe, @@ -969,6 +973,23 @@ static const struct paravirt_ops xen_paravirt_ops __initdata = { .set_iopl_mask = xen_set_iopl_mask, .io_delay = xen_io_delay, + .lazy_mode = { + .enter = paravirt_enter_lazy_cpu, + .leave = xen_leave_lazy, + }, +}; + +static const struct pv_irq_ops xen_irq_ops __initdata = { + .init_IRQ = xen_init_IRQ, + .save_fl = xen_save_fl, + .restore_fl = xen_restore_fl, + .irq_disable = xen_irq_disable, + .irq_enable = xen_irq_enable, + .safe_halt = xen_safe_halt, + .halt = xen_halt, +}; + +static const struct pv_apic_ops xen_apic_ops __initdata = { #ifdef CONFIG_X86_LOCAL_APIC .apic_write = xen_apic_write, .apic_write_atomic = xen_apic_write, @@ -977,6 +998,17 @@ static const struct paravirt_ops xen_paravirt_ops __initdata = { .setup_secondary_clock = paravirt_nop, .startup_ipi_hook = paravirt_nop, #endif +}; + +static const struct pv_mmu_ops xen_mmu_ops __initdata = { + .pagetable_setup_start = xen_pagetable_setup_start, + .pagetable_setup_done = xen_pagetable_setup_done, + + .read_cr2 = xen_read_cr2, + .write_cr2 = xen_write_cr2, + + .read_cr3 = xen_read_cr3, + .write_cr3 = xen_write_cr3, .flush_tlb_user = xen_flush_tlb, .flush_tlb_kernel = xen_flush_tlb, @@ -986,9 +1018,6 @@ static const struct paravirt_ops xen_paravirt_ops __initdata = { .pte_update = paravirt_nop, .pte_update_defer = paravirt_nop, - .pagetable_setup_start = xen_pagetable_setup_start, - .pagetable_setup_done = xen_pagetable_setup_done, - .alloc_pt = xen_alloc_pt_init, .release_pt = xen_release_pt, .alloc_pd = paravirt_nop, @@ -1024,7 +1053,10 @@ static const struct paravirt_ops xen_paravirt_ops __initdata = { .dup_mmap = xen_dup_mmap, .exit_mmap = xen_exit_mmap, - .set_lazy_mode = xen_set_lazy_mode, + .lazy_mode = { + .enter = paravirt_enter_lazy_mmu, + .leave = xen_leave_lazy, + }, }; #ifdef CONFIG_SMP @@ -1080,6 +1112,17 @@ static const struct machine_ops __initdata xen_machine_ops = { }; +static void __init xen_reserve_top(void) +{ + unsigned long top = HYPERVISOR_VIRT_START; + struct xen_platform_parameters pp; + + if (HYPERVISOR_xen_version(XENVER_platform_parameters, &pp) == 0) + top = pp.virt_start; + + reserve_top_address(-top + 2 * PAGE_SIZE); +} + /* First C function to be called on Xen boot */ asmlinkage void __init xen_start_kernel(void) { @@ -1091,7 +1134,14 @@ asmlinkage void __init xen_start_kernel(void) BUG_ON(memcmp(xen_start_info->magic, "xen-3.0", 7) != 0); /* Install Xen paravirt ops */ - paravirt_ops = xen_paravirt_ops; + pv_info = xen_info; + pv_init_ops = xen_init_ops; + pv_time_ops = xen_time_ops; + pv_cpu_ops = xen_cpu_ops; + pv_irq_ops = xen_irq_ops; + pv_apic_ops = xen_apic_ops; + pv_mmu_ops = xen_mmu_ops; + machine_ops = xen_machine_ops; #ifdef CONFIG_SMP @@ -1113,6 +1163,7 @@ asmlinkage void __init xen_start_kernel(void) /* keep using Xen gdt for now; no urgent need to change it */ x86_write_percpu(xen_cr3, __pa(pgd)); + x86_write_percpu(xen_current_cr3, __pa(pgd)); #ifdef CONFIG_SMP /* Don't do the full vcpu_info placement stuff until we have a @@ -1124,12 +1175,12 @@ asmlinkage void __init xen_start_kernel(void) xen_setup_vcpu_info_placement(); #endif - paravirt_ops.kernel_rpl = 1; + pv_info.kernel_rpl = 1; if (xen_feature(XENFEAT_supervisor_mode_kernel)) - paravirt_ops.kernel_rpl = 0; + pv_info.kernel_rpl = 0; /* set the limit of our address space */ - reserve_top_address(-HYPERVISOR_VIRT_START + 2 * PAGE_SIZE); + xen_reserve_top(); /* set up basic CPUID stuff */ cpu_detect(&new_cpu_data); @@ -1137,9 +1188,10 @@ asmlinkage void __init xen_start_kernel(void) new_cpu_data.x86_capability[0] = cpuid_edx(1); /* Poke various useful things into boot_params */ - LOADER_TYPE = (9 << 4) | 0; - INITRD_START = xen_start_info->mod_start ? __pa(xen_start_info->mod_start) : 0; - INITRD_SIZE = xen_start_info->mod_len; + boot_params.hdr.type_of_loader = (9 << 4) | 0; + boot_params.hdr.ramdisk_image = xen_start_info->mod_start + ? __pa(xen_start_info->mod_start) : 0; + boot_params.hdr.ramdisk_size = xen_start_info->mod_len; /* Start the world */ start_kernel(); diff --git a/arch/x86/xen/events.c b/arch/x86/xen/events.c index da1b173547a1..6d1da5809e6f 100644 --- a/arch/x86/xen/events.c +++ b/arch/x86/xen/events.c @@ -383,7 +383,7 @@ static void unbind_from_irq(unsigned int irq) } int bind_evtchn_to_irqhandler(unsigned int evtchn, - irqreturn_t (*handler)(int, void *), + irq_handler_t handler, unsigned long irqflags, const char *devname, void *dev_id) { @@ -402,7 +402,7 @@ int bind_evtchn_to_irqhandler(unsigned int evtchn, EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler); int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu, - irqreturn_t (*handler)(int, void *), + irq_handler_t handler, unsigned long irqflags, const char *devname, void *dev_id) { unsigned int irq; diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 874db0cd1d2a..b2e32f9d0071 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -41,7 +41,6 @@ #include <linux/sched.h> #include <linux/highmem.h> #include <linux/bug.h> -#include <linux/sched.h> #include <asm/pgtable.h> #include <asm/tlbflush.h> @@ -155,7 +154,7 @@ void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pteval) { if (mm == current->mm || mm == &init_mm) { - if (xen_get_lazy_mode() == PARAVIRT_LAZY_MMU) { + if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) { struct multicall_space mcs; mcs = xen_mc_entry(0); @@ -304,7 +303,12 @@ pgd_t xen_make_pgd(unsigned long pgd) } #endif /* CONFIG_X86_PAE */ - +enum pt_level { + PT_PGD, + PT_PUD, + PT_PMD, + PT_PTE +}; /* (Yet another) pagetable walker. This one is intended for pinning a @@ -316,7 +320,7 @@ pgd_t xen_make_pgd(unsigned long pgd) FIXADDR_TOP. But the important bit is that we don't pin beyond there, because then we start getting into Xen's ptes. */ -static int pgd_walk(pgd_t *pgd_base, int (*func)(struct page *, unsigned), +static int pgd_walk(pgd_t *pgd_base, int (*func)(struct page *, enum pt_level), unsigned long limit) { pgd_t *pgd = pgd_base; @@ -341,7 +345,7 @@ static int pgd_walk(pgd_t *pgd_base, int (*func)(struct page *, unsigned), pud = pud_offset(pgd, 0); if (PTRS_PER_PUD > 1) /* not folded */ - flush |= (*func)(virt_to_page(pud), 0); + flush |= (*func)(virt_to_page(pud), PT_PUD); for (; addr != pud_limit; pud++, addr = pud_next) { pmd_t *pmd; @@ -360,7 +364,7 @@ static int pgd_walk(pgd_t *pgd_base, int (*func)(struct page *, unsigned), pmd = pmd_offset(pud, 0); if (PTRS_PER_PMD > 1) /* not folded */ - flush |= (*func)(virt_to_page(pmd), 0); + flush |= (*func)(virt_to_page(pmd), PT_PMD); for (; addr != pmd_limit; pmd++) { addr += (PAGE_SIZE * PTRS_PER_PTE); @@ -372,17 +376,47 @@ static int pgd_walk(pgd_t *pgd_base, int (*func)(struct page *, unsigned), if (pmd_none(*pmd)) continue; - flush |= (*func)(pmd_page(*pmd), 0); + flush |= (*func)(pmd_page(*pmd), PT_PTE); } } } - flush |= (*func)(virt_to_page(pgd_base), UVMF_TLB_FLUSH); + flush |= (*func)(virt_to_page(pgd_base), PT_PGD); return flush; } -static int pin_page(struct page *page, unsigned flags) +static spinlock_t *lock_pte(struct page *page) +{ + spinlock_t *ptl = NULL; + +#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS + ptl = __pte_lockptr(page); + spin_lock(ptl); +#endif + + return ptl; +} + +static void do_unlock(void *v) +{ + spinlock_t *ptl = v; + spin_unlock(ptl); +} + +static void xen_do_pin(unsigned level, unsigned long pfn) +{ + struct mmuext_op *op; + struct multicall_space mcs; + + mcs = __xen_mc_entry(sizeof(*op)); + op = mcs.args; + op->cmd = level; + op->arg1.mfn = pfn_to_mfn(pfn); + MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); +} + +static int pin_page(struct page *page, enum pt_level level) { unsigned pgfl = test_and_set_bit(PG_pinned, &page->flags); int flush; @@ -397,12 +431,26 @@ static int pin_page(struct page *page, unsigned flags) void *pt = lowmem_page_address(page); unsigned long pfn = page_to_pfn(page); struct multicall_space mcs = __xen_mc_entry(0); + spinlock_t *ptl; flush = 0; + ptl = NULL; + if (level == PT_PTE) + ptl = lock_pte(page); + MULTI_update_va_mapping(mcs.mc, (unsigned long)pt, pfn_pte(pfn, PAGE_KERNEL_RO), - flags); + level == PT_PGD ? UVMF_TLB_FLUSH : 0); + + if (level == PT_PTE) + xen_do_pin(MMUEXT_PIN_L1_TABLE, pfn); + + if (ptl) { + /* Queue a deferred unlock for when this batch + is completed. */ + xen_mc_callback(do_unlock, ptl); + } } return flush; @@ -413,8 +461,7 @@ static int pin_page(struct page *page, unsigned flags) read-only, and can be pinned. */ void xen_pgd_pin(pgd_t *pgd) { - struct multicall_space mcs; - struct mmuext_op *op; + unsigned level; xen_mc_batch(); @@ -425,16 +472,13 @@ void xen_pgd_pin(pgd_t *pgd) xen_mc_batch(); } - mcs = __xen_mc_entry(sizeof(*op)); - op = mcs.args; - #ifdef CONFIG_X86_PAE - op->cmd = MMUEXT_PIN_L3_TABLE; + level = MMUEXT_PIN_L3_TABLE; #else - op->cmd = MMUEXT_PIN_L2_TABLE; + level = MMUEXT_PIN_L2_TABLE; #endif - op->arg1.mfn = pfn_to_mfn(PFN_DOWN(__pa(pgd))); - MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); + + xen_do_pin(level, PFN_DOWN(__pa(pgd))); xen_mc_issue(0); } @@ -442,7 +486,7 @@ void xen_pgd_pin(pgd_t *pgd) /* The init_mm pagetable is really pinned as soon as its created, but that's before we have page structures to store the bits. So do all the book-keeping now. */ -static __init int mark_pinned(struct page *page, unsigned flags) +static __init int mark_pinned(struct page *page, enum pt_level level) { SetPagePinned(page); return 0; @@ -453,18 +497,32 @@ void __init xen_mark_init_mm_pinned(void) pgd_walk(init_mm.pgd, mark_pinned, FIXADDR_TOP); } -static int unpin_page(struct page *page, unsigned flags) +static int unpin_page(struct page *page, enum pt_level level) { unsigned pgfl = test_and_clear_bit(PG_pinned, &page->flags); if (pgfl && !PageHighMem(page)) { void *pt = lowmem_page_address(page); unsigned long pfn = page_to_pfn(page); - struct multicall_space mcs = __xen_mc_entry(0); + spinlock_t *ptl = NULL; + struct multicall_space mcs; + + if (level == PT_PTE) { + ptl = lock_pte(page); + + xen_do_pin(MMUEXT_UNPIN_TABLE, pfn); + } + + mcs = __xen_mc_entry(0); MULTI_update_va_mapping(mcs.mc, (unsigned long)pt, pfn_pte(pfn, PAGE_KERNEL), - flags); + level == PT_PGD ? UVMF_TLB_FLUSH : 0); + + if (ptl) { + /* unlock when batch completed */ + xen_mc_callback(do_unlock, ptl); + } } return 0; /* never need to flush on unpin */ @@ -473,18 +531,9 @@ static int unpin_page(struct page *page, unsigned flags) /* Release a pagetables pages back as normal RW */ static void xen_pgd_unpin(pgd_t *pgd) { - struct mmuext_op *op; - struct multicall_space mcs; - xen_mc_batch(); - mcs = __xen_mc_entry(sizeof(*op)); - - op = mcs.args; - op->cmd = MMUEXT_UNPIN_TABLE; - op->arg1.mfn = pfn_to_mfn(PFN_DOWN(__pa(pgd))); - - MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); + xen_do_pin(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); pgd_walk(pgd, unpin_page, TASK_SIZE); @@ -515,20 +564,43 @@ static void drop_other_mm_ref(void *info) if (__get_cpu_var(cpu_tlbstate).active_mm == mm) leave_mm(smp_processor_id()); + + /* If this cpu still has a stale cr3 reference, then make sure + it has been flushed. */ + if (x86_read_percpu(xen_current_cr3) == __pa(mm->pgd)) { + load_cr3(swapper_pg_dir); + arch_flush_lazy_cpu_mode(); + } } static void drop_mm_ref(struct mm_struct *mm) { + cpumask_t mask; + unsigned cpu; + if (current->active_mm == mm) { if (current->mm == mm) load_cr3(swapper_pg_dir); else leave_mm(smp_processor_id()); + arch_flush_lazy_cpu_mode(); } - if (!cpus_empty(mm->cpu_vm_mask)) - xen_smp_call_function_mask(mm->cpu_vm_mask, drop_other_mm_ref, - mm, 1); + /* Get the "official" set of cpus referring to our pagetable. */ + mask = mm->cpu_vm_mask; + + /* It's possible that a vcpu may have a stale reference to our + cr3, because its in lazy mode, and it hasn't yet flushed + its set of pending hypercalls yet. In this case, we can + look at its actual current cr3 value, and force it to flush + if needed. */ + for_each_online_cpu(cpu) { + if (per_cpu(xen_current_cr3, cpu) == __pa(mm->pgd)) + cpu_set(cpu, mask); + } + + if (!cpus_empty(mask)) + xen_smp_call_function_mask(mask, drop_other_mm_ref, mm, 1); } #else static void drop_mm_ref(struct mm_struct *mm) @@ -563,5 +635,6 @@ void xen_exit_mmap(struct mm_struct *mm) /* pgd may not be pinned in the error exit path of execve */ if (PagePinned(virt_to_page(mm->pgd))) xen_pgd_unpin(mm->pgd); + spin_unlock(&mm->page_table_lock); } diff --git a/arch/x86/xen/multicalls.c b/arch/x86/xen/multicalls.c index c837e8e463db..5e6f36f6d876 100644 --- a/arch/x86/xen/multicalls.c +++ b/arch/x86/xen/multicalls.c @@ -26,13 +26,22 @@ #include "multicalls.h" +#define MC_DEBUG 1 + #define MC_BATCH 32 #define MC_ARGS (MC_BATCH * 16 / sizeof(u64)) struct mc_buffer { struct multicall_entry entries[MC_BATCH]; +#if MC_DEBUG + struct multicall_entry debug[MC_BATCH]; +#endif u64 args[MC_ARGS]; - unsigned mcidx, argidx; + struct callback { + void (*fn)(void *); + void *data; + } callbacks[MC_BATCH]; + unsigned mcidx, argidx, cbidx; }; static DEFINE_PER_CPU(struct mc_buffer, mc_buffer); @@ -43,6 +52,7 @@ void xen_mc_flush(void) struct mc_buffer *b = &__get_cpu_var(mc_buffer); int ret = 0; unsigned long flags; + int i; BUG_ON(preemptible()); @@ -51,13 +61,31 @@ void xen_mc_flush(void) local_irq_save(flags); if (b->mcidx) { - int i; +#if MC_DEBUG + memcpy(b->debug, b->entries, + b->mcidx * sizeof(struct multicall_entry)); +#endif if (HYPERVISOR_multicall(b->entries, b->mcidx) != 0) BUG(); for (i = 0; i < b->mcidx; i++) if (b->entries[i].result < 0) ret++; + +#if MC_DEBUG + if (ret) { + printk(KERN_ERR "%d multicall(s) failed: cpu %d\n", + ret, smp_processor_id()); + for(i = 0; i < b->mcidx; i++) { + printk(" call %2d/%d: op=%lu arg=[%lx] result=%ld\n", + i+1, b->mcidx, + b->debug[i].op, + b->debug[i].args[0], + b->entries[i].result); + } + } +#endif + b->mcidx = 0; b->argidx = 0; } else @@ -65,6 +93,13 @@ void xen_mc_flush(void) local_irq_restore(flags); + for(i = 0; i < b->cbidx; i++) { + struct callback *cb = &b->callbacks[i]; + + (*cb->fn)(cb->data); + } + b->cbidx = 0; + BUG_ON(ret); } @@ -88,3 +123,16 @@ struct multicall_space __xen_mc_entry(size_t args) return ret; } + +void xen_mc_callback(void (*fn)(void *), void *data) +{ + struct mc_buffer *b = &__get_cpu_var(mc_buffer); + struct callback *cb; + + if (b->cbidx == MC_BATCH) + xen_mc_flush(); + + cb = &b->callbacks[b->cbidx++]; + cb->fn = fn; + cb->data = data; +} diff --git a/arch/x86/xen/multicalls.h b/arch/x86/xen/multicalls.h index e6f7530b156c..8bae996d99a3 100644 --- a/arch/x86/xen/multicalls.h +++ b/arch/x86/xen/multicalls.h @@ -35,11 +35,14 @@ void xen_mc_flush(void); /* Issue a multicall if we're not in a lazy mode */ static inline void xen_mc_issue(unsigned mode) { - if ((xen_get_lazy_mode() & mode) == 0) + if ((paravirt_get_lazy_mode() & mode) == 0) xen_mc_flush(); /* restore flags saved in xen_mc_batch */ local_irq_restore(x86_read_percpu(xen_mc_irq_flags)); } +/* Set up a callback to be called when the current batch is flushed */ +void xen_mc_callback(void (*fn)(void *), void *data); + #endif /* _XEN_MULTICALLS_H */ diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 557b8e24706a..c1b131bcdcbe 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -147,8 +147,13 @@ void __init xen_smp_prepare_boot_cpu(void) make_lowmem_page_readwrite(&per_cpu__gdt_page); for (cpu = 0; cpu < NR_CPUS; cpu++) { - cpus_clear(cpu_sibling_map[cpu]); - cpus_clear(cpu_core_map[cpu]); + cpus_clear(per_cpu(cpu_sibling_map, cpu)); + /* + * cpu_core_map lives in a per cpu area that is cleared + * when the per cpu array is allocated. + * + * cpus_clear(per_cpu(cpu_core_map, cpu)); + */ } xen_setup_vcpu_info_placement(); @@ -159,8 +164,13 @@ void __init xen_smp_prepare_cpus(unsigned int max_cpus) unsigned cpu; for (cpu = 0; cpu < NR_CPUS; cpu++) { - cpus_clear(cpu_sibling_map[cpu]); - cpus_clear(cpu_core_map[cpu]); + cpus_clear(per_cpu(cpu_sibling_map, cpu)); + /* + * cpu_core_ map will be zeroed when the per + * cpu area is allocated. + * + * cpus_clear(per_cpu(cpu_core_map, cpu)); + */ } smp_store_cpu_info(0); @@ -346,6 +356,7 @@ static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id) */ irq_enter(); (*func)(info); + __get_cpu_var(irq_stat).irq_call_count++; irq_exit(); if (wait) { @@ -360,7 +371,8 @@ int xen_smp_call_function_mask(cpumask_t mask, void (*func)(void *), void *info, int wait) { struct call_data_struct data; - int cpus; + int cpus, cpu; + bool yield; /* Holding any lock stops cpus from going down. */ spin_lock(&call_lock); @@ -389,9 +401,14 @@ int xen_smp_call_function_mask(cpumask_t mask, void (*func)(void *), /* Send a message to other CPUs and wait for them to respond */ xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR); - /* Make sure other vcpus get a chance to run. - XXX too severe? Maybe we should check the other CPU's states? */ - HYPERVISOR_sched_op(SCHEDOP_yield, 0); + /* Make sure other vcpus get a chance to run if they need to. */ + yield = false; + for_each_cpu_mask(cpu, mask) + if (xen_vcpu_stolen(cpu)) + yield = true; + + if (yield) + HYPERVISOR_sched_op(SCHEDOP_yield, 0); /* Wait for response */ while (atomic_read(&data.started) != cpus || diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index dfd6db69ead5..d083ff5ef088 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -105,6 +105,12 @@ static void get_runstate_snapshot(struct vcpu_runstate_info *res) } while (get64(&state->state_entry_time) != state_time); } +/* return true when a vcpu could run but has no real cpu to run on */ +bool xen_vcpu_stolen(int vcpu) +{ + return per_cpu(runstate, vcpu).state == RUNSTATE_runnable; +} + static void setup_runstate_info(int cpu) { struct vcpu_register_runstate_memory_area area; diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index b9aaea45f07f..b02a909bfd4c 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -11,6 +11,7 @@ void xen_copy_trap_info(struct trap_info *traps); DECLARE_PER_CPU(struct vcpu_info *, xen_vcpu); DECLARE_PER_CPU(unsigned long, xen_cr3); +DECLARE_PER_CPU(unsigned long, xen_current_cr3); extern struct start_info *xen_start_info; extern struct shared_info *HYPERVISOR_shared_info; @@ -27,14 +28,9 @@ unsigned long xen_get_wallclock(void); int xen_set_wallclock(unsigned long time); unsigned long long xen_sched_clock(void); -void xen_mark_init_mm_pinned(void); - -DECLARE_PER_CPU(enum paravirt_lazy_mode, xen_lazy_mode); +bool xen_vcpu_stolen(int vcpu); -static inline unsigned xen_get_lazy_mode(void) -{ - return x86_read_percpu(xen_lazy_mode); -} +void xen_mark_init_mm_pinned(void); void __init xen_fill_possible_map(void); |