summaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig57
-rw-r--r--arch/x86/Kconfig.cpu1
-rw-r--r--arch/x86/Kconfig.debug4
-rw-r--r--arch/x86/boot/tty.c2
-rw-r--r--arch/x86/include/asm/acpi.h1
-rw-r--r--arch/x86/include/asm/amd_iommu_types.h24
-rw-r--r--arch/x86/include/asm/apic.h1
-rw-r--r--arch/x86/include/asm/bigsmp/apic.h32
-rw-r--r--arch/x86/include/asm/bigsmp/ipi.h13
-rw-r--r--arch/x86/include/asm/dma-mapping.h6
-rw-r--r--arch/x86/include/asm/ds.h140
-rw-r--r--arch/x86/include/asm/emergency-restart.h4
-rw-r--r--arch/x86/include/asm/es7000/apic.h141
-rw-r--r--arch/x86/include/asm/es7000/ipi.h12
-rw-r--r--arch/x86/include/asm/es7000/wakecpu.h41
-rw-r--r--arch/x86/include/asm/ftrace.h34
-rw-r--r--arch/x86/include/asm/genapic_32.h32
-rw-r--r--arch/x86/include/asm/genapic_64.h16
-rw-r--r--arch/x86/include/asm/io_apic.h9
-rw-r--r--arch/x86/include/asm/iomap.h30
-rw-r--r--arch/x86/include/asm/iommu.h1
-rw-r--r--arch/x86/include/asm/ipi.h23
-rw-r--r--arch/x86/include/asm/irq.h2
-rw-r--r--arch/x86/include/asm/irq_vectors.h11
-rw-r--r--arch/x86/include/asm/mach-default/mach_apic.h29
-rw-r--r--arch/x86/include/asm/mach-default/mach_ipi.h18
-rw-r--r--arch/x86/include/asm/mach-default/mach_wakecpu.h24
-rw-r--r--arch/x86/include/asm/mach-default/smpboot_hooks.h8
-rw-r--r--arch/x86/include/asm/mach-generic/mach_apic.h2
-rw-r--r--arch/x86/include/asm/mach-generic/mach_wakecpu.h12
-rw-r--r--arch/x86/include/asm/mmzone_32.h4
-rw-r--r--arch/x86/include/asm/numaq/apic.h12
-rw-r--r--arch/x86/include/asm/numaq/ipi.h13
-rw-r--r--arch/x86/include/asm/numaq/wakecpu.h24
-rw-r--r--arch/x86/include/asm/pci_64.h14
-rw-r--r--arch/x86/include/asm/ptrace.h2
-rw-r--r--arch/x86/include/asm/setup.h3
-rw-r--r--arch/x86/include/asm/smp.h6
-rw-r--r--arch/x86/include/asm/summit/apic.h51
-rw-r--r--arch/x86/include/asm/summit/ipi.h9
-rw-r--r--arch/x86/include/asm/system.h2
-rw-r--r--arch/x86/include/asm/thread_info.h2
-rw-r--r--arch/x86/include/asm/topology.h4
-rw-r--r--arch/x86/include/asm/uaccess.h2
-rw-r--r--arch/x86/include/asm/uaccess_32.h8
-rw-r--r--arch/x86/include/asm/uaccess_64.h8
-rw-r--r--arch/x86/include/asm/unistd_64.h4
-rw-r--r--arch/x86/include/asm/vmi.h8
-rw-r--r--arch/x86/kernel/Makefile5
-rw-r--r--arch/x86/kernel/acpi/boot.c12
-rw-r--r--arch/x86/kernel/amd_iommu.c51
-rw-r--r--arch/x86/kernel/amd_iommu_init.c6
-rw-r--r--arch/x86/kernel/apic.c14
-rw-r--r--arch/x86/kernel/apm_32.c4
-rw-r--r--arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c4
-rw-r--r--arch/x86/kernel/cpu/cpufreq/powernow-k8.c18
-rw-r--r--arch/x86/kernel/cpu/cpufreq/powernow-k8.h17
-rw-r--r--arch/x86/kernel/cpu/intel.c3
-rw-r--r--arch/x86/kernel/cpu/intel_cacheinfo.c45
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_amd_64.c108
-rw-r--r--arch/x86/kernel/ds.c741
-rw-r--r--arch/x86/kernel/dumpstack.c351
-rw-r--r--arch/x86/kernel/dumpstack.h39
-rw-r--r--arch/x86/kernel/dumpstack_32.c307
-rw-r--r--arch/x86/kernel/dumpstack_64.c289
-rw-r--r--arch/x86/kernel/early-quirks.c18
-rw-r--r--arch/x86/kernel/entry_32.S51
-rw-r--r--arch/x86/kernel/entry_64.S83
-rw-r--r--arch/x86/kernel/es7000_32.c71
-rw-r--r--arch/x86/kernel/ftrace.c390
-rw-r--r--arch/x86/kernel/genapic_64.c4
-rw-r--r--arch/x86/kernel/genapic_flat_64.c105
-rw-r--r--arch/x86/kernel/genx2apic_cluster.c77
-rw-r--r--arch/x86/kernel/genx2apic_phys.c72
-rw-r--r--arch/x86/kernel/genx2apic_uv_x.c59
-rw-r--r--arch/x86/kernel/hpet.c8
-rw-r--r--arch/x86/kernel/i387.c2
-rw-r--r--arch/x86/kernel/i8253.c2
-rw-r--r--arch/x86/kernel/io_apic.c1049
-rw-r--r--arch/x86/kernel/ipi.c28
-rw-r--r--arch/x86/kernel/irq.c3
-rw-r--r--arch/x86/kernel/irq_32.c15
-rw-r--r--arch/x86/kernel/irq_64.c17
-rw-r--r--arch/x86/kernel/irqinit_32.c3
-rw-r--r--arch/x86/kernel/irqinit_64.c3
-rw-r--r--arch/x86/kernel/kvmclock.c2
-rw-r--r--arch/x86/kernel/mfgpt_32.c2
-rw-r--r--arch/x86/kernel/mpparse.c24
-rw-r--r--arch/x86/kernel/numaq_32.c10
-rw-r--r--arch/x86/kernel/paravirt-spinlocks.c3
-rw-r--r--arch/x86/kernel/pci-calgary_64.c2
-rw-r--r--arch/x86/kernel/pci-gart_64.c2
-rw-r--r--arch/x86/kernel/process.c32
-rw-r--r--arch/x86/kernel/process_32.c4
-rw-r--r--arch/x86/kernel/process_64.c4
-rw-r--r--arch/x86/kernel/ptrace.c98
-rw-r--r--arch/x86/kernel/reboot.c45
-rw-r--r--arch/x86/kernel/setup.c31
-rw-r--r--arch/x86/kernel/setup_percpu.c19
-rw-r--r--arch/x86/kernel/smp.c21
-rw-r--r--arch/x86/kernel/smpboot.c27
-rw-r--r--arch/x86/kernel/stacktrace.c64
-rw-r--r--arch/x86/kernel/tlb_32.c2
-rw-r--r--arch/x86/kernel/tlb_64.c2
-rw-r--r--arch/x86/kernel/tsc_sync.c4
-rw-r--r--arch/x86/kernel/vmi_32.c16
-rw-r--r--arch/x86/kernel/vmiclock_32.c2
-rw-r--r--arch/x86/kernel/vsyscall_64.c3
-rw-r--r--arch/x86/kernel/xsave.c2
-rw-r--r--arch/x86/kvm/Kconfig2
-rw-r--r--arch/x86/kvm/i8254.c4
-rw-r--r--arch/x86/kvm/mmu.c4
-rw-r--r--arch/x86/kvm/paging_tmpl.h1
-rw-r--r--arch/x86/kvm/vmx.c7
-rw-r--r--arch/x86/kvm/vmx.h1
-rw-r--r--arch/x86/lguest/boot.c2
-rw-r--r--arch/x86/lib/usercopy_32.c8
-rw-r--r--arch/x86/lib/usercopy_64.c4
-rw-r--r--arch/x86/mach-generic/bigsmp.c6
-rw-r--r--arch/x86/mach-generic/default.c1
-rw-r--r--arch/x86/mach-generic/es7000.c19
-rw-r--r--arch/x86/mach-generic/numaq.c5
-rw-r--r--arch/x86/mach-generic/probe.c16
-rw-r--r--arch/x86/mach-generic/summit.c6
-rw-r--r--arch/x86/mach-voyager/voyager_smp.c25
-rw-r--r--arch/x86/mm/Makefile3
-rw-r--r--arch/x86/mm/fault.c13
-rw-r--r--arch/x86/mm/numa_32.c35
-rw-r--r--arch/x86/mm/numa_64.c4
-rw-r--r--arch/x86/mm/srat_64.c2
-rw-r--r--arch/x86/oprofile/nmi_int.c5
-rw-r--r--arch/x86/oprofile/op_model_ppro.c6
-rw-r--r--arch/x86/pci/direct.c4
-rw-r--r--arch/x86/pci/fixup.c25
-rw-r--r--arch/x86/pci/pci.h1
-rw-r--r--arch/x86/power/hibernate_32.c4
-rw-r--r--arch/x86/vdso/vclock_gettime.c3
-rw-r--r--arch/x86/xen/mmu.c41
-rw-r--r--arch/x86/xen/smp.c29
-rw-r--r--arch/x86/xen/suspend.c3
-rw-r--r--arch/x86/xen/time.c2
-rw-r--r--arch/x86/xen/xen-ops.h4
142 files changed, 3501 insertions, 2195 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 93224b569187..0ca2eb7573cd 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -29,11 +29,14 @@ config X86
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_DYNAMIC_FTRACE
select HAVE_FUNCTION_TRACER
+ select HAVE_FUNCTION_GRAPH_TRACER
+ select HAVE_FUNCTION_TRACE_MCOUNT_TEST
select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64)
select HAVE_ARCH_KGDB if !X86_VOYAGER
select HAVE_ARCH_TRACEHOOK
select HAVE_GENERIC_DMA_COHERENT if X86_32
select HAVE_EFFICIENT_UNALIGNED_ACCESS
+ select USER_STACKTRACE_SUPPORT
config ARCH_DEFCONFIG
string
@@ -167,9 +170,12 @@ config GENERIC_PENDING_IRQ
config X86_SMP
bool
depends on SMP && ((X86_32 && !X86_VOYAGER) || X86_64)
- select USE_GENERIC_SMP_HELPERS
default y
+config USE_GENERIC_SMP_HELPERS
+ def_bool y
+ depends on SMP
+
config X86_32_SMP
def_bool y
depends on X86_32 && SMP
@@ -235,6 +241,25 @@ config X86_HAS_BOOT_CPU_ID
def_bool y
depends on X86_VOYAGER
+config SPARSE_IRQ
+ bool "Support sparse irq numbering"
+ depends on PCI_MSI || HT_IRQ
+ default y
+ help
+ This enables support for sparse irq, esp for msi/msi-x. You may need
+ if you have lots of cards supports msi-x installed.
+
+ If you don't know what to do here, say Y.
+
+config NUMA_MIGRATE_IRQ_DESC
+ bool "Move irq desc when changing irq smp_affinity"
+ depends on SPARSE_IRQ && SMP
+ default n
+ help
+ This enables moving irq_desc to cpu/node that irq will use handled.
+
+ If you don't know what to do here, say N.
+
config X86_FIND_SMP_CONFIG
def_bool y
depends on X86_MPPARSE || X86_VOYAGER
@@ -364,10 +389,10 @@ config X86_RDC321X
as R-8610-(G).
If you don't have one of these chips, you should say N here.
-config SCHED_NO_NO_OMIT_FRAME_POINTER
+config SCHED_OMIT_FRAME_POINTER
def_bool y
prompt "Single-depth WCHAN output"
- depends on X86_32
+ depends on X86
help
Calculate simpler /proc/<PID>/wchan values. If this option
is disabled then wchan values will recurse back to the
@@ -462,10 +487,6 @@ config X86_CYCLONE_TIMER
def_bool y
depends on X86_GENERICARCH
-config ES7000_CLUSTERED_APIC
- def_bool y
- depends on SMP && X86_ES7000 && MPENTIUMIII
-
source "arch/x86/Kconfig.cpu"
config HPET_TIMER
@@ -579,19 +600,20 @@ config IOMMU_HELPER
config MAXSMP
bool "Configure Maximum number of SMP Processors and NUMA Nodes"
- depends on X86_64 && SMP && BROKEN
+ depends on X86_64 && SMP && DEBUG_KERNEL && EXPERIMENTAL
+ select CPUMASK_OFFSTACK
default n
help
Configure maximum number of CPUS and NUMA Nodes for this architecture.
If unsure, say N.
config NR_CPUS
- int "Maximum number of CPUs (2-512)" if !MAXSMP
- range 2 512
- depends on SMP
+ int "Maximum number of CPUs" if SMP && !MAXSMP
+ range 2 512 if SMP && !MAXSMP
+ default "1" if !SMP
default "4096" if MAXSMP
- default "32" if X86_NUMAQ || X86_SUMMIT || X86_BIGSMP || X86_ES7000
- default "8"
+ default "32" if SMP && (X86_NUMAQ || X86_SUMMIT || X86_BIGSMP || X86_ES7000)
+ default "8" if SMP
help
This allows you to specify the maximum number of CPUs which this
kernel will support. The maximum supported value is 512 and the
@@ -957,7 +979,7 @@ config ARCH_PHYS_ADDR_T_64BIT
config NUMA
bool "Numa Memory Allocation and Scheduler Support (EXPERIMENTAL)"
depends on SMP
- depends on X86_64 || (X86_32 && HIGHMEM64G && (X86_NUMAQ || X86_BIGSMP || X86_SUMMIT && ACPI) && BROKEN)
+ depends on X86_64 || (X86_32 && HIGHMEM64G && (X86_NUMAQ || X86_BIGSMP || X86_SUMMIT && ACPI) && EXPERIMENTAL)
default n if X86_PC
default y if (X86_NUMAQ || X86_SUMMIT || X86_BIGSMP)
help
@@ -1629,13 +1651,6 @@ config APM_ALLOW_INTS
many of the newer IBM Thinkpads. If you experience hangs when you
suspend, try setting this to Y. Otherwise, say N.
-config APM_REAL_MODE_POWER_OFF
- bool "Use real mode APM BIOS call to power off"
- help
- Use real mode APM BIOS calls to switch off the computer. This is
- a work-around for a number of buggy BIOSes. Switch this option on if
- your computer crashes instead of powering off properly.
-
endif # APM
source "arch/x86/kernel/cpu/cpufreq/Kconfig"
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index b815664fe370..85a78575956c 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -515,6 +515,7 @@ config CPU_SUP_UMC_32
config X86_DS
def_bool X86_PTRACE_BTS
depends on X86_DEBUGCTLMSR
+ select HAVE_HW_BRANCH_TRACER
config X86_PTRACE_BTS
bool "Branch Trace Store"
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 2a3dfbd5e677..fa013f529b74 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -186,14 +186,10 @@ config IOMMU_LEAK
Add a simple leak tracer to the IOMMU code. This is useful when you
are debugging a buggy device driver that leaks IOMMU mappings.
-config MMIOTRACE_HOOKS
- bool
-
config MMIOTRACE
bool "Memory mapped IO tracing"
depends on DEBUG_KERNEL && PCI
select TRACING
- select MMIOTRACE_HOOKS
help
Mmiotrace traces Memory Mapped I/O access and is meant for
debugging and reverse engineering. It is called from the ioremap
diff --git a/arch/x86/boot/tty.c b/arch/x86/boot/tty.c
index 0be77b39328a..7e8e8b25f5f6 100644
--- a/arch/x86/boot/tty.c
+++ b/arch/x86/boot/tty.c
@@ -74,7 +74,7 @@ static int kbd_pending(void)
{
u8 pending;
asm volatile("int $0x16; setnz %0"
- : "=rm" (pending)
+ : "=qm" (pending)
: "a" (0x0100));
return pending;
}
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 8d676d8ecde9..9830681446ad 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -113,7 +113,6 @@ static inline void acpi_disable_pci(void)
acpi_pci_disabled = 1;
acpi_noirq_set();
}
-extern int acpi_irq_balance_set(char *str);
/* routines for saving/restoring kernel state */
extern int acpi_save_state_mem(void);
diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h
index 1a30c0440c6b..ac302a2fa339 100644
--- a/arch/x86/include/asm/amd_iommu_types.h
+++ b/arch/x86/include/asm/amd_iommu_types.h
@@ -251,13 +251,6 @@ struct amd_iommu {
/* Pointer to PCI device of this IOMMU */
struct pci_dev *dev;
- /*
- * Capability pointer. There could be more than one IOMMU per PCI
- * device function if there are more than one AMD IOMMU capability
- * pointers.
- */
- u16 cap_ptr;
-
/* physical address of MMIO space */
u64 mmio_phys;
/* virtual address of MMIO space */
@@ -266,6 +259,13 @@ struct amd_iommu {
/* capabilities of that IOMMU read from ACPI */
u32 cap;
+ /*
+ * Capability pointer. There could be more than one IOMMU per PCI
+ * device function if there are more than one AMD IOMMU capability
+ * pointers.
+ */
+ u16 cap_ptr;
+
/* pci domain of this IOMMU */
u16 pci_seg;
@@ -284,19 +284,19 @@ struct amd_iommu {
/* size of command buffer */
u32 cmd_buf_size;
- /* event buffer virtual address */
- u8 *evt_buf;
/* size of event buffer */
u32 evt_buf_size;
+ /* event buffer virtual address */
+ u8 *evt_buf;
/* MSI number for event interrupt */
u16 evt_msi_num;
- /* if one, we need to send a completion wait command */
- int need_sync;
-
/* true if interrupts for this IOMMU are already enabled */
bool int_enabled;
+ /* if one, we need to send a completion wait command */
+ int need_sync;
+
/* default dma_ops domain for that IOMMU */
struct dma_ops_domain *default_dom;
};
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 3b1510b4fc57..25caa0738af5 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -193,6 +193,7 @@ extern u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask);
static inline void lapic_shutdown(void) { }
#define local_apic_timer_c2_ok 1
static inline void init_apic_mappings(void) { }
+static inline void disable_local_APIC(void) { }
#endif /* !CONFIG_X86_LOCAL_APIC */
diff --git a/arch/x86/include/asm/bigsmp/apic.h b/arch/x86/include/asm/bigsmp/apic.h
index 1d9543b9d358..976399debb3f 100644
--- a/arch/x86/include/asm/bigsmp/apic.h
+++ b/arch/x86/include/asm/bigsmp/apic.h
@@ -9,12 +9,12 @@ static inline int apic_id_registered(void)
return (1);
}
-static inline cpumask_t target_cpus(void)
+static inline const cpumask_t *target_cpus(void)
{
#ifdef CONFIG_SMP
- return cpu_online_map;
+ return &cpu_online_map;
#else
- return cpumask_of_cpu(0);
+ return &cpumask_of_cpu(0);
#endif
}
@@ -24,8 +24,6 @@ static inline cpumask_t target_cpus(void)
#define INT_DELIVERY_MODE (dest_Fixed)
#define INT_DEST_MODE (0) /* phys delivery to target proc */
#define NO_BALANCE_IRQ (0)
-#define WAKE_SECONDARY_VIA_INIT
-
static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid)
{
@@ -81,7 +79,7 @@ static inline int apicid_to_node(int logical_apicid)
static inline int cpu_present_to_apicid(int mps_cpu)
{
- if (mps_cpu < NR_CPUS)
+ if (mps_cpu < nr_cpu_ids)
return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu);
return BAD_APICID;
@@ -96,7 +94,7 @@ extern u8 cpu_2_logical_apicid[];
/* Mapping from cpu number to logical apicid */
static inline int cpu_to_logical_apicid(int cpu)
{
- if (cpu >= NR_CPUS)
+ if (cpu >= nr_cpu_ids)
return BAD_APICID;
return cpu_physical_id(cpu);
}
@@ -121,16 +119,32 @@ static inline int check_phys_apicid_present(int boot_cpu_physical_apicid)
}
/* As we are using single CPU as destination, pick only one CPU here */
-static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
+static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
{
int cpu;
int apicid;
- cpu = first_cpu(cpumask);
+ cpu = first_cpu(*cpumask);
apicid = cpu_to_logical_apicid(cpu);
return apicid;
}
+static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+ const struct cpumask *andmask)
+{
+ int cpu;
+
+ /*
+ * We're using fixed IRQ delivery, can only return one phys APIC ID.
+ * May as well be the first.
+ */
+ cpu = cpumask_any_and(cpumask, andmask);
+ if (cpu < nr_cpu_ids)
+ return cpu_to_logical_apicid(cpu);
+
+ return BAD_APICID;
+}
+
static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb)
{
return cpuid_apic >> index_msb;
diff --git a/arch/x86/include/asm/bigsmp/ipi.h b/arch/x86/include/asm/bigsmp/ipi.h
index 9404c535b7ec..27fcd01b3ae6 100644
--- a/arch/x86/include/asm/bigsmp/ipi.h
+++ b/arch/x86/include/asm/bigsmp/ipi.h
@@ -1,25 +1,22 @@
#ifndef __ASM_MACH_IPI_H
#define __ASM_MACH_IPI_H
-void send_IPI_mask_sequence(cpumask_t mask, int vector);
+void send_IPI_mask_sequence(const struct cpumask *mask, int vector);
+void send_IPI_mask_allbutself(const struct cpumask *mask, int vector);
-static inline void send_IPI_mask(cpumask_t mask, int vector)
+static inline void send_IPI_mask(const struct cpumask *mask, int vector)
{
send_IPI_mask_sequence(mask, vector);
}
static inline void send_IPI_allbutself(int vector)
{
- cpumask_t mask = cpu_online_map;
- cpu_clear(smp_processor_id(), mask);
-
- if (!cpus_empty(mask))
- send_IPI_mask(mask, vector);
+ send_IPI_mask_allbutself(cpu_online_mask, vector);
}
static inline void send_IPI_all(int vector)
{
- send_IPI_mask(cpu_online_map, vector);
+ send_IPI_mask(cpu_online_mask, vector);
}
#endif /* __ASM_MACH_IPI_H */
diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h
index 7f225a4b2a26..097794ff6b79 100644
--- a/arch/x86/include/asm/dma-mapping.h
+++ b/arch/x86/include/asm/dma-mapping.h
@@ -71,15 +71,13 @@ static inline struct dma_mapping_ops *get_dma_ops(struct device *dev)
/* Make sure we keep the same behaviour */
static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
{
-#ifdef CONFIG_X86_32
- return 0;
-#else
+#ifdef CONFIG_X86_64
struct dma_mapping_ops *ops = get_dma_ops(dev);
if (ops->mapping_error)
return ops->mapping_error(dev, dma_addr);
- return (dma_addr == bad_dma_address);
#endif
+ return (dma_addr == bad_dma_address);
}
#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
diff --git a/arch/x86/include/asm/ds.h b/arch/x86/include/asm/ds.h
index 72c5a190bf48..99b6c39774a4 100644
--- a/arch/x86/include/asm/ds.h
+++ b/arch/x86/include/asm/ds.h
@@ -7,13 +7,12 @@
*
* It manages:
* - per-thread and per-cpu allocation of BTS and PEBS
- * - buffer memory allocation (optional)
- * - buffer overflow handling
+ * - buffer overflow handling (to be done)
* - buffer access
*
* It assumes:
- * - get_task_struct on all parameter tasks
- * - current is allowed to trace parameter tasks
+ * - get_task_struct on all traced tasks
+ * - current is allowed to trace tasks
*
*
* Copyright (C) 2007-2008 Intel Corporation.
@@ -23,13 +22,21 @@
#ifndef _ASM_X86_DS_H
#define _ASM_X86_DS_H
-#ifdef CONFIG_X86_DS
#include <linux/types.h>
#include <linux/init.h>
+#include <linux/err.h>
+
+#ifdef CONFIG_X86_DS
struct task_struct;
+struct ds_tracer;
+struct bts_tracer;
+struct pebs_tracer;
+
+typedef void (*bts_ovfl_callback_t)(struct bts_tracer *);
+typedef void (*pebs_ovfl_callback_t)(struct pebs_tracer *);
/*
* Request BTS or PEBS
@@ -37,60 +44,62 @@ struct task_struct;
* Due to alignement constraints, the actual buffer may be slightly
* smaller than the requested or provided buffer.
*
- * Returns 0 on success; -Eerrno otherwise
+ * Returns a pointer to a tracer structure on success, or
+ * ERR_PTR(errcode) on failure.
+ *
+ * The interrupt threshold is independent from the overflow callback
+ * to allow users to use their own overflow interrupt handling mechanism.
*
* task: the task to request recording for;
* NULL for per-cpu recording on the current cpu
* base: the base pointer for the (non-pageable) buffer;
- * NULL if buffer allocation requested
- * size: the size of the requested or provided buffer
+ * size: the size of the provided buffer in bytes
* ovfl: pointer to a function to be called on buffer overflow;
* NULL if cyclic buffer requested
+ * th: the interrupt threshold in records from the end of the buffer;
+ * -1 if no interrupt threshold is requested.
*/
-typedef void (*ds_ovfl_callback_t)(struct task_struct *);
-extern int ds_request_bts(struct task_struct *task, void *base, size_t size,
- ds_ovfl_callback_t ovfl);
-extern int ds_request_pebs(struct task_struct *task, void *base, size_t size,
- ds_ovfl_callback_t ovfl);
+extern struct bts_tracer *ds_request_bts(struct task_struct *task,
+ void *base, size_t size,
+ bts_ovfl_callback_t ovfl, size_t th);
+extern struct pebs_tracer *ds_request_pebs(struct task_struct *task,
+ void *base, size_t size,
+ pebs_ovfl_callback_t ovfl,
+ size_t th);
/*
* Release BTS or PEBS resources
*
- * Frees buffers allocated on ds_request.
- *
* Returns 0 on success; -Eerrno otherwise
*
- * task: the task to release resources for;
- * NULL to release resources for the current cpu
+ * tracer: the tracer handle returned from ds_request_~()
*/
-extern int ds_release_bts(struct task_struct *task);
-extern int ds_release_pebs(struct task_struct *task);
+extern int ds_release_bts(struct bts_tracer *tracer);
+extern int ds_release_pebs(struct pebs_tracer *tracer);
/*
- * Return the (array) index of the write pointer.
+ * Get the (array) index of the write pointer.
* (assuming an array of BTS/PEBS records)
*
- * Returns -Eerrno on error
+ * Returns 0 on success; -Eerrno on error
*
- * task: the task to access;
- * NULL to access the current cpu
- * pos (out): if not NULL, will hold the result
+ * tracer: the tracer handle returned from ds_request_~()
+ * pos (out): will hold the result
*/
-extern int ds_get_bts_index(struct task_struct *task, size_t *pos);
-extern int ds_get_pebs_index(struct task_struct *task, size_t *pos);
+extern int ds_get_bts_index(struct bts_tracer *tracer, size_t *pos);
+extern int ds_get_pebs_index(struct pebs_tracer *tracer, size_t *pos);
/*
- * Return the (array) index one record beyond the end of the array.
+ * Get the (array) index one record beyond the end of the array.
* (assuming an array of BTS/PEBS records)
*
- * Returns -Eerrno on error
+ * Returns 0 on success; -Eerrno on error
*
- * task: the task to access;
- * NULL to access the current cpu
- * pos (out): if not NULL, will hold the result
+ * tracer: the tracer handle returned from ds_request_~()
+ * pos (out): will hold the result
*/
-extern int ds_get_bts_end(struct task_struct *task, size_t *pos);
-extern int ds_get_pebs_end(struct task_struct *task, size_t *pos);
+extern int ds_get_bts_end(struct bts_tracer *tracer, size_t *pos);
+extern int ds_get_pebs_end(struct pebs_tracer *tracer, size_t *pos);
/*
* Provide a pointer to the BTS/PEBS record at parameter index.
@@ -101,14 +110,13 @@ extern int ds_get_pebs_end(struct task_struct *task, size_t *pos);
*
* Returns the size of a single record on success; -Eerrno on error
*
- * task: the task to access;
- * NULL to access the current cpu
+ * tracer: the tracer handle returned from ds_request_~()
* index: the index of the requested record
* record (out): pointer to the requested record
*/
-extern int ds_access_bts(struct task_struct *task,
+extern int ds_access_bts(struct bts_tracer *tracer,
size_t index, const void **record);
-extern int ds_access_pebs(struct task_struct *task,
+extern int ds_access_pebs(struct pebs_tracer *tracer,
size_t index, const void **record);
/*
@@ -128,38 +136,24 @@ extern int ds_access_pebs(struct task_struct *task,
*
* Returns the number of bytes written or -Eerrno.
*
- * task: the task to access;
- * NULL to access the current cpu
+ * tracer: the tracer handle returned from ds_request_~()
* buffer: the buffer to write
* size: the size of the buffer
*/
-extern int ds_write_bts(struct task_struct *task,
+extern int ds_write_bts(struct bts_tracer *tracer,
const void *buffer, size_t size);
-extern int ds_write_pebs(struct task_struct *task,
+extern int ds_write_pebs(struct pebs_tracer *tracer,
const void *buffer, size_t size);
/*
- * Same as ds_write_bts/pebs, but omit ownership checks.
- *
- * This is needed to have some other task than the owner of the
- * BTS/PEBS buffer or the parameter task itself write into the
- * respective buffer.
- */
-extern int ds_unchecked_write_bts(struct task_struct *task,
- const void *buffer, size_t size);
-extern int ds_unchecked_write_pebs(struct task_struct *task,
- const void *buffer, size_t size);
-
-/*
* Reset the write pointer of the BTS/PEBS buffer.
*
* Returns 0 on success; -Eerrno on error
*
- * task: the task to access;
- * NULL to access the current cpu
+ * tracer: the tracer handle returned from ds_request_~()
*/
-extern int ds_reset_bts(struct task_struct *task);
-extern int ds_reset_pebs(struct task_struct *task);
+extern int ds_reset_bts(struct bts_tracer *tracer);
+extern int ds_reset_pebs(struct pebs_tracer *tracer);
/*
* Clear the BTS/PEBS buffer and reset the write pointer.
@@ -167,33 +161,30 @@ extern int ds_reset_pebs(struct task_struct *task);
*
* Returns 0 on success; -Eerrno on error
*
- * task: the task to access;
- * NULL to access the current cpu
+ * tracer: the tracer handle returned from ds_request_~()
*/
-extern int ds_clear_bts(struct task_struct *task);
-extern int ds_clear_pebs(struct task_struct *task);
+extern int ds_clear_bts(struct bts_tracer *tracer);
+extern int ds_clear_pebs(struct pebs_tracer *tracer);
/*
* Provide the PEBS counter reset value.
*
* Returns 0 on success; -Eerrno on error
*
- * task: the task to access;
- * NULL to access the current cpu
+ * tracer: the tracer handle returned from ds_request_pebs()
* value (out): the counter reset value
*/
-extern int ds_get_pebs_reset(struct task_struct *task, u64 *value);
+extern int ds_get_pebs_reset(struct pebs_tracer *tracer, u64 *value);
/*
* Set the PEBS counter reset value.
*
* Returns 0 on success; -Eerrno on error
*
- * task: the task to access;
- * NULL to access the current cpu
+ * tracer: the tracer handle returned from ds_request_pebs()
* value: the new counter reset value
*/
-extern int ds_set_pebs_reset(struct task_struct *task, u64 value);
+extern int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value);
/*
* Initialization
@@ -206,17 +197,13 @@ extern void __cpuinit ds_init_intel(struct cpuinfo_x86 *);
/*
* The DS context - part of struct thread_struct.
*/
+#define MAX_SIZEOF_DS (12 * 8)
+
struct ds_context {
/* pointer to the DS configuration; goes into MSR_IA32_DS_AREA */
- unsigned char *ds;
+ unsigned char ds[MAX_SIZEOF_DS];
/* the owner of the BTS and PEBS configuration, respectively */
- struct task_struct *owner[2];
- /* buffer overflow notification function for BTS and PEBS */
- ds_ovfl_callback_t callback[2];
- /* the original buffer address */
- void *buffer[2];
- /* the number of allocated pages for on-request allocated buffers */
- unsigned int pages[2];
+ struct ds_tracer *owner[2];
/* use count */
unsigned long count;
/* a pointer to the context location inside the thread_struct
@@ -232,7 +219,8 @@ extern void ds_free(struct ds_context *context);
#else /* CONFIG_X86_DS */
-#define ds_init_intel(config) do {} while (0)
+struct cpuinfo_x86;
+static inline void __cpuinit ds_init_intel(struct cpuinfo_x86 *ignored) {}
#endif /* CONFIG_X86_DS */
#endif /* _ASM_X86_DS_H */
diff --git a/arch/x86/include/asm/emergency-restart.h b/arch/x86/include/asm/emergency-restart.h
index 94826cf87455..cc70c1c78ca4 100644
--- a/arch/x86/include/asm/emergency-restart.h
+++ b/arch/x86/include/asm/emergency-restart.h
@@ -8,7 +8,9 @@ enum reboot_type {
BOOT_BIOS = 'b',
#endif
BOOT_ACPI = 'a',
- BOOT_EFI = 'e'
+ BOOT_EFI = 'e',
+ BOOT_CF9 = 'p',
+ BOOT_CF9_COND = 'q',
};
extern enum reboot_type reboot_type;
diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h
index 380f0b4f17ed..ba8423c5363f 100644
--- a/arch/x86/include/asm/es7000/apic.h
+++ b/arch/x86/include/asm/es7000/apic.h
@@ -9,31 +9,27 @@ static inline int apic_id_registered(void)
return (1);
}
-static inline cpumask_t target_cpus(void)
+static inline const cpumask_t *target_cpus_cluster(void)
{
-#if defined CONFIG_ES7000_CLUSTERED_APIC
- return CPU_MASK_ALL;
-#else
- return cpumask_of_cpu(smp_processor_id());
-#endif
+ return &CPU_MASK_ALL;
}
-#if defined CONFIG_ES7000_CLUSTERED_APIC
-#define APIC_DFR_VALUE (APIC_DFR_CLUSTER)
-#define INT_DELIVERY_MODE (dest_LowestPrio)
-#define INT_DEST_MODE (1) /* logical delivery broadcast to all procs */
-#define NO_BALANCE_IRQ (1)
-#undef WAKE_SECONDARY_VIA_INIT
-#define WAKE_SECONDARY_VIA_MIP
-#else
+static inline const cpumask_t *target_cpus(void)
+{
+ return &cpumask_of_cpu(smp_processor_id());
+}
+
+#define APIC_DFR_VALUE_CLUSTER (APIC_DFR_CLUSTER)
+#define INT_DELIVERY_MODE_CLUSTER (dest_LowestPrio)
+#define INT_DEST_MODE_CLUSTER (1) /* logical delivery broadcast to all procs */
+#define NO_BALANCE_IRQ_CLUSTER (1)
+
#define APIC_DFR_VALUE (APIC_DFR_FLAT)
#define INT_DELIVERY_MODE (dest_Fixed)
#define INT_DEST_MODE (0) /* phys delivery to target procs */
#define NO_BALANCE_IRQ (0)
#undef APIC_DEST_LOGICAL
#define APIC_DEST_LOGICAL 0x0
-#define WAKE_SECONDARY_VIA_INIT
-#endif
static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid)
{
@@ -60,6 +56,16 @@ static inline unsigned long calculate_ldr(int cpu)
* an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel
* document number 292116). So here it goes...
*/
+static inline void init_apic_ldr_cluster(void)
+{
+ unsigned long val;
+ int cpu = smp_processor_id();
+
+ apic_write(APIC_DFR, APIC_DFR_VALUE_CLUSTER);
+ val = calculate_ldr(cpu);
+ apic_write(APIC_LDR, val);
+}
+
static inline void init_apic_ldr(void)
{
unsigned long val;
@@ -70,17 +76,14 @@ static inline void init_apic_ldr(void)
apic_write(APIC_LDR, val);
}
-#ifndef CONFIG_X86_GENERICARCH
-extern void enable_apic_mode(void);
-#endif
-
extern int apic_version [MAX_APICS];
static inline void setup_apic_routing(void)
{
int apic = per_cpu(x86_bios_cpu_apicid, smp_processor_id());
- printk("Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n",
+ printk("Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n",
(apic_version[apic] == 0x14) ?
- "Physical Cluster" : "Logical Cluster", nr_ioapics, cpus_addr(target_cpus())[0]);
+ "Physical Cluster" : "Logical Cluster",
+ nr_ioapics, cpus_addr(*target_cpus())[0]);
}
static inline int multi_timer_check(int apic, int irq)
@@ -98,7 +101,7 @@ static inline int cpu_present_to_apicid(int mps_cpu)
{
if (!mps_cpu)
return boot_cpu_physical_apicid;
- else if (mps_cpu < NR_CPUS)
+ else if (mps_cpu < nr_cpu_ids)
return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu);
else
return BAD_APICID;
@@ -118,9 +121,9 @@ extern u8 cpu_2_logical_apicid[];
static inline int cpu_to_logical_apicid(int cpu)
{
#ifdef CONFIG_SMP
- if (cpu >= NR_CPUS)
- return BAD_APICID;
- return (int)cpu_2_logical_apicid[cpu];
+ if (cpu >= nr_cpu_ids)
+ return BAD_APICID;
+ return (int)cpu_2_logical_apicid[cpu];
#else
return logical_smp_processor_id();
#endif
@@ -144,16 +147,87 @@ static inline int check_phys_apicid_present(int cpu_physical_apicid)
return (1);
}
-static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
+static inline unsigned int
+cpu_mask_to_apicid_cluster(const struct cpumask *cpumask)
+{
+ int num_bits_set;
+ int cpus_found = 0;
+ int cpu;
+ int apicid;
+
+ num_bits_set = cpumask_weight(cpumask);
+ /* Return id to all */
+ if (num_bits_set == NR_CPUS)
+ return 0xFF;
+ /*
+ * The cpus in the mask must all be on the apic cluster. If are not
+ * on the same apicid cluster return default value of TARGET_CPUS.
+ */
+ cpu = cpumask_first(cpumask);
+ apicid = cpu_to_logical_apicid(cpu);
+ while (cpus_found < num_bits_set) {
+ if (cpumask_test_cpu(cpu, cpumask)) {
+ int new_apicid = cpu_to_logical_apicid(cpu);
+ if (apicid_cluster(apicid) !=
+ apicid_cluster(new_apicid)){
+ printk ("%s: Not a valid mask!\n", __func__);
+ return 0xFF;
+ }
+ apicid = new_apicid;
+ cpus_found++;
+ }
+ cpu++;
+ }
+ return apicid;
+}
+
+static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
{
int num_bits_set;
int cpus_found = 0;
int cpu;
int apicid;
- num_bits_set = cpus_weight(cpumask);
+ num_bits_set = cpus_weight(*cpumask);
/* Return id to all */
if (num_bits_set == NR_CPUS)
+ return cpu_to_logical_apicid(0);
+ /*
+ * The cpus in the mask must all be on the apic cluster. If are not
+ * on the same apicid cluster return default value of TARGET_CPUS.
+ */
+ cpu = first_cpu(*cpumask);
+ apicid = cpu_to_logical_apicid(cpu);
+ while (cpus_found < num_bits_set) {
+ if (cpu_isset(cpu, *cpumask)) {
+ int new_apicid = cpu_to_logical_apicid(cpu);
+ if (apicid_cluster(apicid) !=
+ apicid_cluster(new_apicid)){
+ printk ("%s: Not a valid mask!\n", __func__);
+ return cpu_to_logical_apicid(0);
+ }
+ apicid = new_apicid;
+ cpus_found++;
+ }
+ cpu++;
+ }
+ return apicid;
+}
+
+static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+ const struct cpumask *andmask)
+{
+ int num_bits_set;
+ int num_bits_set2;
+ int cpus_found = 0;
+ int cpu;
+ int apicid = 0;
+
+ num_bits_set = cpumask_weight(cpumask);
+ num_bits_set2 = cpumask_weight(andmask);
+ num_bits_set = min(num_bits_set, num_bits_set2);
+ /* Return id to all */
+ if (num_bits_set >= nr_cpu_ids)
#if defined CONFIG_ES7000_CLUSTERED_APIC
return 0xFF;
#else
@@ -163,14 +237,17 @@ static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
* The cpus in the mask must all be on the apic cluster. If are not
* on the same apicid cluster return default value of TARGET_CPUS.
*/
- cpu = first_cpu(cpumask);
+ cpu = cpumask_first_and(cpumask, andmask);
apicid = cpu_to_logical_apicid(cpu);
+
while (cpus_found < num_bits_set) {
- if (cpu_isset(cpu, cpumask)) {
+ if (cpumask_test_cpu(cpu, cpumask) &&
+ cpumask_test_cpu(cpu, andmask)) {
int new_apicid = cpu_to_logical_apicid(cpu);
if (apicid_cluster(apicid) !=
- apicid_cluster(new_apicid)){
- printk ("%s: Not a valid mask!\n", __func__);
+ apicid_cluster(new_apicid)) {
+ printk(KERN_WARNING
+ "%s: Not a valid mask!\n", __func__);
#if defined CONFIG_ES7000_CLUSTERED_APIC
return 0xFF;
#else
diff --git a/arch/x86/include/asm/es7000/ipi.h b/arch/x86/include/asm/es7000/ipi.h
index 632a955fcc0a..7e8ed24d4b8a 100644
--- a/arch/x86/include/asm/es7000/ipi.h
+++ b/arch/x86/include/asm/es7000/ipi.h
@@ -1,24 +1,22 @@
#ifndef __ASM_ES7000_IPI_H
#define __ASM_ES7000_IPI_H
-void send_IPI_mask_sequence(cpumask_t mask, int vector);
+void send_IPI_mask_sequence(const struct cpumask *mask, int vector);
+void send_IPI_mask_allbutself(const struct cpumask *mask, int vector);
-static inline void send_IPI_mask(cpumask_t mask, int vector)
+static inline void send_IPI_mask(const struct cpumask *mask, int vector)
{
send_IPI_mask_sequence(mask, vector);
}
static inline void send_IPI_allbutself(int vector)
{
- cpumask_t mask = cpu_online_map;
- cpu_clear(smp_processor_id(), mask);
- if (!cpus_empty(mask))
- send_IPI_mask(mask, vector);
+ send_IPI_mask_allbutself(cpu_online_mask, vector);
}
static inline void send_IPI_all(int vector)
{
- send_IPI_mask(cpu_online_map, vector);
+ send_IPI_mask(cpu_online_mask, vector);
}
#endif /* __ASM_ES7000_IPI_H */
diff --git a/arch/x86/include/asm/es7000/wakecpu.h b/arch/x86/include/asm/es7000/wakecpu.h
index 398493461913..78f0daaee436 100644
--- a/arch/x86/include/asm/es7000/wakecpu.h
+++ b/arch/x86/include/asm/es7000/wakecpu.h
@@ -1,36 +1,12 @@
#ifndef __ASM_ES7000_WAKECPU_H
#define __ASM_ES7000_WAKECPU_H
-/*
- * This file copes with machines that wakeup secondary CPUs by the
- * INIT, INIT, STARTUP sequence.
- */
-
-#ifdef CONFIG_ES7000_CLUSTERED_APIC
-#define WAKE_SECONDARY_VIA_MIP
-#else
-#define WAKE_SECONDARY_VIA_INIT
-#endif
-
-#ifdef WAKE_SECONDARY_VIA_MIP
-extern int es7000_start_cpu(int cpu, unsigned long eip);
-static inline int
-wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
-{
- int boot_error = 0;
- boot_error = es7000_start_cpu(phys_apicid, start_eip);
- return boot_error;
-}
-#endif
-
-#define TRAMPOLINE_LOW phys_to_virt(0x467)
-#define TRAMPOLINE_HIGH phys_to_virt(0x469)
-
-#define boot_cpu_apicid boot_cpu_physical_apicid
+#define TRAMPOLINE_PHYS_LOW 0x467
+#define TRAMPOLINE_PHYS_HIGH 0x469
static inline void wait_for_init_deassert(atomic_t *deassert)
{
-#ifdef WAKE_SECONDARY_VIA_INIT
+#ifndef CONFIG_ES7000_CLUSTERED_APIC
while (!atomic_read(deassert))
cpu_relax();
#endif
@@ -50,9 +26,12 @@ static inline void restore_NMI_vector(unsigned short *high, unsigned short *low)
{
}
-#define inquire_remote_apic(apicid) do { \
- if (apic_verbosity >= APIC_DEBUG) \
- __inquire_remote_apic(apicid); \
- } while (0)
+extern void __inquire_remote_apic(int apicid);
+
+static inline void inquire_remote_apic(int apicid)
+{
+ if (apic_verbosity >= APIC_DEBUG)
+ __inquire_remote_apic(apicid);
+}
#endif /* __ASM_MACH_WAKECPU_H */
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index 9e8bc29b8b17..7e61b4ceb9a4 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -17,8 +17,40 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr)
*/
return addr - 1;
}
-#endif
+#ifdef CONFIG_DYNAMIC_FTRACE
+
+struct dyn_arch_ftrace {
+ /* No extra data needed for x86 */
+};
+
+#endif /* CONFIG_DYNAMIC_FTRACE */
+#endif /* __ASSEMBLY__ */
#endif /* CONFIG_FUNCTION_TRACER */
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+
+#ifndef __ASSEMBLY__
+
+/*
+ * Stack of return addresses for functions
+ * of a thread.
+ * Used in struct thread_info
+ */
+struct ftrace_ret_stack {
+ unsigned long ret;
+ unsigned long func;
+ unsigned long long calltime;
+};
+
+/*
+ * Primary handler of a function return.
+ * It relays on ftrace_return_to_handler.
+ * Defined in entry32.S
+ */
+extern void return_to_handler(void);
+
+#endif /* __ASSEMBLY__ */
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+
#endif /* _ASM_X86_FTRACE_H */
diff --git a/arch/x86/include/asm/genapic_32.h b/arch/x86/include/asm/genapic_32.h
index 5cbd4fcc06fd..746f37a7963a 100644
--- a/arch/x86/include/asm/genapic_32.h
+++ b/arch/x86/include/asm/genapic_32.h
@@ -2,6 +2,7 @@
#define _ASM_X86_GENAPIC_32_H
#include <asm/mpspec.h>
+#include <asm/atomic.h>
/*
* Generic APIC driver interface.
@@ -23,7 +24,7 @@ struct genapic {
int (*probe)(void);
int (*apic_id_registered)(void);
- cpumask_t (*target_cpus)(void);
+ const struct cpumask *(*target_cpus)(void);
int int_delivery_mode;
int int_dest_mode;
int ESR_DISABLE;
@@ -56,15 +57,27 @@ struct genapic {
unsigned (*get_apic_id)(unsigned long x);
unsigned long apic_id_mask;
- unsigned int (*cpu_mask_to_apicid)(cpumask_t cpumask);
- cpumask_t (*vector_allocation_domain)(int cpu);
+ unsigned int (*cpu_mask_to_apicid)(const struct cpumask *cpumask);
+ unsigned int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask,
+ const struct cpumask *andmask);
+ void (*vector_allocation_domain)(int cpu, struct cpumask *retmask);
#ifdef CONFIG_SMP
/* ipi */
- void (*send_IPI_mask)(cpumask_t mask, int vector);
+ void (*send_IPI_mask)(const struct cpumask *mask, int vector);
+ void (*send_IPI_mask_allbutself)(const struct cpumask *mask,
+ int vector);
void (*send_IPI_allbutself)(int vector);
void (*send_IPI_all)(int vector);
#endif
+ int (*wakeup_cpu)(int apicid, unsigned long start_eip);
+ int trampoline_phys_low;
+ int trampoline_phys_high;
+ void (*wait_for_init_deassert)(atomic_t *deassert);
+ void (*smp_callin_clear_local_apic)(void);
+ void (*store_NMI_vector)(unsigned short *high, unsigned short *low);
+ void (*restore_NMI_vector)(unsigned short *high, unsigned short *low);
+ void (*inquire_remote_apic)(int apicid);
};
#define APICFUNC(x) .x = x,
@@ -105,16 +118,25 @@ struct genapic {
APICFUNC(get_apic_id) \
.apic_id_mask = APIC_ID_MASK, \
APICFUNC(cpu_mask_to_apicid) \
- APICFUNC(vector_allocation_domain) \
+ APICFUNC(cpu_mask_to_apicid_and) \
+ APICFUNC(vector_allocation_domain) \
APICFUNC(acpi_madt_oem_check) \
IPIFUNC(send_IPI_mask) \
IPIFUNC(send_IPI_allbutself) \
IPIFUNC(send_IPI_all) \
APICFUNC(enable_apic_mode) \
APICFUNC(phys_pkg_id) \
+ .trampoline_phys_low = TRAMPOLINE_PHYS_LOW, \
+ .trampoline_phys_high = TRAMPOLINE_PHYS_HIGH, \
+ APICFUNC(wait_for_init_deassert) \
+ APICFUNC(smp_callin_clear_local_apic) \
+ APICFUNC(store_NMI_vector) \
+ APICFUNC(restore_NMI_vector) \
+ APICFUNC(inquire_remote_apic) \
}
extern struct genapic *genapic;
+extern void es7000_update_genapic_to_cluster(void);
enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC};
#define get_uv_system_type() UV_NONE
diff --git a/arch/x86/include/asm/genapic_64.h b/arch/x86/include/asm/genapic_64.h
index 13c4e96199ea..adf32fb56aa6 100644
--- a/arch/x86/include/asm/genapic_64.h
+++ b/arch/x86/include/asm/genapic_64.h
@@ -1,6 +1,8 @@
#ifndef _ASM_X86_GENAPIC_64_H
#define _ASM_X86_GENAPIC_64_H
+#include <linux/cpumask.h>
+
/*
* Copyright 2004 James Cleverdon, IBM.
* Subject to the GNU Public License, v.2
@@ -18,20 +20,26 @@ struct genapic {
u32 int_delivery_mode;
u32 int_dest_mode;
int (*apic_id_registered)(void);
- cpumask_t (*target_cpus)(void);
- cpumask_t (*vector_allocation_domain)(int cpu);
+ const struct cpumask *(*target_cpus)(void);
+ void (*vector_allocation_domain)(int cpu, struct cpumask *retmask);
void (*init_apic_ldr)(void);
/* ipi */
- void (*send_IPI_mask)(cpumask_t mask, int vector);
+ void (*send_IPI_mask)(const struct cpumask *mask, int vector);
+ void (*send_IPI_mask_allbutself)(const struct cpumask *mask,
+ int vector);
void (*send_IPI_allbutself)(int vector);
void (*send_IPI_all)(int vector);
void (*send_IPI_self)(int vector);
/* */
- unsigned int (*cpu_mask_to_apicid)(cpumask_t cpumask);
+ unsigned int (*cpu_mask_to_apicid)(const struct cpumask *cpumask);
+ unsigned int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask,
+ const struct cpumask *andmask);
unsigned int (*phys_pkg_id)(int index_msb);
unsigned int (*get_apic_id)(unsigned long x);
unsigned long (*set_apic_id)(unsigned int id);
unsigned long apic_id_mask;
+ /* wakeup_secondary_cpu */
+ int (*wakeup_cpu)(int apicid, unsigned long start_eip);
};
extern struct genapic *genapic;
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index 6afd9933a7dd..25d527ca1362 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -188,17 +188,14 @@ extern void restore_IO_APIC_setup(void);
extern void reinit_intr_remapped_IO_APIC(int);
#endif
-extern int probe_nr_irqs(void);
+extern void probe_nr_irqs_gsi(void);
#else /* !CONFIG_X86_IO_APIC */
#define io_apic_assign_pci_irqs 0
static const int timer_through_8259 = 0;
-static inline void ioapic_init_mappings(void) { }
+static inline void ioapic_init_mappings(void) { }
-static inline int probe_nr_irqs(void)
-{
- return NR_IRQS;
-}
+static inline void probe_nr_irqs_gsi(void) { }
#endif
#endif /* _ASM_X86_IO_APIC_H */
diff --git a/arch/x86/include/asm/iomap.h b/arch/x86/include/asm/iomap.h
new file mode 100644
index 000000000000..c1f06289b14b
--- /dev/null
+++ b/arch/x86/include/asm/iomap.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright © 2008 Ingo Molnar
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/uaccess.h>
+#include <asm/cacheflush.h>
+#include <asm/pgtable.h>
+#include <asm/tlbflush.h>
+
+void *
+iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot);
+
+void
+iounmap_atomic(void *kvaddr, enum km_type type);
diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h
index e4a552d44465..0b500c5b6446 100644
--- a/arch/x86/include/asm/iommu.h
+++ b/arch/x86/include/asm/iommu.h
@@ -6,7 +6,6 @@ extern void no_iommu_init(void);
extern struct dma_mapping_ops nommu_dma_ops;
extern int force_iommu, no_iommu;
extern int iommu_detected;
-extern int dmar_disabled;
extern unsigned long iommu_nr_pages(unsigned long addr, unsigned long len);
diff --git a/arch/x86/include/asm/ipi.h b/arch/x86/include/asm/ipi.h
index f89dffb28aa9..c745a306f7d3 100644
--- a/arch/x86/include/asm/ipi.h
+++ b/arch/x86/include/asm/ipi.h
@@ -117,7 +117,8 @@ static inline void __send_IPI_dest_field(unsigned int mask, int vector,
native_apic_mem_write(APIC_ICR, cfg);
}
-static inline void send_IPI_mask_sequence(cpumask_t mask, int vector)
+static inline void send_IPI_mask_sequence(const struct cpumask *mask,
+ int vector)
{
unsigned long flags;
unsigned long query_cpu;
@@ -128,11 +129,29 @@ static inline void send_IPI_mask_sequence(cpumask_t mask, int vector)
* - mbligh
*/
local_irq_save(flags);
- for_each_cpu_mask_nr(query_cpu, mask) {
+ for_each_cpu(query_cpu, mask) {
__send_IPI_dest_field(per_cpu(x86_cpu_to_apicid, query_cpu),
vector, APIC_DEST_PHYSICAL);
}
local_irq_restore(flags);
}
+static inline void send_IPI_mask_allbutself(const struct cpumask *mask,
+ int vector)
+{
+ unsigned long flags;
+ unsigned int query_cpu;
+ unsigned int this_cpu = smp_processor_id();
+
+ /* See Hack comment above */
+
+ local_irq_save(flags);
+ for_each_cpu(query_cpu, mask)
+ if (query_cpu != this_cpu)
+ __send_IPI_dest_field(
+ per_cpu(x86_cpu_to_apicid, query_cpu),
+ vector, APIC_DEST_PHYSICAL);
+ local_irq_restore(flags);
+}
+
#endif /* _ASM_X86_IPI_H */
diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h
index bae0eda95486..8766d30fb746 100644
--- a/arch/x86/include/asm/irq.h
+++ b/arch/x86/include/asm/irq.h
@@ -37,7 +37,7 @@ extern int irqbalance_disable(char *str);
#ifdef CONFIG_HOTPLUG_CPU
#include <linux/cpumask.h>
-extern void fixup_irqs(cpumask_t map);
+extern void fixup_irqs(void);
#endif
extern unsigned int do_IRQ(struct pt_regs *regs);
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 0005adb0f941..f7ff65032b9d 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -101,12 +101,23 @@
#define LAST_VM86_IRQ 15
#define invalid_vm86_irq(irq) ((irq) < 3 || (irq) > 15)
+#define NR_IRQS_LEGACY 16
+
#if defined(CONFIG_X86_IO_APIC) && !defined(CONFIG_X86_VOYAGER)
+
+#ifndef CONFIG_SPARSE_IRQ
# if NR_CPUS < MAX_IO_APICS
# define NR_IRQS (NR_VECTORS + (32 * NR_CPUS))
# else
# define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS))
# endif
+#else
+# if (8 * NR_CPUS) > (32 * MAX_IO_APICS)
+# define NR_IRQS (NR_VECTORS + (8 * NR_CPUS))
+# else
+# define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS))
+# endif
+#endif
#elif defined(CONFIG_X86_VOYAGER)
diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h
index ff3a6c236c00..8863d978cb96 100644
--- a/arch/x86/include/asm/mach-default/mach_apic.h
+++ b/arch/x86/include/asm/mach-default/mach_apic.h
@@ -8,12 +8,12 @@
#define APIC_DFR_VALUE (APIC_DFR_FLAT)
-static inline cpumask_t target_cpus(void)
+static inline const struct cpumask *target_cpus(void)
{
#ifdef CONFIG_SMP
- return cpu_online_map;
+ return cpu_online_mask;
#else
- return cpumask_of_cpu(0);
+ return cpumask_of(0);
#endif
}
@@ -28,15 +28,18 @@ static inline cpumask_t target_cpus(void)
#define apic_id_registered (genapic->apic_id_registered)
#define init_apic_ldr (genapic->init_apic_ldr)
#define cpu_mask_to_apicid (genapic->cpu_mask_to_apicid)
+#define cpu_mask_to_apicid_and (genapic->cpu_mask_to_apicid_and)
#define phys_pkg_id (genapic->phys_pkg_id)
#define vector_allocation_domain (genapic->vector_allocation_domain)
#define read_apic_id() (GET_APIC_ID(apic_read(APIC_ID)))
#define send_IPI_self (genapic->send_IPI_self)
+#define wakeup_secondary_cpu (genapic->wakeup_cpu)
extern void setup_apic_routing(void);
#else
#define INT_DELIVERY_MODE dest_LowestPrio
#define INT_DEST_MODE 1 /* logical delivery broadcast to all procs */
#define TARGET_CPUS (target_cpus())
+#define wakeup_secondary_cpu wakeup_secondary_cpu_via_init
/*
* Set up the logical destination ID.
*
@@ -59,9 +62,18 @@ static inline int apic_id_registered(void)
return physid_isset(read_apic_id(), phys_cpu_present_map);
}
-static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
+static inline unsigned int cpu_mask_to_apicid(const struct cpumask *cpumask)
{
- return cpus_addr(cpumask)[0];
+ return cpumask_bits(cpumask)[0];
+}
+
+static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+ const struct cpumask *andmask)
+{
+ unsigned long mask1 = cpumask_bits(cpumask)[0];
+ unsigned long mask2 = cpumask_bits(andmask)[0];
+
+ return (unsigned int)(mask1 & mask2);
}
static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb)
@@ -86,7 +98,7 @@ static inline int apicid_to_node(int logical_apicid)
#endif
}
-static inline cpumask_t vector_allocation_domain(int cpu)
+static inline void vector_allocation_domain(int cpu, struct cpumask *retmask)
{
/* Careful. Some cpus do not strictly honor the set of cpus
* specified in the interrupt destination when using lowest
@@ -96,8 +108,7 @@ static inline cpumask_t vector_allocation_domain(int cpu)
* deliver interrupts to the wrong hyperthread when only one
* hyperthread was specified in the interrupt desitination.
*/
- cpumask_t domain = { { [0] = APIC_ALL_CPUS, } };
- return domain;
+ *retmask = (cpumask_t) { { [0] = APIC_ALL_CPUS } };
}
#endif
@@ -129,7 +140,7 @@ static inline int cpu_to_logical_apicid(int cpu)
static inline int cpu_present_to_apicid(int mps_cpu)
{
- if (mps_cpu < NR_CPUS && cpu_present(mps_cpu))
+ if (mps_cpu < nr_cpu_ids && cpu_present(mps_cpu))
return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu);
else
return BAD_APICID;
diff --git a/arch/x86/include/asm/mach-default/mach_ipi.h b/arch/x86/include/asm/mach-default/mach_ipi.h
index fabca01ebacf..191312d155da 100644
--- a/arch/x86/include/asm/mach-default/mach_ipi.h
+++ b/arch/x86/include/asm/mach-default/mach_ipi.h
@@ -4,7 +4,8 @@
/* Avoid include hell */
#define NMI_VECTOR 0x02
-void send_IPI_mask_bitmask(cpumask_t mask, int vector);
+void send_IPI_mask_bitmask(const struct cpumask *mask, int vector);
+void send_IPI_mask_allbutself(const struct cpumask *mask, int vector);
void __send_IPI_shortcut(unsigned int shortcut, int vector);
extern int no_broadcast;
@@ -12,28 +13,27 @@ extern int no_broadcast;
#ifdef CONFIG_X86_64
#include <asm/genapic.h>
#define send_IPI_mask (genapic->send_IPI_mask)
+#define send_IPI_mask_allbutself (genapic->send_IPI_mask_allbutself)
#else
-static inline void send_IPI_mask(cpumask_t mask, int vector)
+static inline void send_IPI_mask(const struct cpumask *mask, int vector)
{
send_IPI_mask_bitmask(mask, vector);
}
+void send_IPI_mask_allbutself(const struct cpumask *mask, int vector);
#endif
static inline void __local_send_IPI_allbutself(int vector)
{
- if (no_broadcast || vector == NMI_VECTOR) {
- cpumask_t mask = cpu_online_map;
-
- cpu_clear(smp_processor_id(), mask);
- send_IPI_mask(mask, vector);
- } else
+ if (no_broadcast || vector == NMI_VECTOR)
+ send_IPI_mask_allbutself(cpu_online_mask, vector);
+ else
__send_IPI_shortcut(APIC_DEST_ALLBUT, vector);
}
static inline void __local_send_IPI_all(int vector)
{
if (no_broadcast || vector == NMI_VECTOR)
- send_IPI_mask(cpu_online_map, vector);
+ send_IPI_mask(cpu_online_mask, vector);
else
__send_IPI_shortcut(APIC_DEST_ALLINC, vector);
}
diff --git a/arch/x86/include/asm/mach-default/mach_wakecpu.h b/arch/x86/include/asm/mach-default/mach_wakecpu.h
index 9d80db91e992..ceb013660146 100644
--- a/arch/x86/include/asm/mach-default/mach_wakecpu.h
+++ b/arch/x86/include/asm/mach-default/mach_wakecpu.h
@@ -1,17 +1,8 @@
#ifndef _ASM_X86_MACH_DEFAULT_MACH_WAKECPU_H
#define _ASM_X86_MACH_DEFAULT_MACH_WAKECPU_H
-/*
- * This file copes with machines that wakeup secondary CPUs by the
- * INIT, INIT, STARTUP sequence.
- */
-
-#define WAKE_SECONDARY_VIA_INIT
-
-#define TRAMPOLINE_LOW phys_to_virt(0x467)
-#define TRAMPOLINE_HIGH phys_to_virt(0x469)
-
-#define boot_cpu_apicid boot_cpu_physical_apicid
+#define TRAMPOLINE_PHYS_LOW (0x467)
+#define TRAMPOLINE_PHYS_HIGH (0x469)
static inline void wait_for_init_deassert(atomic_t *deassert)
{
@@ -33,9 +24,12 @@ static inline void restore_NMI_vector(unsigned short *high, unsigned short *low)
{
}
-#define inquire_remote_apic(apicid) do { \
- if (apic_verbosity >= APIC_DEBUG) \
- __inquire_remote_apic(apicid); \
- } while (0)
+extern void __inquire_remote_apic(int apicid);
+
+static inline void inquire_remote_apic(int apicid)
+{
+ if (apic_verbosity >= APIC_DEBUG)
+ __inquire_remote_apic(apicid);
+}
#endif /* _ASM_X86_MACH_DEFAULT_MACH_WAKECPU_H */
diff --git a/arch/x86/include/asm/mach-default/smpboot_hooks.h b/arch/x86/include/asm/mach-default/smpboot_hooks.h
index dbab36d64d48..23bf52103b89 100644
--- a/arch/x86/include/asm/mach-default/smpboot_hooks.h
+++ b/arch/x86/include/asm/mach-default/smpboot_hooks.h
@@ -13,9 +13,11 @@ static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip)
CMOS_WRITE(0xa, 0xf);
local_flush_tlb();
pr_debug("1.\n");
- *((volatile unsigned short *) TRAMPOLINE_HIGH) = start_eip >> 4;
+ *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)) =
+ start_eip >> 4;
pr_debug("2.\n");
- *((volatile unsigned short *) TRAMPOLINE_LOW) = start_eip & 0xf;
+ *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) =
+ start_eip & 0xf;
pr_debug("3.\n");
}
@@ -32,7 +34,7 @@ static inline void smpboot_restore_warm_reset_vector(void)
*/
CMOS_WRITE(0, 0xf);
- *((volatile long *) phys_to_virt(0x467)) = 0;
+ *((volatile long *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = 0;
}
static inline void __init smpboot_setup_io_apic(void)
diff --git a/arch/x86/include/asm/mach-generic/mach_apic.h b/arch/x86/include/asm/mach-generic/mach_apic.h
index 5180bd7478fb..48553e958ad5 100644
--- a/arch/x86/include/asm/mach-generic/mach_apic.h
+++ b/arch/x86/include/asm/mach-generic/mach_apic.h
@@ -24,9 +24,11 @@
#define check_phys_apicid_present (genapic->check_phys_apicid_present)
#define check_apicid_used (genapic->check_apicid_used)
#define cpu_mask_to_apicid (genapic->cpu_mask_to_apicid)
+#define cpu_mask_to_apicid_and (genapic->cpu_mask_to_apicid_and)
#define vector_allocation_domain (genapic->vector_allocation_domain)
#define enable_apic_mode (genapic->enable_apic_mode)
#define phys_pkg_id (genapic->phys_pkg_id)
+#define wakeup_secondary_cpu (genapic->wakeup_cpu)
extern void generic_bigsmp_probe(void);
diff --git a/arch/x86/include/asm/mach-generic/mach_wakecpu.h b/arch/x86/include/asm/mach-generic/mach_wakecpu.h
new file mode 100644
index 000000000000..1ab16b168c8a
--- /dev/null
+++ b/arch/x86/include/asm/mach-generic/mach_wakecpu.h
@@ -0,0 +1,12 @@
+#ifndef _ASM_X86_MACH_GENERIC_MACH_WAKECPU_H
+#define _ASM_X86_MACH_GENERIC_MACH_WAKECPU_H
+
+#define TRAMPOLINE_PHYS_LOW (genapic->trampoline_phys_low)
+#define TRAMPOLINE_PHYS_HIGH (genapic->trampoline_phys_high)
+#define wait_for_init_deassert (genapic->wait_for_init_deassert)
+#define smp_callin_clear_local_apic (genapic->smp_callin_clear_local_apic)
+#define store_NMI_vector (genapic->store_NMI_vector)
+#define restore_NMI_vector (genapic->restore_NMI_vector)
+#define inquire_remote_apic (genapic->inquire_remote_apic)
+
+#endif /* _ASM_X86_MACH_GENERIC_MACH_APIC_H */
diff --git a/arch/x86/include/asm/mmzone_32.h b/arch/x86/include/asm/mmzone_32.h
index 485bdf059ffb..07f1af494ca5 100644
--- a/arch/x86/include/asm/mmzone_32.h
+++ b/arch/x86/include/asm/mmzone_32.h
@@ -34,10 +34,14 @@ static inline void get_memcfg_numa(void)
extern int early_pfn_to_nid(unsigned long pfn);
+extern void resume_map_numa_kva(pgd_t *pgd);
+
#else /* !CONFIG_NUMA */
#define get_memcfg_numa get_memcfg_numa_flat
+static inline void resume_map_numa_kva(pgd_t *pgd) {}
+
#endif /* CONFIG_NUMA */
#ifdef CONFIG_DISCONTIGMEM
diff --git a/arch/x86/include/asm/numaq/apic.h b/arch/x86/include/asm/numaq/apic.h
index 0bf2a06b7a4e..c80f00d29965 100644
--- a/arch/x86/include/asm/numaq/apic.h
+++ b/arch/x86/include/asm/numaq/apic.h
@@ -7,9 +7,9 @@
#define APIC_DFR_VALUE (APIC_DFR_CLUSTER)
-static inline cpumask_t target_cpus(void)
+static inline const cpumask_t *target_cpus(void)
{
- return CPU_MASK_ALL;
+ return &CPU_MASK_ALL;
}
#define NO_BALANCE_IRQ (1)
@@ -122,7 +122,13 @@ static inline void enable_apic_mode(void)
* We use physical apicids here, not logical, so just return the default
* physical broadcast to stop people from breaking us
*/
-static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
+static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
+{
+ return (int) 0xF;
+}
+
+static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+ const struct cpumask *andmask)
{
return (int) 0xF;
}
diff --git a/arch/x86/include/asm/numaq/ipi.h b/arch/x86/include/asm/numaq/ipi.h
index 935588d286cf..a8374c652778 100644
--- a/arch/x86/include/asm/numaq/ipi.h
+++ b/arch/x86/include/asm/numaq/ipi.h
@@ -1,25 +1,22 @@
#ifndef __ASM_NUMAQ_IPI_H
#define __ASM_NUMAQ_IPI_H
-void send_IPI_mask_sequence(cpumask_t, int vector);
+void send_IPI_mask_sequence(const struct cpumask *mask, int vector);
+void send_IPI_mask_allbutself(const struct cpumask *mask, int vector);
-static inline void send_IPI_mask(cpumask_t mask, int vector)
+static inline void send_IPI_mask(const struct cpumask *mask, int vector)
{
send_IPI_mask_sequence(mask, vector);
}
static inline void send_IPI_allbutself(int vector)
{
- cpumask_t mask = cpu_online_map;
- cpu_clear(smp_processor_id(), mask);
-
- if (!cpus_empty(mask))
- send_IPI_mask(mask, vector);
+ send_IPI_mask_allbutself(cpu_online_mask, vector);
}
static inline void send_IPI_all(int vector)
{
- send_IPI_mask(cpu_online_map, vector);
+ send_IPI_mask(cpu_online_mask, vector);
}
#endif /* __ASM_NUMAQ_IPI_H */
diff --git a/arch/x86/include/asm/numaq/wakecpu.h b/arch/x86/include/asm/numaq/wakecpu.h
index c577bda5b1c5..6f499df8eddb 100644
--- a/arch/x86/include/asm/numaq/wakecpu.h
+++ b/arch/x86/include/asm/numaq/wakecpu.h
@@ -3,12 +3,8 @@
/* This file copes with machines that wakeup secondary CPUs by NMIs */
-#define WAKE_SECONDARY_VIA_NMI
-
-#define TRAMPOLINE_LOW phys_to_virt(0x8)
-#define TRAMPOLINE_HIGH phys_to_virt(0xa)
-
-#define boot_cpu_apicid boot_cpu_logical_apicid
+#define TRAMPOLINE_PHYS_LOW (0x8)
+#define TRAMPOLINE_PHYS_HIGH (0xa)
/* We don't do anything here because we use NMI's to boot instead */
static inline void wait_for_init_deassert(atomic_t *deassert)
@@ -27,17 +23,23 @@ static inline void smp_callin_clear_local_apic(void)
static inline void store_NMI_vector(unsigned short *high, unsigned short *low)
{
printk("Storing NMI vector\n");
- *high = *((volatile unsigned short *) TRAMPOLINE_HIGH);
- *low = *((volatile unsigned short *) TRAMPOLINE_LOW);
+ *high =
+ *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH));
+ *low =
+ *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW));
}
static inline void restore_NMI_vector(unsigned short *high, unsigned short *low)
{
printk("Restoring NMI vector\n");
- *((volatile unsigned short *) TRAMPOLINE_HIGH) = *high;
- *((volatile unsigned short *) TRAMPOLINE_LOW) = *low;
+ *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)) =
+ *high;
+ *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) =
+ *low;
}
-#define inquire_remote_apic(apicid) {}
+static inline void inquire_remote_apic(int apicid)
+{
+}
#endif /* __ASM_NUMAQ_WAKECPU_H */
diff --git a/arch/x86/include/asm/pci_64.h b/arch/x86/include/asm/pci_64.h
index 5b28995d664e..d02d936840a3 100644
--- a/arch/x86/include/asm/pci_64.h
+++ b/arch/x86/include/asm/pci_64.h
@@ -34,8 +34,6 @@ extern void pci_iommu_alloc(void);
*/
#define PCI_DMA_BUS_IS_PHYS (dma_ops->is_phys)
-#if defined(CONFIG_GART_IOMMU) || defined(CONFIG_CALGARY_IOMMU)
-
#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) \
dma_addr_t ADDR_NAME;
#define DECLARE_PCI_UNMAP_LEN(LEN_NAME) \
@@ -49,18 +47,6 @@ extern void pci_iommu_alloc(void);
#define pci_unmap_len_set(PTR, LEN_NAME, VAL) \
(((PTR)->LEN_NAME) = (VAL))
-#else
-/* No IOMMU */
-
-#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME)
-#define DECLARE_PCI_UNMAP_LEN(LEN_NAME)
-#define pci_unmap_addr(PTR, ADDR_NAME) (0)
-#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL) do { } while (0)
-#define pci_unmap_len(PTR, LEN_NAME) (0)
-#define pci_unmap_len_set(PTR, LEN_NAME, VAL) do { } while (0)
-
-#endif
-
#endif /* __KERNEL__ */
#endif /* _ASM_X86_PCI_64_H */
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index d1531c8480b7..eefb0594b058 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -271,8 +271,6 @@ extern int do_get_thread_area(struct task_struct *p, int idx,
extern int do_set_thread_area(struct task_struct *p, int idx,
struct user_desc __user *info, int can_allocate);
-#define __ARCH_WANT_COMPAT_SYS_PTRACE
-
#endif /* __KERNEL__ */
#endif /* !__ASSEMBLY__ */
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index f12d37237465..294daeb3a006 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -16,6 +16,8 @@ static inline void visws_early_detect(void) { }
static inline int is_visws_box(void) { return 0; }
#endif
+extern int wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip);
+extern int wakeup_secondary_cpu_via_init(int apicid, unsigned long start_eip);
/*
* Any setup quirks to be performed?
*/
@@ -39,6 +41,7 @@ struct x86_quirks {
void (*smp_read_mpc_oem)(struct mp_config_oemtable *oemtable,
unsigned short oemsize);
int (*setup_ioapic_ids)(void);
+ int (*update_genapic)(void);
};
extern struct x86_quirks *x86_quirks;
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index d12811ce51d9..830b9fcb6427 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -60,7 +60,7 @@ struct smp_ops {
void (*cpu_die)(unsigned int cpu);
void (*play_dead)(void);
- void (*send_call_func_ipi)(cpumask_t mask);
+ void (*send_call_func_ipi)(const struct cpumask *mask);
void (*send_call_func_single_ipi)(int cpu);
};
@@ -125,7 +125,7 @@ static inline void arch_send_call_function_single_ipi(int cpu)
static inline void arch_send_call_function_ipi(cpumask_t mask)
{
- smp_ops.send_call_func_ipi(mask);
+ smp_ops.send_call_func_ipi(&mask);
}
void cpu_disable_common(void);
@@ -138,7 +138,7 @@ void native_cpu_die(unsigned int cpu);
void native_play_dead(void);
void play_dead_common(void);
-void native_send_call_func_ipi(cpumask_t mask);
+void native_send_call_func_ipi(const struct cpumask *mask);
void native_send_call_func_single_ipi(int cpu);
extern void prefill_possible_map(void);
diff --git a/arch/x86/include/asm/summit/apic.h b/arch/x86/include/asm/summit/apic.h
index 9b3070f1c2ac..651a93849341 100644
--- a/arch/x86/include/asm/summit/apic.h
+++ b/arch/x86/include/asm/summit/apic.h
@@ -14,13 +14,13 @@
#define APIC_DFR_VALUE (APIC_DFR_CLUSTER)
-static inline cpumask_t target_cpus(void)
+static inline const cpumask_t *target_cpus(void)
{
/* CPU_MASK_ALL (0xff) has undefined behaviour with
* dest_LowestPrio mode logical clustered apic interrupt routing
* Just start on cpu 0. IRQ balancing will spread load
*/
- return cpumask_of_cpu(0);
+ return &cpumask_of_cpu(0);
}
#define INT_DELIVERY_MODE (dest_LowestPrio)
@@ -137,14 +137,14 @@ static inline void enable_apic_mode(void)
{
}
-static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
+static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
{
int num_bits_set;
int cpus_found = 0;
int cpu;
int apicid;
- num_bits_set = cpus_weight(cpumask);
+ num_bits_set = cpus_weight(*cpumask);
/* Return id to all */
if (num_bits_set == NR_CPUS)
return (int) 0xFF;
@@ -152,10 +152,10 @@ static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
* The cpus in the mask must all be on the apic cluster. If are not
* on the same apicid cluster return default value of TARGET_CPUS.
*/
- cpu = first_cpu(cpumask);
+ cpu = first_cpu(*cpumask);
apicid = cpu_to_logical_apicid(cpu);
while (cpus_found < num_bits_set) {
- if (cpu_isset(cpu, cpumask)) {
+ if (cpu_isset(cpu, *cpumask)) {
int new_apicid = cpu_to_logical_apicid(cpu);
if (apicid_cluster(apicid) !=
apicid_cluster(new_apicid)){
@@ -170,6 +170,45 @@ static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
return apicid;
}
+static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+ const struct cpumask *andmask)
+{
+ int num_bits_set;
+ int num_bits_set2;
+ int cpus_found = 0;
+ int cpu;
+ int apicid = 0;
+
+ num_bits_set = cpumask_weight(cpumask);
+ num_bits_set2 = cpumask_weight(andmask);
+ num_bits_set = min(num_bits_set, num_bits_set2);
+ /* Return id to all */
+ if (num_bits_set >= nr_cpu_ids)
+ return 0xFF;
+ /*
+ * The cpus in the mask must all be on the apic cluster. If are not
+ * on the same apicid cluster return default value of TARGET_CPUS.
+ */
+ cpu = cpumask_first_and(cpumask, andmask);
+ apicid = cpu_to_logical_apicid(cpu);
+ while (cpus_found < num_bits_set) {
+ if (cpumask_test_cpu(cpu, cpumask)
+ && cpumask_test_cpu(cpu, andmask)) {
+ int new_apicid = cpu_to_logical_apicid(cpu);
+ if (apicid_cluster(apicid) !=
+ apicid_cluster(new_apicid)) {
+ printk(KERN_WARNING
+ "%s: Not a valid mask!\n", __func__);
+ return 0xFF;
+ }
+ apicid = apicid | new_apicid;
+ cpus_found++;
+ }
+ cpu++;
+ }
+ return apicid;
+}
+
/* cpuid returns the value latched in the HW at reset, not the APIC ID
* register's value. For any box whose BIOS changes APIC IDs, like
* clustered APIC systems, we must use hard_smp_processor_id.
diff --git a/arch/x86/include/asm/summit/ipi.h b/arch/x86/include/asm/summit/ipi.h
index 53bd1e7bd7b4..a8a2c24f50cc 100644
--- a/arch/x86/include/asm/summit/ipi.h
+++ b/arch/x86/include/asm/summit/ipi.h
@@ -1,9 +1,10 @@
#ifndef __ASM_SUMMIT_IPI_H
#define __ASM_SUMMIT_IPI_H
-void send_IPI_mask_sequence(cpumask_t mask, int vector);
+void send_IPI_mask_sequence(const cpumask_t *mask, int vector);
+void send_IPI_mask_allbutself(const cpumask_t *mask, int vector);
-static inline void send_IPI_mask(cpumask_t mask, int vector)
+static inline void send_IPI_mask(const cpumask_t *mask, int vector)
{
send_IPI_mask_sequence(mask, vector);
}
@@ -14,12 +15,12 @@ static inline void send_IPI_allbutself(int vector)
cpu_clear(smp_processor_id(), mask);
if (!cpus_empty(mask))
- send_IPI_mask(mask, vector);
+ send_IPI_mask(&mask, vector);
}
static inline void send_IPI_all(int vector)
{
- send_IPI_mask(cpu_online_map, vector);
+ send_IPI_mask(&cpu_online_map, vector);
}
#endif /* __ASM_SUMMIT_IPI_H */
diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h
index 2ed3f0f44ff7..07c3e4048991 100644
--- a/arch/x86/include/asm/system.h
+++ b/arch/x86/include/asm/system.h
@@ -314,6 +314,8 @@ extern void free_init_pages(char *what, unsigned long begin, unsigned long end);
void default_idle(void);
+void stop_this_cpu(void *dummy);
+
/*
* Force strict CPU ordering.
* And yes, this is required on UP too when we're talking
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index e44d379faad2..0921b4018c11 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -20,6 +20,8 @@
struct task_struct;
struct exec_domain;
#include <asm/processor.h>
+#include <asm/ftrace.h>
+#include <asm/atomic.h>
struct thread_info {
struct task_struct *task; /* main task structure */
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 4850e4b02b61..79e31e9dcdda 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -226,6 +226,8 @@ extern cpumask_t cpu_coregroup_map(int cpu);
#define topology_core_id(cpu) (cpu_data(cpu).cpu_core_id)
#define topology_core_siblings(cpu) (per_cpu(cpu_core_map, cpu))
#define topology_thread_siblings(cpu) (per_cpu(cpu_sibling_map, cpu))
+#define topology_core_cpumask(cpu) (&per_cpu(cpu_core_map, cpu))
+#define topology_thread_cpumask(cpu) (&per_cpu(cpu_sibling_map, cpu))
/* indicates that pointers to the topology cpumask_t maps are valid */
#define arch_provides_topology_pointers yes
@@ -239,7 +241,7 @@ struct pci_bus;
void set_pci_bus_resources_arch_default(struct pci_bus *b);
#ifdef CONFIG_SMP
-#define mc_capable() (boot_cpu_data.x86_max_cores > 1)
+#define mc_capable() (cpus_weight(per_cpu(cpu_core_map, 0)) != nr_cpu_ids)
#define smt_capable() (smp_num_siblings > 1)
#endif
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 35c54921b2e4..99192bb55a53 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -157,6 +157,7 @@ extern int __get_user_bad(void);
int __ret_gu; \
unsigned long __val_gu; \
__chk_user_ptr(ptr); \
+ might_fault(); \
switch (sizeof(*(ptr))) { \
case 1: \
__get_user_x(1, __ret_gu, __val_gu, ptr); \
@@ -241,6 +242,7 @@ extern void __put_user_8(void);
int __ret_pu; \
__typeof__(*(ptr)) __pu_val; \
__chk_user_ptr(ptr); \
+ might_fault(); \
__pu_val = x; \
switch (sizeof(*(ptr))) { \
case 1: \
diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h
index d095a3aeea1b..5e06259e90e5 100644
--- a/arch/x86/include/asm/uaccess_32.h
+++ b/arch/x86/include/asm/uaccess_32.h
@@ -82,8 +82,8 @@ __copy_to_user_inatomic(void __user *to, const void *from, unsigned long n)
static __always_inline unsigned long __must_check
__copy_to_user(void __user *to, const void *from, unsigned long n)
{
- might_sleep();
- return __copy_to_user_inatomic(to, from, n);
+ might_fault();
+ return __copy_to_user_inatomic(to, from, n);
}
static __always_inline unsigned long
@@ -137,7 +137,7 @@ __copy_from_user_inatomic(void *to, const void __user *from, unsigned long n)
static __always_inline unsigned long
__copy_from_user(void *to, const void __user *from, unsigned long n)
{
- might_sleep();
+ might_fault();
if (__builtin_constant_p(n)) {
unsigned long ret;
@@ -159,7 +159,7 @@ __copy_from_user(void *to, const void __user *from, unsigned long n)
static __always_inline unsigned long __copy_from_user_nocache(void *to,
const void __user *from, unsigned long n)
{
- might_sleep();
+ might_fault();
if (__builtin_constant_p(n)) {
unsigned long ret;
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index 664f15280f14..84210c479fca 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -29,6 +29,8 @@ static __always_inline __must_check
int __copy_from_user(void *dst, const void __user *src, unsigned size)
{
int ret = 0;
+
+ might_fault();
if (!__builtin_constant_p(size))
return copy_user_generic(dst, (__force void *)src, size);
switch (size) {
@@ -46,7 +48,7 @@ int __copy_from_user(void *dst, const void __user *src, unsigned size)
return ret;
case 10:
__get_user_asm(*(u64 *)dst, (u64 __user *)src,
- ret, "q", "", "=r", 16);
+ ret, "q", "", "=r", 10);
if (unlikely(ret))
return ret;
__get_user_asm(*(u16 *)(8 + (char *)dst),
@@ -71,6 +73,8 @@ static __always_inline __must_check
int __copy_to_user(void __user *dst, const void *src, unsigned size)
{
int ret = 0;
+
+ might_fault();
if (!__builtin_constant_p(size))
return copy_user_generic((__force void *)dst, src, size);
switch (size) {
@@ -113,6 +117,8 @@ static __always_inline __must_check
int __copy_in_user(void __user *dst, const void __user *src, unsigned size)
{
int ret = 0;
+
+ might_fault();
if (!__builtin_constant_p(size))
return copy_user_generic((__force void *)dst,
(__force void *)src, size);
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h
index 834b2c1d89fb..d2e415e6666f 100644
--- a/arch/x86/include/asm/unistd_64.h
+++ b/arch/x86/include/asm/unistd_64.h
@@ -639,8 +639,8 @@ __SYSCALL(__NR_fallocate, sys_fallocate)
__SYSCALL(__NR_timerfd_settime, sys_timerfd_settime)
#define __NR_timerfd_gettime 287
__SYSCALL(__NR_timerfd_gettime, sys_timerfd_gettime)
-#define __NR_paccept 288
-__SYSCALL(__NR_paccept, sys_paccept)
+#define __NR_accept4 288
+__SYSCALL(__NR_accept4, sys_accept4)
#define __NR_signalfd4 289
__SYSCALL(__NR_signalfd4, sys_signalfd4)
#define __NR_eventfd2 290
diff --git a/arch/x86/include/asm/vmi.h b/arch/x86/include/asm/vmi.h
index b7c0dea119fe..61e08c0a2907 100644
--- a/arch/x86/include/asm/vmi.h
+++ b/arch/x86/include/asm/vmi.h
@@ -223,9 +223,15 @@ struct pci_header {
} __attribute__((packed));
/* Function prototypes for bootstrapping */
+#ifdef CONFIG_VMI
extern void vmi_init(void);
+extern void vmi_activate(void);
extern void vmi_bringup(void);
-extern void vmi_apply_boot_page_allocations(void);
+#else
+static inline void vmi_init(void) {}
+static inline void vmi_activate(void) {}
+static inline void vmi_bringup(void) {}
+#endif
/* State needed to start an application processor in an SMP system. */
struct vmi_ap_state {
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index e489ff9cb3e2..1cad9318d217 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -25,7 +25,7 @@ CFLAGS_tsc.o := $(nostackp)
obj-y := process_$(BITS).o signal_$(BITS).o entry_$(BITS).o
obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
-obj-y += time_$(BITS).o ioport.o ldt.o
+obj-y += time_$(BITS).o ioport.o ldt.o dumpstack.o
obj-y += setup.o i8259.o irqinit_$(BITS).o setup_percpu.o
obj-$(CONFIG_X86_VISWS) += visws_quirks.o
obj-$(CONFIG_X86_32) += probe_roms_32.o
@@ -41,7 +41,7 @@ obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o
obj-y += process.o
obj-y += i387.o xsave.o
obj-y += ptrace.o
-obj-y += ds.o
+obj-$(CONFIG_X86_DS) += ds.o
obj-$(CONFIG_X86_32) += tls.o
obj-$(CONFIG_IA32_EMULATION) += tls.o
obj-y += step.o
@@ -65,6 +65,7 @@ obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o
obj-$(CONFIG_X86_IO_APIC) += io_apic.o
obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o
obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
+obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o
obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 8c1f76abae9e..65d0b72777ea 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -1343,7 +1343,6 @@ static void __init acpi_process_madt(void)
error = acpi_parse_madt_ioapic_entries();
if (!error) {
acpi_irq_model = ACPI_IRQ_MODEL_IOAPIC;
- acpi_irq_balance_set(NULL);
acpi_ioapic = 1;
smp_found_config = 1;
@@ -1361,6 +1360,17 @@ static void __init acpi_process_madt(void)
disable_acpi();
}
}
+
+ /*
+ * ACPI supports both logical (e.g. Hyper-Threading) and physical
+ * processors, where MPS only supports physical.
+ */
+ if (acpi_lapic && acpi_ioapic)
+ printk(KERN_INFO "Using ACPI (MADT) for SMP configuration "
+ "information\n");
+ else if (acpi_lapic)
+ printk(KERN_INFO "Using ACPI for processor (LAPIC) "
+ "configuration information\n");
#endif
return;
}
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 331b318304eb..a7b6dec6fc3f 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -187,6 +187,8 @@ static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
spin_lock_irqsave(&iommu->lock, flags);
ret = __iommu_queue_command(iommu, cmd);
+ if (!ret)
+ iommu->need_sync = 1;
spin_unlock_irqrestore(&iommu->lock, flags);
return ret;
@@ -210,10 +212,13 @@ static int iommu_completion_wait(struct amd_iommu *iommu)
cmd.data[0] = CMD_COMPL_WAIT_INT_MASK;
CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT);
- iommu->need_sync = 0;
-
spin_lock_irqsave(&iommu->lock, flags);
+ if (!iommu->need_sync)
+ goto out;
+
+ iommu->need_sync = 0;
+
ret = __iommu_queue_command(iommu, &cmd);
if (ret)
@@ -254,8 +259,6 @@ static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid)
ret = iommu_queue_command(iommu, &cmd);
- iommu->need_sync = 1;
-
return ret;
}
@@ -281,8 +284,6 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu,
ret = iommu_queue_command(iommu, &cmd);
- iommu->need_sync = 1;
-
return ret;
}
@@ -343,7 +344,7 @@ static int iommu_map(struct protection_domain *dom,
u64 __pte, *pte, *page;
bus_addr = PAGE_ALIGN(bus_addr);
- phys_addr = PAGE_ALIGN(bus_addr);
+ phys_addr = PAGE_ALIGN(phys_addr);
/* only support 512GB address spaces for now */
if (bus_addr > IOMMU_MAP_SIZE_L3 || !(prot & IOMMU_PROT_MASK))
@@ -537,7 +538,7 @@ static void dma_ops_free_addresses(struct dma_ops_domain *dom,
address >>= PAGE_SHIFT;
iommu_area_free(dom->bitmap, address, pages);
- if (address + pages >= dom->next_bit)
+ if (address >= dom->next_bit)
dom->need_flush = true;
}
@@ -599,7 +600,7 @@ static void dma_ops_free_pagetable(struct dma_ops_domain *dma_dom)
continue;
p2 = IOMMU_PTE_PAGE(p1[i]);
- for (j = 0; j < 512; ++i) {
+ for (j = 0; j < 512; ++j) {
if (!IOMMU_PTE_PRESENT(p2[j]))
continue;
p3 = IOMMU_PTE_PAGE(p2[j]);
@@ -762,8 +763,6 @@ static void set_device_domain(struct amd_iommu *iommu,
write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
iommu_queue_inv_dev_entry(iommu, devid);
-
- iommu->need_sync = 1;
}
/*****************************************************************************
@@ -858,6 +857,9 @@ static int get_device_resources(struct device *dev,
print_devid(_bdf, 1);
}
+ if (domain_for_device(_bdf) == NULL)
+ set_device_domain(*iommu, *domain, _bdf);
+
return 1;
}
@@ -908,7 +910,7 @@ static void dma_ops_domain_unmap(struct amd_iommu *iommu,
if (address >= dom->aperture_size)
return;
- WARN_ON(address & 0xfffULL || address > dom->aperture_size);
+ WARN_ON(address & ~PAGE_MASK || address >= dom->aperture_size);
pte = dom->pte_pages[IOMMU_PTE_L1_INDEX(address)];
pte += IOMMU_PTE_L0_INDEX(address);
@@ -920,8 +922,8 @@ static void dma_ops_domain_unmap(struct amd_iommu *iommu,
/*
* This function contains common code for mapping of a physically
- * contiguous memory region into DMA address space. It is uses by all
- * mapping functions provided by this IOMMU driver.
+ * contiguous memory region into DMA address space. It is used by all
+ * mapping functions provided with this IOMMU driver.
* Must be called with the domain lock held.
*/
static dma_addr_t __map_single(struct device *dev,
@@ -981,7 +983,8 @@ static void __unmap_single(struct amd_iommu *iommu,
dma_addr_t i, start;
unsigned int pages;
- if ((dma_addr == 0) || (dma_addr + size > dma_dom->aperture_size))
+ if ((dma_addr == bad_dma_address) ||
+ (dma_addr + size > dma_dom->aperture_size))
return;
pages = iommu_num_pages(dma_addr, size, PAGE_SIZE);
@@ -1031,8 +1034,7 @@ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr,
if (addr == bad_dma_address)
goto out;
- if (unlikely(iommu->need_sync))
- iommu_completion_wait(iommu);
+ iommu_completion_wait(iommu);
out:
spin_unlock_irqrestore(&domain->lock, flags);
@@ -1060,8 +1062,7 @@ static void unmap_single(struct device *dev, dma_addr_t dma_addr,
__unmap_single(iommu, domain->priv, dma_addr, size, dir);
- if (unlikely(iommu->need_sync))
- iommu_completion_wait(iommu);
+ iommu_completion_wait(iommu);
spin_unlock_irqrestore(&domain->lock, flags);
}
@@ -1127,8 +1128,7 @@ static int map_sg(struct device *dev, struct scatterlist *sglist,
goto unmap;
}
- if (unlikely(iommu->need_sync))
- iommu_completion_wait(iommu);
+ iommu_completion_wait(iommu);
out:
spin_unlock_irqrestore(&domain->lock, flags);
@@ -1173,8 +1173,7 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist,
s->dma_address = s->dma_length = 0;
}
- if (unlikely(iommu->need_sync))
- iommu_completion_wait(iommu);
+ iommu_completion_wait(iommu);
spin_unlock_irqrestore(&domain->lock, flags);
}
@@ -1225,8 +1224,7 @@ static void *alloc_coherent(struct device *dev, size_t size,
goto out;
}
- if (unlikely(iommu->need_sync))
- iommu_completion_wait(iommu);
+ iommu_completion_wait(iommu);
out:
spin_unlock_irqrestore(&domain->lock, flags);
@@ -1257,8 +1255,7 @@ static void free_coherent(struct device *dev, size_t size,
__unmap_single(iommu, domain->priv, dma_addr, size, DMA_BIDIRECTIONAL);
- if (unlikely(iommu->need_sync))
- iommu_completion_wait(iommu);
+ iommu_completion_wait(iommu);
spin_unlock_irqrestore(&domain->lock, flags);
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 0cdcda35a05f..30ae2701b3df 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -121,7 +121,7 @@ u16 amd_iommu_last_bdf; /* largest PCI device id we have
LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings
we find in ACPI */
unsigned amd_iommu_aperture_order = 26; /* size of aperture in power of 2 */
-int amd_iommu_isolate; /* if 1, device isolation is enabled */
+int amd_iommu_isolate = 1; /* if 1, device isolation is enabled */
bool amd_iommu_unmap_flush; /* if true, flush on every unmap */
LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the
@@ -1213,7 +1213,9 @@ static int __init parse_amd_iommu_options(char *str)
for (; *str; ++str) {
if (strncmp(str, "isolate", 7) == 0)
amd_iommu_isolate = 1;
- if (strncmp(str, "fullflush", 11) == 0)
+ if (strncmp(str, "share", 5) == 0)
+ amd_iommu_isolate = 0;
+ if (strncmp(str, "fullflush", 9) == 0)
amd_iommu_unmap_flush = true;
}
diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index 04a7f960bbc0..edda4c00e3d2 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -141,7 +141,7 @@ static int lapic_next_event(unsigned long delta,
struct clock_event_device *evt);
static void lapic_timer_setup(enum clock_event_mode mode,
struct clock_event_device *evt);
-static void lapic_timer_broadcast(cpumask_t mask);
+static void lapic_timer_broadcast(const cpumask_t *mask);
static void apic_pm_activate(void);
/*
@@ -453,7 +453,7 @@ static void lapic_timer_setup(enum clock_event_mode mode,
/*
* Local APIC timer broadcast function
*/
-static void lapic_timer_broadcast(cpumask_t mask)
+static void lapic_timer_broadcast(const cpumask_t *mask)
{
#ifdef CONFIG_SMP
send_IPI_mask(mask, LOCAL_TIMER_VECTOR);
@@ -469,7 +469,7 @@ static void __cpuinit setup_APIC_timer(void)
struct clock_event_device *levt = &__get_cpu_var(lapic_events);
memcpy(levt, &lapic_clockevent, sizeof(*levt));
- levt->cpumask = cpumask_of_cpu(smp_processor_id());
+ levt->cpumask = cpumask_of(smp_processor_id());
clockevents_register_device(levt);
}
@@ -1315,7 +1315,7 @@ void enable_x2apic(void)
}
}
-void enable_IR_x2apic(void)
+void __init enable_IR_x2apic(void)
{
#ifdef CONFIG_INTR_REMAP
int ret;
@@ -1903,8 +1903,8 @@ void __cpuinit generic_processor_info(int apicid, int version)
}
#endif
- cpu_set(cpu, cpu_possible_map);
- cpu_set(cpu, cpu_present_map);
+ set_cpu_possible(cpu, true);
+ set_cpu_present(cpu, true);
}
#ifdef CONFIG_X86_64
@@ -2106,7 +2106,7 @@ __cpuinit int apic_is_clustered_box(void)
bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
bitmap_zero(clustermap, NUM_APIC_CLUSTERS);
- for (i = 0; i < NR_CPUS; i++) {
+ for (i = 0; i < nr_cpu_ids; i++) {
/* are we being called early in kernel startup? */
if (bios_cpu_apicid) {
id = bios_cpu_apicid[i];
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 5145a6e72bbb..3a26525a3f31 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -391,11 +391,7 @@ static int power_off;
#else
static int power_off = 1;
#endif
-#ifdef CONFIG_APM_REAL_MODE_POWER_OFF
-static int realmode_power_off = 1;
-#else
static int realmode_power_off;
-#endif
#ifdef CONFIG_APM_ALLOW_INTS
static int allow_ints = 1;
#else
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index 8e48c5d4467d..88ea02dcb622 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -33,6 +33,7 @@
#include <linux/cpufreq.h>
#include <linux/compiler.h>
#include <linux/dmi.h>
+#include <linux/ftrace.h>
#include <linux/acpi.h>
#include <acpi/processor.h>
@@ -391,6 +392,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
unsigned int next_perf_state = 0; /* Index into perf table */
unsigned int i;
int result = 0;
+ struct power_trace it;
dprintk("acpi_cpufreq_target %d (%d)\n", target_freq, policy->cpu);
@@ -427,6 +429,8 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
}
}
+ trace_power_mark(&it, POWER_PSTATE, next_perf_state);
+
switch (data->cpu_feature) {
case SYSTEM_INTEL_MSR_CAPABLE:
cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index d3dcd58b87cd..7f05f44b97e9 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -115,9 +115,20 @@ static int query_current_values_with_pending_wait(struct powernow_k8_data *data)
u32 i = 0;
if (cpu_family == CPU_HW_PSTATE) {
- rdmsr(MSR_PSTATE_STATUS, lo, hi);
- i = lo & HW_PSTATE_MASK;
- data->currpstate = i;
+ if (data->currpstate == HW_PSTATE_INVALID) {
+ /* read (initial) hw pstate if not yet set */
+ rdmsr(MSR_PSTATE_STATUS, lo, hi);
+ i = lo & HW_PSTATE_MASK;
+
+ /*
+ * a workaround for family 11h erratum 311 might cause
+ * an "out-of-range Pstate if the core is in Pstate-0
+ */
+ if (i >= data->numps)
+ data->currpstate = HW_PSTATE_0;
+ else
+ data->currpstate = i;
+ }
return 0;
}
do {
@@ -1121,6 +1132,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
}
data->cpu = pol->cpu;
+ data->currpstate = HW_PSTATE_INVALID;
if (powernow_k8_cpu_init_acpi(data)) {
/*
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
index ab48cfed4d96..65cfb5d7f77f 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
@@ -5,6 +5,19 @@
* http://www.gnu.org/licenses/gpl.html
*/
+
+enum pstate {
+ HW_PSTATE_INVALID = 0xff,
+ HW_PSTATE_0 = 0,
+ HW_PSTATE_1 = 1,
+ HW_PSTATE_2 = 2,
+ HW_PSTATE_3 = 3,
+ HW_PSTATE_4 = 4,
+ HW_PSTATE_5 = 5,
+ HW_PSTATE_6 = 6,
+ HW_PSTATE_7 = 7,
+};
+
struct powernow_k8_data {
unsigned int cpu;
@@ -23,7 +36,9 @@ struct powernow_k8_data {
u32 exttype; /* extended interface = 1 */
/* keep track of the current fid / vid or pstate */
- u32 currvid, currfid, currpstate;
+ u32 currvid;
+ u32 currfid;
+ enum pstate currpstate;
/* the powernow_table includes all frequency and vid/fid pairings:
* fid are the lower 8 bits of the index, vid are the upper 8 bits.
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index cce0b6118d55..816f27f289b1 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -307,12 +307,11 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
set_cpu_cap(c, X86_FEATURE_P4);
if (c->x86 == 6)
set_cpu_cap(c, X86_FEATURE_P3);
+#endif
if (cpu_has_bts)
ptrace_bts_init_intel(c);
-#endif
-
detect_extended_topology(c);
if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) {
/*
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 3f46afbb1cf1..fb7f946cb65e 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -534,31 +534,16 @@ static void __cpuinit free_cache_attributes(unsigned int cpu)
per_cpu(cpuid4_info, cpu) = NULL;
}
-static int __cpuinit detect_cache_attributes(unsigned int cpu)
+static void get_cpu_leaves(void *_retval)
{
- struct _cpuid4_info *this_leaf;
- unsigned long j;
- int retval;
- cpumask_t oldmask;
-
- if (num_cache_leaves == 0)
- return -ENOENT;
-
- per_cpu(cpuid4_info, cpu) = kzalloc(
- sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL);
- if (per_cpu(cpuid4_info, cpu) == NULL)
- return -ENOMEM;
-
- oldmask = current->cpus_allowed;
- retval = set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
- if (retval)
- goto out;
+ int j, *retval = _retval, cpu = smp_processor_id();
/* Do cpuid and store the results */
for (j = 0; j < num_cache_leaves; j++) {
+ struct _cpuid4_info *this_leaf;
this_leaf = CPUID4_INFO_IDX(cpu, j);
- retval = cpuid4_cache_lookup(j, this_leaf);
- if (unlikely(retval < 0)) {
+ *retval = cpuid4_cache_lookup(j, this_leaf);
+ if (unlikely(*retval < 0)) {
int i;
for (i = 0; i < j; i++)
@@ -567,9 +552,21 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu)
}
cache_shared_cpu_map_setup(cpu, j);
}
- set_cpus_allowed_ptr(current, &oldmask);
+}
+
+static int __cpuinit detect_cache_attributes(unsigned int cpu)
+{
+ int retval;
+
+ if (num_cache_leaves == 0)
+ return -ENOENT;
+
+ per_cpu(cpuid4_info, cpu) = kzalloc(
+ sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL);
+ if (per_cpu(cpuid4_info, cpu) == NULL)
+ return -ENOMEM;
-out:
+ smp_call_function_single(cpu, get_cpu_leaves, &retval, true);
if (retval) {
kfree(per_cpu(cpuid4_info, cpu));
per_cpu(cpuid4_info, cpu) = NULL;
@@ -626,8 +623,8 @@ static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf,
cpumask_t *mask = &this_leaf->shared_cpu_map;
n = type?
- cpulist_scnprintf(buf, len-2, *mask):
- cpumask_scnprintf(buf, len-2, *mask);
+ cpulist_scnprintf(buf, len-2, mask) :
+ cpumask_scnprintf(buf, len-2, mask);
buf[n++] = '\n';
buf[n] = '\0';
}
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
index 5eb390a4b2e9..a1de80f368f1 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
@@ -83,34 +83,41 @@ static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */
* CPU Initialization
*/
+struct thresh_restart {
+ struct threshold_block *b;
+ int reset;
+ u16 old_limit;
+};
+
/* must be called with correct cpu affinity */
-static void threshold_restart_bank(struct threshold_block *b,
- int reset, u16 old_limit)
+static long threshold_restart_bank(void *_tr)
{
+ struct thresh_restart *tr = _tr;
u32 mci_misc_hi, mci_misc_lo;
- rdmsr(b->address, mci_misc_lo, mci_misc_hi);
+ rdmsr(tr->b->address, mci_misc_lo, mci_misc_hi);
- if (b->threshold_limit < (mci_misc_hi & THRESHOLD_MAX))
- reset = 1; /* limit cannot be lower than err count */
+ if (tr->b->threshold_limit < (mci_misc_hi & THRESHOLD_MAX))
+ tr->reset = 1; /* limit cannot be lower than err count */
- if (reset) { /* reset err count and overflow bit */
+ if (tr->reset) { /* reset err count and overflow bit */
mci_misc_hi =
(mci_misc_hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) |
- (THRESHOLD_MAX - b->threshold_limit);
- } else if (old_limit) { /* change limit w/o reset */
+ (THRESHOLD_MAX - tr->b->threshold_limit);
+ } else if (tr->old_limit) { /* change limit w/o reset */
int new_count = (mci_misc_hi & THRESHOLD_MAX) +
- (old_limit - b->threshold_limit);
+ (tr->old_limit - tr->b->threshold_limit);
mci_misc_hi = (mci_misc_hi & ~MASK_ERR_COUNT_HI) |
(new_count & THRESHOLD_MAX);
}
- b->interrupt_enable ?
+ tr->b->interrupt_enable ?
(mci_misc_hi = (mci_misc_hi & ~MASK_INT_TYPE_HI) | INT_TYPE_APIC) :
(mci_misc_hi &= ~MASK_INT_TYPE_HI);
mci_misc_hi |= MASK_COUNT_EN_HI;
- wrmsr(b->address, mci_misc_lo, mci_misc_hi);
+ wrmsr(tr->b->address, mci_misc_lo, mci_misc_hi);
+ return 0;
}
/* cpu init entry point, called from mce.c with preempt off */
@@ -120,6 +127,7 @@ void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c)
unsigned int cpu = smp_processor_id();
u8 lvt_off;
u32 low = 0, high = 0, address = 0;
+ struct thresh_restart tr;
for (bank = 0; bank < NR_BANKS; ++bank) {
for (block = 0; block < NR_BLOCKS; ++block) {
@@ -162,7 +170,10 @@ void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c)
wrmsr(address, low, high);
threshold_defaults.address = address;
- threshold_restart_bank(&threshold_defaults, 0, 0);
+ tr.b = &threshold_defaults;
+ tr.reset = 0;
+ tr.old_limit = 0;
+ threshold_restart_bank(&tr);
}
}
}
@@ -251,20 +262,6 @@ struct threshold_attr {
ssize_t(*store) (struct threshold_block *, const char *, size_t count);
};
-static void affinity_set(unsigned int cpu, cpumask_t *oldmask,
- cpumask_t *newmask)
-{
- *oldmask = current->cpus_allowed;
- cpus_clear(*newmask);
- cpu_set(cpu, *newmask);
- set_cpus_allowed_ptr(current, newmask);
-}
-
-static void affinity_restore(const cpumask_t *oldmask)
-{
- set_cpus_allowed_ptr(current, oldmask);
-}
-
#define SHOW_FIELDS(name) \
static ssize_t show_ ## name(struct threshold_block * b, char *buf) \
{ \
@@ -277,15 +274,16 @@ static ssize_t store_interrupt_enable(struct threshold_block *b,
const char *buf, size_t count)
{
char *end;
- cpumask_t oldmask, newmask;
+ struct thresh_restart tr;
unsigned long new = simple_strtoul(buf, &end, 0);
if (end == buf)
return -EINVAL;
b->interrupt_enable = !!new;
- affinity_set(b->cpu, &oldmask, &newmask);
- threshold_restart_bank(b, 0, 0);
- affinity_restore(&oldmask);
+ tr.b = b;
+ tr.reset = 0;
+ tr.old_limit = 0;
+ work_on_cpu(b->cpu, threshold_restart_bank, &tr);
return end - buf;
}
@@ -294,8 +292,7 @@ static ssize_t store_threshold_limit(struct threshold_block *b,
const char *buf, size_t count)
{
char *end;
- cpumask_t oldmask, newmask;
- u16 old;
+ struct thresh_restart tr;
unsigned long new = simple_strtoul(buf, &end, 0);
if (end == buf)
return -EINVAL;
@@ -303,34 +300,36 @@ static ssize_t store_threshold_limit(struct threshold_block *b,
new = THRESHOLD_MAX;
if (new < 1)
new = 1;
- old = b->threshold_limit;
+ tr.old_limit = b->threshold_limit;
b->threshold_limit = new;
+ tr.b = b;
+ tr.reset = 0;
- affinity_set(b->cpu, &oldmask, &newmask);
- threshold_restart_bank(b, 0, old);
- affinity_restore(&oldmask);
+ work_on_cpu(b->cpu, threshold_restart_bank, &tr);
return end - buf;
}
-static ssize_t show_error_count(struct threshold_block *b, char *buf)
+static long local_error_count(void *_b)
{
- u32 high, low;
- cpumask_t oldmask, newmask;
- affinity_set(b->cpu, &oldmask, &newmask);
+ struct threshold_block *b = _b;
+ u32 low, high;
+
rdmsr(b->address, low, high);
- affinity_restore(&oldmask);
- return sprintf(buf, "%x\n",
- (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit));
+ return (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit);
+}
+
+static ssize_t show_error_count(struct threshold_block *b, char *buf)
+{
+ return sprintf(buf, "%lx\n", work_on_cpu(b->cpu, local_error_count, b));
}
static ssize_t store_error_count(struct threshold_block *b,
const char *buf, size_t count)
{
- cpumask_t oldmask, newmask;
- affinity_set(b->cpu, &oldmask, &newmask);
- threshold_restart_bank(b, 1, 0);
- affinity_restore(&oldmask);
+ struct thresh_restart tr = { .b = b, .reset = 1, .old_limit = 0 };
+
+ work_on_cpu(b->cpu, threshold_restart_bank, &tr);
return 1;
}
@@ -463,12 +462,19 @@ out_free:
return err;
}
+static long local_allocate_threshold_blocks(void *_bank)
+{
+ unsigned int *bank = _bank;
+
+ return allocate_threshold_blocks(smp_processor_id(), *bank, 0,
+ MSR_IA32_MC0_MISC + *bank * 4);
+}
+
/* symlinks sibling shared banks to first core. first core owns dir/files. */
static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
{
int i, err = 0;
struct threshold_bank *b = NULL;
- cpumask_t oldmask, newmask;
char name[32];
sprintf(name, "threshold_bank%i", bank);
@@ -519,11 +525,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
per_cpu(threshold_banks, cpu)[bank] = b;
- affinity_set(cpu, &oldmask, &newmask);
- err = allocate_threshold_blocks(cpu, bank, 0,
- MSR_IA32_MC0_MISC + bank * 4);
- affinity_restore(&oldmask);
-
+ err = work_on_cpu(cpu, local_allocate_threshold_blocks, &bank);
if (err)
goto out_free;
diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c
index 2b69994fd3a8..19a8c2c0389f 100644
--- a/arch/x86/kernel/ds.c
+++ b/arch/x86/kernel/ds.c
@@ -7,13 +7,12 @@
*
* It manages:
* - per-thread and per-cpu allocation of BTS and PEBS
- * - buffer memory allocation (optional)
- * - buffer overflow handling
+ * - buffer overflow handling (to be done)
* - buffer access
*
* It assumes:
- * - get_task_struct on all parameter tasks
- * - current is allowed to trace parameter tasks
+ * - get_task_struct on all traced tasks
+ * - current is allowed to trace tasks
*
*
* Copyright (C) 2007-2008 Intel Corporation.
@@ -21,8 +20,6 @@
*/
-#ifdef CONFIG_X86_DS
-
#include <asm/ds.h>
#include <linux/errno.h>
@@ -30,6 +27,7 @@
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/mm.h>
+#include <linux/kernel.h>
/*
@@ -46,6 +44,33 @@ struct ds_configuration {
};
static struct ds_configuration ds_cfg;
+/*
+ * A BTS or PEBS tracer.
+ *
+ * This holds the configuration of the tracer and serves as a handle
+ * to identify tracers.
+ */
+struct ds_tracer {
+ /* the DS context (partially) owned by this tracer */
+ struct ds_context *context;
+ /* the buffer provided on ds_request() and its size in bytes */
+ void *buffer;
+ size_t size;
+};
+
+struct bts_tracer {
+ /* the common DS part */
+ struct ds_tracer ds;
+ /* buffer overflow notification function */
+ bts_ovfl_callback_t ovfl;
+};
+
+struct pebs_tracer {
+ /* the common DS part */
+ struct ds_tracer ds;
+ /* buffer overflow notification function */
+ pebs_ovfl_callback_t ovfl;
+};
/*
* Debug Store (DS) save area configuration (see Intel64 and IA32
@@ -109,34 +134,13 @@ static inline void ds_set(unsigned char *base, enum ds_qualifier qual,
(*(unsigned long *)base) = value;
}
+#define DS_ALIGNMENT (1 << 3) /* BTS and PEBS buffer alignment */
-/*
- * Locking is done only for allocating BTS or PEBS resources and for
- * guarding context and buffer memory allocation.
- *
- * Most functions require the current task to own the ds context part
- * they are going to access. All the locking is done when validating
- * access to the context.
- */
-static spinlock_t ds_lock = __SPIN_LOCK_UNLOCKED(ds_lock);
/*
- * Validate that the current task is allowed to access the BTS/PEBS
- * buffer of the parameter task.
- *
- * Returns 0, if access is granted; -Eerrno, otherwise.
+ * Locking is done only for allocating BTS or PEBS resources.
*/
-static inline int ds_validate_access(struct ds_context *context,
- enum ds_qualifier qual)
-{
- if (!context)
- return -EPERM;
-
- if (context->owner[qual] == current)
- return 0;
-
- return -EPERM;
-}
+static spinlock_t ds_lock = __SPIN_LOCK_UNLOCKED(ds_lock);
/*
@@ -185,80 +189,43 @@ static inline int check_tracer(struct task_struct *task)
*
* Contexts are use-counted. They are allocated on first access and
* deallocated when the last user puts the context.
- *
- * We distinguish between an allocating and a non-allocating get of a
- * context:
- * - the allocating get is used for requesting BTS/PEBS resources. It
- * requires the caller to hold the global ds_lock.
- * - the non-allocating get is used for all other cases. A
- * non-existing context indicates an error. It acquires and releases
- * the ds_lock itself for obtaining the context.
- *
- * A context and its DS configuration are allocated and deallocated
- * together. A context always has a DS configuration of the
- * appropriate size.
*/
static DEFINE_PER_CPU(struct ds_context *, system_context);
#define this_system_context per_cpu(system_context, smp_processor_id())
-/*
- * Returns the pointer to the parameter task's context or to the
- * system-wide context, if task is NULL.
- *
- * Increases the use count of the returned context, if not NULL.
- */
static inline struct ds_context *ds_get_context(struct task_struct *task)
{
- struct ds_context *context;
-
- spin_lock(&ds_lock);
-
- context = (task ? task->thread.ds_ctx : this_system_context);
- if (context)
- context->count++;
-
- spin_unlock(&ds_lock);
-
- return context;
-}
-
-/*
- * Same as ds_get_context, but allocates the context and it's DS
- * structure, if necessary; returns NULL; if out of memory.
- *
- * pre: requires ds_lock to be held
- */
-static inline struct ds_context *ds_alloc_context(struct task_struct *task)
-{
struct ds_context **p_context =
(task ? &task->thread.ds_ctx : &this_system_context);
struct ds_context *context = *p_context;
+ unsigned long irq;
if (!context) {
context = kzalloc(sizeof(*context), GFP_KERNEL);
-
if (!context)
return NULL;
- context->ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL);
- if (!context->ds) {
- kfree(context);
- return NULL;
- }
+ spin_lock_irqsave(&ds_lock, irq);
- *p_context = context;
+ if (*p_context) {
+ kfree(context);
- context->this = p_context;
- context->task = task;
+ context = *p_context;
+ } else {
+ *p_context = context;
- if (task)
- set_tsk_thread_flag(task, TIF_DS_AREA_MSR);
+ context->this = p_context;
+ context->task = task;
- if (!task || (task == current))
- wrmsr(MSR_IA32_DS_AREA, (unsigned long)context->ds, 0);
+ if (task)
+ set_tsk_thread_flag(task, TIF_DS_AREA_MSR);
- get_tracer(task);
+ if (!task || (task == current))
+ wrmsrl(MSR_IA32_DS_AREA,
+ (unsigned long)context->ds);
+ }
+ spin_unlock_irqrestore(&ds_lock, irq);
}
context->count++;
@@ -266,16 +233,14 @@ static inline struct ds_context *ds_alloc_context(struct task_struct *task)
return context;
}
-/*
- * Decreases the use count of the parameter context, if not NULL.
- * Deallocates the context, if the use count reaches zero.
- */
static inline void ds_put_context(struct ds_context *context)
{
+ unsigned long irq;
+
if (!context)
return;
- spin_lock(&ds_lock);
+ spin_lock_irqsave(&ds_lock, irq);
if (--context->count)
goto out;
@@ -288,351 +253,351 @@ static inline void ds_put_context(struct ds_context *context)
if (!context->task || (context->task == current))
wrmsrl(MSR_IA32_DS_AREA, 0);
- put_tracer(context->task);
-
- /* free any leftover buffers from tracers that did not
- * deallocate them properly. */
- kfree(context->buffer[ds_bts]);
- kfree(context->buffer[ds_pebs]);
- kfree(context->ds);
kfree(context);
out:
- spin_unlock(&ds_lock);
+ spin_unlock_irqrestore(&ds_lock, irq);
}
/*
* Handle a buffer overflow
*
- * task: the task whose buffers are overflowing;
- * NULL for a buffer overflow on the current cpu
* context: the ds context
* qual: the buffer type
*/
-static void ds_overflow(struct task_struct *task, struct ds_context *context,
- enum ds_qualifier qual)
-{
- if (!context)
- return;
-
- if (context->callback[qual])
- (*context->callback[qual])(task);
-
- /* todo: do some more overflow handling */
+static void ds_overflow(struct ds_context *context, enum ds_qualifier qual)
+{
+ switch (qual) {
+ case ds_bts: {
+ struct bts_tracer *tracer =
+ container_of(context->owner[qual],
+ struct bts_tracer, ds);
+ if (tracer->ovfl)
+ tracer->ovfl(tracer);
+ }
+ break;
+ case ds_pebs: {
+ struct pebs_tracer *tracer =
+ container_of(context->owner[qual],
+ struct pebs_tracer, ds);
+ if (tracer->ovfl)
+ tracer->ovfl(tracer);
+ }
+ break;
+ }
}
-/*
- * Allocate a non-pageable buffer of the parameter size.
- * Checks the memory and the locked memory rlimit.
- *
- * Returns the buffer, if successful;
- * NULL, if out of memory or rlimit exceeded.
- *
- * size: the requested buffer size in bytes
- * pages (out): if not NULL, contains the number of pages reserved
- */
-static inline void *ds_allocate_buffer(size_t size, unsigned int *pages)
+static void ds_install_ds_config(struct ds_context *context,
+ enum ds_qualifier qual,
+ void *base, size_t size, size_t ith)
{
- unsigned long rlim, vm, pgsz;
- void *buffer;
-
- pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT;
-
- rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;
- vm = current->mm->total_vm + pgsz;
- if (rlim < vm)
- return NULL;
+ unsigned long buffer, adj;
- rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
- vm = current->mm->locked_vm + pgsz;
- if (rlim < vm)
- return NULL;
+ /* adjust the buffer address and size to meet alignment
+ * constraints:
+ * - buffer is double-word aligned
+ * - size is multiple of record size
+ *
+ * We checked the size at the very beginning; we have enough
+ * space to do the adjustment.
+ */
+ buffer = (unsigned long)base;
- buffer = kzalloc(size, GFP_KERNEL);
- if (!buffer)
- return NULL;
+ adj = ALIGN(buffer, DS_ALIGNMENT) - buffer;
+ buffer += adj;
+ size -= adj;
- current->mm->total_vm += pgsz;
- current->mm->locked_vm += pgsz;
+ size /= ds_cfg.sizeof_rec[qual];
+ size *= ds_cfg.sizeof_rec[qual];
- if (pages)
- *pages = pgsz;
+ ds_set(context->ds, qual, ds_buffer_base, buffer);
+ ds_set(context->ds, qual, ds_index, buffer);
+ ds_set(context->ds, qual, ds_absolute_maximum, buffer + size);
- return buffer;
+ /* The value for 'no threshold' is -1, which will set the
+ * threshold outside of the buffer, just like we want it.
+ */
+ ds_set(context->ds, qual,
+ ds_interrupt_threshold, buffer + size - ith);
}
-static int ds_request(struct task_struct *task, void *base, size_t size,
- ds_ovfl_callback_t ovfl, enum ds_qualifier qual)
+static int ds_request(struct ds_tracer *tracer, enum ds_qualifier qual,
+ struct task_struct *task,
+ void *base, size_t size, size_t th)
{
struct ds_context *context;
- unsigned long buffer, adj;
- const unsigned long alignment = (1 << 3);
- int error = 0;
+ unsigned long irq;
+ int error;
+ error = -EOPNOTSUPP;
if (!ds_cfg.sizeof_ds)
- return -EOPNOTSUPP;
+ goto out;
- /* we require some space to do alignment adjustments below */
- if (size < (alignment + ds_cfg.sizeof_rec[qual]))
- return -EINVAL;
+ error = -EINVAL;
+ if (!base)
+ goto out;
- /* buffer overflow notification is not yet implemented */
- if (ovfl)
- return -EOPNOTSUPP;
+ /* we require some space to do alignment adjustments below */
+ error = -EINVAL;
+ if (size < (DS_ALIGNMENT + ds_cfg.sizeof_rec[qual]))
+ goto out;
+ if (th != (size_t)-1) {
+ th *= ds_cfg.sizeof_rec[qual];
- spin_lock(&ds_lock);
+ error = -EINVAL;
+ if (size <= th)
+ goto out;
+ }
- if (!check_tracer(task))
- return -EPERM;
+ tracer->buffer = base;
+ tracer->size = size;
error = -ENOMEM;
- context = ds_alloc_context(task);
+ context = ds_get_context(task);
if (!context)
- goto out_unlock;
-
- error = -EALREADY;
- if (context->owner[qual] == current)
- goto out_unlock;
- error = -EPERM;
- if (context->owner[qual] != NULL)
- goto out_unlock;
- context->owner[qual] = current;
-
- spin_unlock(&ds_lock);
+ goto out;
+ tracer->context = context;
- error = -ENOMEM;
- if (!base) {
- base = ds_allocate_buffer(size, &context->pages[qual]);
- if (!base)
- goto out_release;
-
- context->buffer[qual] = base;
- }
- error = 0;
+ spin_lock_irqsave(&ds_lock, irq);
- context->callback[qual] = ovfl;
-
- /* adjust the buffer address and size to meet alignment
- * constraints:
- * - buffer is double-word aligned
- * - size is multiple of record size
- *
- * We checked the size at the very beginning; we have enough
- * space to do the adjustment.
- */
- buffer = (unsigned long)base;
+ error = -EPERM;
+ if (!check_tracer(task))
+ goto out_unlock;
+ get_tracer(task);
- adj = ALIGN(buffer, alignment) - buffer;
- buffer += adj;
- size -= adj;
+ error = -EPERM;
+ if (context->owner[qual])
+ goto out_put_tracer;
+ context->owner[qual] = tracer;
- size /= ds_cfg.sizeof_rec[qual];
- size *= ds_cfg.sizeof_rec[qual];
+ spin_unlock_irqrestore(&ds_lock, irq);
- ds_set(context->ds, qual, ds_buffer_base, buffer);
- ds_set(context->ds, qual, ds_index, buffer);
- ds_set(context->ds, qual, ds_absolute_maximum, buffer + size);
- if (ovfl) {
- /* todo: select a suitable interrupt threshold */
- } else
- ds_set(context->ds, qual,
- ds_interrupt_threshold, buffer + size + 1);
+ ds_install_ds_config(context, qual, base, size, th);
- /* we keep the context until ds_release */
- return error;
-
- out_release:
- context->owner[qual] = NULL;
- ds_put_context(context);
- return error;
+ return 0;
+ out_put_tracer:
+ put_tracer(task);
out_unlock:
- spin_unlock(&ds_lock);
+ spin_unlock_irqrestore(&ds_lock, irq);
ds_put_context(context);
+ tracer->context = NULL;
+ out:
return error;
}
-int ds_request_bts(struct task_struct *task, void *base, size_t size,
- ds_ovfl_callback_t ovfl)
+struct bts_tracer *ds_request_bts(struct task_struct *task,
+ void *base, size_t size,
+ bts_ovfl_callback_t ovfl, size_t th)
{
- return ds_request(task, base, size, ovfl, ds_bts);
-}
+ struct bts_tracer *tracer;
+ int error;
-int ds_request_pebs(struct task_struct *task, void *base, size_t size,
- ds_ovfl_callback_t ovfl)
-{
- return ds_request(task, base, size, ovfl, ds_pebs);
+ /* buffer overflow notification is not yet implemented */
+ error = -EOPNOTSUPP;
+ if (ovfl)
+ goto out;
+
+ error = -ENOMEM;
+ tracer = kzalloc(sizeof(*tracer), GFP_KERNEL);
+ if (!tracer)
+ goto out;
+ tracer->ovfl = ovfl;
+
+ error = ds_request(&tracer->ds, ds_bts, task, base, size, th);
+ if (error < 0)
+ goto out_tracer;
+
+ return tracer;
+
+ out_tracer:
+ kfree(tracer);
+ out:
+ return ERR_PTR(error);
}
-static int ds_release(struct task_struct *task, enum ds_qualifier qual)
+struct pebs_tracer *ds_request_pebs(struct task_struct *task,
+ void *base, size_t size,
+ pebs_ovfl_callback_t ovfl, size_t th)
{
- struct ds_context *context;
+ struct pebs_tracer *tracer;
int error;
- context = ds_get_context(task);
- error = ds_validate_access(context, qual);
- if (error < 0)
+ /* buffer overflow notification is not yet implemented */
+ error = -EOPNOTSUPP;
+ if (ovfl)
+ goto out;
+
+ error = -ENOMEM;
+ tracer = kzalloc(sizeof(*tracer), GFP_KERNEL);
+ if (!tracer)
goto out;
+ tracer->ovfl = ovfl;
- kfree(context->buffer[qual]);
- context->buffer[qual] = NULL;
+ error = ds_request(&tracer->ds, ds_pebs, task, base, size, th);
+ if (error < 0)
+ goto out_tracer;
- current->mm->total_vm -= context->pages[qual];
- current->mm->locked_vm -= context->pages[qual];
- context->pages[qual] = 0;
- context->owner[qual] = NULL;
+ return tracer;
- /*
- * we put the context twice:
- * once for the ds_get_context
- * once for the corresponding ds_request
- */
- ds_put_context(context);
+ out_tracer:
+ kfree(tracer);
out:
- ds_put_context(context);
- return error;
+ return ERR_PTR(error);
}
-int ds_release_bts(struct task_struct *task)
+static void ds_release(struct ds_tracer *tracer, enum ds_qualifier qual)
{
- return ds_release(task, ds_bts);
+ BUG_ON(tracer->context->owner[qual] != tracer);
+ tracer->context->owner[qual] = NULL;
+
+ put_tracer(tracer->context->task);
+ ds_put_context(tracer->context);
}
-int ds_release_pebs(struct task_struct *task)
+int ds_release_bts(struct bts_tracer *tracer)
{
- return ds_release(task, ds_pebs);
+ if (!tracer)
+ return -EINVAL;
+
+ ds_release(&tracer->ds, ds_bts);
+ kfree(tracer);
+
+ return 0;
}
-static int ds_get_index(struct task_struct *task, size_t *pos,
- enum ds_qualifier qual)
+int ds_release_pebs(struct pebs_tracer *tracer)
{
- struct ds_context *context;
- unsigned long base, index;
- int error;
+ if (!tracer)
+ return -EINVAL;
- context = ds_get_context(task);
- error = ds_validate_access(context, qual);
- if (error < 0)
- goto out;
+ ds_release(&tracer->ds, ds_pebs);
+ kfree(tracer);
+
+ return 0;
+}
+
+static size_t ds_get_index(struct ds_context *context, enum ds_qualifier qual)
+{
+ unsigned long base, index;
base = ds_get(context->ds, qual, ds_buffer_base);
index = ds_get(context->ds, qual, ds_index);
- error = ((index - base) / ds_cfg.sizeof_rec[qual]);
- if (pos)
- *pos = error;
- out:
- ds_put_context(context);
- return error;
+ return (index - base) / ds_cfg.sizeof_rec[qual];
}
-int ds_get_bts_index(struct task_struct *task, size_t *pos)
+int ds_get_bts_index(struct bts_tracer *tracer, size_t *pos)
{
- return ds_get_index(task, pos, ds_bts);
+ if (!tracer)
+ return -EINVAL;
+
+ if (!pos)
+ return -EINVAL;
+
+ *pos = ds_get_index(tracer->ds.context, ds_bts);
+
+ return 0;
}
-int ds_get_pebs_index(struct task_struct *task, size_t *pos)
+int ds_get_pebs_index(struct pebs_tracer *tracer, size_t *pos)
{
- return ds_get_index(task, pos, ds_pebs);
+ if (!tracer)
+ return -EINVAL;
+
+ if (!pos)
+ return -EINVAL;
+
+ *pos = ds_get_index(tracer->ds.context, ds_pebs);
+
+ return 0;
}
-static int ds_get_end(struct task_struct *task, size_t *pos,
- enum ds_qualifier qual)
+static size_t ds_get_end(struct ds_context *context, enum ds_qualifier qual)
{
- struct ds_context *context;
- unsigned long base, end;
- int error;
-
- context = ds_get_context(task);
- error = ds_validate_access(context, qual);
- if (error < 0)
- goto out;
+ unsigned long base, max;
base = ds_get(context->ds, qual, ds_buffer_base);
- end = ds_get(context->ds, qual, ds_absolute_maximum);
+ max = ds_get(context->ds, qual, ds_absolute_maximum);
- error = ((end - base) / ds_cfg.sizeof_rec[qual]);
- if (pos)
- *pos = error;
- out:
- ds_put_context(context);
- return error;
+ return (max - base) / ds_cfg.sizeof_rec[qual];
}
-int ds_get_bts_end(struct task_struct *task, size_t *pos)
+int ds_get_bts_end(struct bts_tracer *tracer, size_t *pos)
{
- return ds_get_end(task, pos, ds_bts);
+ if (!tracer)
+ return -EINVAL;
+
+ if (!pos)
+ return -EINVAL;
+
+ *pos = ds_get_end(tracer->ds.context, ds_bts);
+
+ return 0;
}
-int ds_get_pebs_end(struct task_struct *task, size_t *pos)
+int ds_get_pebs_end(struct pebs_tracer *tracer, size_t *pos)
{
- return ds_get_end(task, pos, ds_pebs);
+ if (!tracer)
+ return -EINVAL;
+
+ if (!pos)
+ return -EINVAL;
+
+ *pos = ds_get_end(tracer->ds.context, ds_pebs);
+
+ return 0;
}
-static int ds_access(struct task_struct *task, size_t index,
- const void **record, enum ds_qualifier qual)
+static int ds_access(struct ds_context *context, enum ds_qualifier qual,
+ size_t index, const void **record)
{
- struct ds_context *context;
unsigned long base, idx;
- int error;
if (!record)
return -EINVAL;
- context = ds_get_context(task);
- error = ds_validate_access(context, qual);
- if (error < 0)
- goto out;
-
base = ds_get(context->ds, qual, ds_buffer_base);
idx = base + (index * ds_cfg.sizeof_rec[qual]);
- error = -EINVAL;
if (idx > ds_get(context->ds, qual, ds_absolute_maximum))
- goto out;
+ return -EINVAL;
*record = (const void *)idx;
- error = ds_cfg.sizeof_rec[qual];
- out:
- ds_put_context(context);
- return error;
+
+ return ds_cfg.sizeof_rec[qual];
}
-int ds_access_bts(struct task_struct *task, size_t index, const void **record)
+int ds_access_bts(struct bts_tracer *tracer, size_t index,
+ const void **record)
{
- return ds_access(task, index, record, ds_bts);
+ if (!tracer)
+ return -EINVAL;
+
+ return ds_access(tracer->ds.context, ds_bts, index, record);
}
-int ds_access_pebs(struct task_struct *task, size_t index, const void **record)
+int ds_access_pebs(struct pebs_tracer *tracer, size_t index,
+ const void **record)
{
- return ds_access(task, index, record, ds_pebs);
+ if (!tracer)
+ return -EINVAL;
+
+ return ds_access(tracer->ds.context, ds_pebs, index, record);
}
-static int ds_write(struct task_struct *task, const void *record, size_t size,
- enum ds_qualifier qual, int force)
+static int ds_write(struct ds_context *context, enum ds_qualifier qual,
+ const void *record, size_t size)
{
- struct ds_context *context;
- int error;
+ int bytes_written = 0;
if (!record)
return -EINVAL;
- error = -EPERM;
- context = ds_get_context(task);
- if (!context)
- goto out;
-
- if (!force) {
- error = ds_validate_access(context, qual);
- if (error < 0)
- goto out;
- }
-
- error = 0;
while (size) {
unsigned long base, index, end, write_end, int_th;
unsigned long write_size, adj_write_size;
@@ -660,14 +625,14 @@ static int ds_write(struct task_struct *task, const void *record, size_t size,
write_end = end;
if (write_end <= index)
- goto out;
+ break;
write_size = min((unsigned long) size, write_end - index);
memcpy((void *)index, record, write_size);
record = (const char *)record + write_size;
- size -= write_size;
- error += write_size;
+ size -= write_size;
+ bytes_written += write_size;
adj_write_size = write_size / ds_cfg.sizeof_rec[qual];
adj_write_size *= ds_cfg.sizeof_rec[qual];
@@ -682,47 +647,32 @@ static int ds_write(struct task_struct *task, const void *record, size_t size,
ds_set(context->ds, qual, ds_index, index);
if (index >= int_th)
- ds_overflow(task, context, qual);
+ ds_overflow(context, qual);
}
- out:
- ds_put_context(context);
- return error;
+ return bytes_written;
}
-int ds_write_bts(struct task_struct *task, const void *record, size_t size)
+int ds_write_bts(struct bts_tracer *tracer, const void *record, size_t size)
{
- return ds_write(task, record, size, ds_bts, /* force = */ 0);
-}
+ if (!tracer)
+ return -EINVAL;
-int ds_write_pebs(struct task_struct *task, const void *record, size_t size)
-{
- return ds_write(task, record, size, ds_pebs, /* force = */ 0);
+ return ds_write(tracer->ds.context, ds_bts, record, size);
}
-int ds_unchecked_write_bts(struct task_struct *task,
- const void *record, size_t size)
+int ds_write_pebs(struct pebs_tracer *tracer, const void *record, size_t size)
{
- return ds_write(task, record, size, ds_bts, /* force = */ 1);
-}
+ if (!tracer)
+ return -EINVAL;
-int ds_unchecked_write_pebs(struct task_struct *task,
- const void *record, size_t size)
-{
- return ds_write(task, record, size, ds_pebs, /* force = */ 1);
+ return ds_write(tracer->ds.context, ds_pebs, record, size);
}
-static int ds_reset_or_clear(struct task_struct *task,
- enum ds_qualifier qual, int clear)
+static void ds_reset_or_clear(struct ds_context *context,
+ enum ds_qualifier qual, int clear)
{
- struct ds_context *context;
unsigned long base, end;
- int error;
-
- context = ds_get_context(task);
- error = ds_validate_access(context, qual);
- if (error < 0)
- goto out;
base = ds_get(context->ds, qual, ds_buffer_base);
end = ds_get(context->ds, qual, ds_absolute_maximum);
@@ -731,89 +681,100 @@ static int ds_reset_or_clear(struct task_struct *task,
memset((void *)base, 0, end - base);
ds_set(context->ds, qual, ds_index, base);
-
- error = 0;
- out:
- ds_put_context(context);
- return error;
}
-int ds_reset_bts(struct task_struct *task)
+int ds_reset_bts(struct bts_tracer *tracer)
{
- return ds_reset_or_clear(task, ds_bts, /* clear = */ 0);
+ if (!tracer)
+ return -EINVAL;
+
+ ds_reset_or_clear(tracer->ds.context, ds_bts, /* clear = */ 0);
+
+ return 0;
}
-int ds_reset_pebs(struct task_struct *task)
+int ds_reset_pebs(struct pebs_tracer *tracer)
{
- return ds_reset_or_clear(task, ds_pebs, /* clear = */ 0);
+ if (!tracer)
+ return -EINVAL;
+
+ ds_reset_or_clear(tracer->ds.context, ds_pebs, /* clear = */ 0);
+
+ return 0;
}
-int ds_clear_bts(struct task_struct *task)
+int ds_clear_bts(struct bts_tracer *tracer)
{
- return ds_reset_or_clear(task, ds_bts, /* clear = */ 1);
+ if (!tracer)
+ return -EINVAL;
+
+ ds_reset_or_clear(tracer->ds.context, ds_bts, /* clear = */ 1);
+
+ return 0;
}
-int ds_clear_pebs(struct task_struct *task)
+int ds_clear_pebs(struct pebs_tracer *tracer)
{
- return ds_reset_or_clear(task, ds_pebs, /* clear = */ 1);
+ if (!tracer)
+ return -EINVAL;
+
+ ds_reset_or_clear(tracer->ds.context, ds_pebs, /* clear = */ 1);
+
+ return 0;
}
-int ds_get_pebs_reset(struct task_struct *task, u64 *value)
+int ds_get_pebs_reset(struct pebs_tracer *tracer, u64 *value)
{
- struct ds_context *context;
- int error;
+ if (!tracer)
+ return -EINVAL;
if (!value)
return -EINVAL;
- context = ds_get_context(task);
- error = ds_validate_access(context, ds_pebs);
- if (error < 0)
- goto out;
+ *value = *(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8));
- *value = *(u64 *)(context->ds + (ds_cfg.sizeof_field * 8));
-
- error = 0;
- out:
- ds_put_context(context);
- return error;
+ return 0;
}
-int ds_set_pebs_reset(struct task_struct *task, u64 value)
+int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value)
{
- struct ds_context *context;
- int error;
-
- context = ds_get_context(task);
- error = ds_validate_access(context, ds_pebs);
- if (error < 0)
- goto out;
+ if (!tracer)
+ return -EINVAL;
- *(u64 *)(context->ds + (ds_cfg.sizeof_field * 8)) = value;
+ *(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8)) = value;
- error = 0;
- out:
- ds_put_context(context);
- return error;
+ return 0;
}
static const struct ds_configuration ds_cfg_var = {
.sizeof_ds = sizeof(long) * 12,
.sizeof_field = sizeof(long),
.sizeof_rec[ds_bts] = sizeof(long) * 3,
+#ifdef __i386__
.sizeof_rec[ds_pebs] = sizeof(long) * 10
+#else
+ .sizeof_rec[ds_pebs] = sizeof(long) * 18
+#endif
};
static const struct ds_configuration ds_cfg_64 = {
.sizeof_ds = 8 * 12,
.sizeof_field = 8,
.sizeof_rec[ds_bts] = 8 * 3,
+#ifdef __i386__
.sizeof_rec[ds_pebs] = 8 * 10
+#else
+ .sizeof_rec[ds_pebs] = 8 * 18
+#endif
};
static inline void
ds_configure(const struct ds_configuration *cfg)
{
ds_cfg = *cfg;
+
+ printk(KERN_INFO "DS available\n");
+
+ BUG_ON(MAX_SIZEOF_DS < ds_cfg.sizeof_ds);
}
void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
@@ -821,17 +782,16 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
switch (c->x86) {
case 0x6:
switch (c->x86_model) {
+ case 0 ... 0xC:
+ /* sorry, don't know about them */
+ break;
case 0xD:
case 0xE: /* Pentium M */
ds_configure(&ds_cfg_var);
break;
- case 0xF: /* Core2 */
- case 0x1C: /* Atom */
+ default: /* Core2, Atom, ... */
ds_configure(&ds_cfg_64);
break;
- default:
- /* sorry, don't know about them */
- break;
}
break;
case 0xF:
@@ -858,7 +818,8 @@ void ds_free(struct ds_context *context)
* is dying. There should not be any user of that context left
* to disturb us, anymore. */
unsigned long leftovers = context->count;
- while (leftovers--)
+ while (leftovers--) {
+ put_tracer(context->task);
ds_put_context(context);
+ }
}
-#endif /* CONFIG_X86_DS */
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
new file mode 100644
index 000000000000..6b1f6f6f8661
--- /dev/null
+++ b/arch/x86/kernel/dumpstack.c
@@ -0,0 +1,351 @@
+/*
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
+ */
+#include <linux/kallsyms.h>
+#include <linux/kprobes.h>
+#include <linux/uaccess.h>
+#include <linux/utsname.h>
+#include <linux/hardirq.h>
+#include <linux/kdebug.h>
+#include <linux/module.h>
+#include <linux/ptrace.h>
+#include <linux/kexec.h>
+#include <linux/bug.h>
+#include <linux/nmi.h>
+#include <linux/sysfs.h>
+
+#include <asm/stacktrace.h>
+
+#include "dumpstack.h"
+
+int panic_on_unrecovered_nmi;
+unsigned int code_bytes = 64;
+int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE;
+static int die_counter;
+
+void printk_address(unsigned long address, int reliable)
+{
+ printk(" [<%p>] %s%pS\n", (void *) address,
+ reliable ? "" : "? ", (void *) address);
+}
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+static void
+print_ftrace_graph_addr(unsigned long addr, void *data,
+ const struct stacktrace_ops *ops,
+ struct thread_info *tinfo, int *graph)
+{
+ struct task_struct *task = tinfo->task;
+ unsigned long ret_addr;
+ int index = task->curr_ret_stack;
+
+ if (addr != (unsigned long)return_to_handler)
+ return;
+
+ if (!task->ret_stack || index < *graph)
+ return;
+
+ index -= *graph;
+ ret_addr = task->ret_stack[index].ret;
+
+ ops->address(data, ret_addr, 1);
+
+ (*graph)++;
+}
+#else
+static inline void
+print_ftrace_graph_addr(unsigned long addr, void *data,
+ const struct stacktrace_ops *ops,
+ struct thread_info *tinfo, int *graph)
+{ }
+#endif
+
+/*
+ * x86-64 can have up to three kernel stacks:
+ * process stack
+ * interrupt stack
+ * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
+ */
+
+static inline int valid_stack_ptr(struct thread_info *tinfo,
+ void *p, unsigned int size, void *end)
+{
+ void *t = tinfo;
+ if (end) {
+ if (p < end && p >= (end-THREAD_SIZE))
+ return 1;
+ else
+ return 0;
+ }
+ return p > t && p < t + THREAD_SIZE - size;
+}
+
+unsigned long
+print_context_stack(struct thread_info *tinfo,
+ unsigned long *stack, unsigned long bp,
+ const struct stacktrace_ops *ops, void *data,
+ unsigned long *end, int *graph)
+{
+ struct stack_frame *frame = (struct stack_frame *)bp;
+
+ while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) {
+ unsigned long addr;
+
+ addr = *stack;
+ if (__kernel_text_address(addr)) {
+ if ((unsigned long) stack == bp + sizeof(long)) {
+ ops->address(data, addr, 1);
+ frame = frame->next_frame;
+ bp = (unsigned long) frame;
+ } else {
+ ops->address(data, addr, bp == 0);
+ }
+ print_ftrace_graph_addr(addr, data, ops, tinfo, graph);
+ }
+ stack++;
+ }
+ return bp;
+}
+
+
+static void
+print_trace_warning_symbol(void *data, char *msg, unsigned long symbol)
+{
+ printk(data);
+ print_symbol(msg, symbol);
+ printk("\n");
+}
+
+static void print_trace_warning(void *data, char *msg)
+{
+ printk("%s%s\n", (char *)data, msg);
+}
+
+static int print_trace_stack(void *data, char *name)
+{
+ printk("%s <%s> ", (char *)data, name);
+ return 0;
+}
+
+/*
+ * Print one address/symbol entries per line.
+ */
+static void print_trace_address(void *data, unsigned long addr, int reliable)
+{
+ touch_nmi_watchdog();
+ printk(data);
+ printk_address(addr, reliable);
+}
+
+static const struct stacktrace_ops print_trace_ops = {
+ .warning = print_trace_warning,
+ .warning_symbol = print_trace_warning_symbol,
+ .stack = print_trace_stack,
+ .address = print_trace_address,
+};
+
+void
+show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
+ unsigned long *stack, unsigned long bp, char *log_lvl)
+{
+ printk("%sCall Trace:\n", log_lvl);
+ dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl);
+}
+
+void show_trace(struct task_struct *task, struct pt_regs *regs,
+ unsigned long *stack, unsigned long bp)
+{
+ show_trace_log_lvl(task, regs, stack, bp, "");
+}
+
+void show_stack(struct task_struct *task, unsigned long *sp)
+{
+ show_stack_log_lvl(task, NULL, sp, 0, "");
+}
+
+/*
+ * The architecture-independent dump_stack generator
+ */
+void dump_stack(void)
+{
+ unsigned long bp = 0;
+ unsigned long stack;
+
+#ifdef CONFIG_FRAME_POINTER
+ if (!bp)
+ get_bp(bp);
+#endif
+
+ printk("Pid: %d, comm: %.20s %s %s %.*s\n",
+ current->pid, current->comm, print_tainted(),
+ init_utsname()->release,
+ (int)strcspn(init_utsname()->version, " "),
+ init_utsname()->version);
+ show_trace(NULL, NULL, &stack, bp);
+}
+EXPORT_SYMBOL(dump_stack);
+
+static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED;
+static int die_owner = -1;
+static unsigned int die_nest_count;
+
+unsigned __kprobes long oops_begin(void)
+{
+ int cpu;
+ unsigned long flags;
+
+ oops_enter();
+
+ /* racy, but better than risking deadlock. */
+ raw_local_irq_save(flags);
+ cpu = smp_processor_id();
+ if (!__raw_spin_trylock(&die_lock)) {
+ if (cpu == die_owner)
+ /* nested oops. should stop eventually */;
+ else
+ __raw_spin_lock(&die_lock);
+ }
+ die_nest_count++;
+ die_owner = cpu;
+ console_verbose();
+ bust_spinlocks(1);
+ return flags;
+}
+
+void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
+{
+ if (regs && kexec_should_crash(current))
+ crash_kexec(regs);
+
+ bust_spinlocks(0);
+ die_owner = -1;
+ add_taint(TAINT_DIE);
+ die_nest_count--;
+ if (!die_nest_count)
+ /* Nest count reaches zero, release the lock. */
+ __raw_spin_unlock(&die_lock);
+ raw_local_irq_restore(flags);
+ oops_exit();
+
+ if (!signr)
+ return;
+ if (in_interrupt())
+ panic("Fatal exception in interrupt");
+ if (panic_on_oops)
+ panic("Fatal exception");
+ do_exit(signr);
+}
+
+int __kprobes __die(const char *str, struct pt_regs *regs, long err)
+{
+#ifdef CONFIG_X86_32
+ unsigned short ss;
+ unsigned long sp;
+#endif
+ printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter);
+#ifdef CONFIG_PREEMPT
+ printk("PREEMPT ");
+#endif
+#ifdef CONFIG_SMP
+ printk("SMP ");
+#endif
+#ifdef CONFIG_DEBUG_PAGEALLOC
+ printk("DEBUG_PAGEALLOC");
+#endif
+ printk("\n");
+ sysfs_printk_last_file();
+ if (notify_die(DIE_OOPS, str, regs, err,
+ current->thread.trap_no, SIGSEGV) == NOTIFY_STOP)
+ return 1;
+
+ show_registers(regs);
+#ifdef CONFIG_X86_32
+ sp = (unsigned long) (&regs->sp);
+ savesegment(ss, ss);
+ if (user_mode(regs)) {
+ sp = regs->sp;
+ ss = regs->ss & 0xffff;
+ }
+ printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip);
+ print_symbol("%s", regs->ip);
+ printk(" SS:ESP %04x:%08lx\n", ss, sp);
+#else
+ /* Executive summary in case the oops scrolled away */
+ printk(KERN_ALERT "RIP ");
+ printk_address(regs->ip, 1);
+ printk(" RSP <%016lx>\n", regs->sp);
+#endif
+ return 0;
+}
+
+/*
+ * This is gone through when something in the kernel has done something bad
+ * and is about to be terminated:
+ */
+void die(const char *str, struct pt_regs *regs, long err)
+{
+ unsigned long flags = oops_begin();
+ int sig = SIGSEGV;
+
+ if (!user_mode_vm(regs))
+ report_bug(regs->ip, regs);
+
+ if (__die(str, regs, err))
+ sig = 0;
+ oops_end(flags, regs, sig);
+}
+
+void notrace __kprobes
+die_nmi(char *str, struct pt_regs *regs, int do_panic)
+{
+ unsigned long flags;
+
+ if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP)
+ return;
+
+ /*
+ * We are in trouble anyway, lets at least try
+ * to get a message out.
+ */
+ flags = oops_begin();
+ printk(KERN_EMERG "%s", str);
+ printk(" on CPU%d, ip %08lx, registers:\n",
+ smp_processor_id(), regs->ip);
+ show_registers(regs);
+ oops_end(flags, regs, 0);
+ if (do_panic || panic_on_oops)
+ panic("Non maskable interrupt");
+ nmi_exit();
+ local_irq_enable();
+ do_exit(SIGBUS);
+}
+
+static int __init oops_setup(char *s)
+{
+ if (!s)
+ return -EINVAL;
+ if (!strcmp(s, "panic"))
+ panic_on_oops = 1;
+ return 0;
+}
+early_param("oops", oops_setup);
+
+static int __init kstack_setup(char *s)
+{
+ if (!s)
+ return -EINVAL;
+ kstack_depth_to_print = simple_strtoul(s, NULL, 0);
+ return 0;
+}
+early_param("kstack", kstack_setup);
+
+static int __init code_bytes_setup(char *s)
+{
+ code_bytes = simple_strtoul(s, NULL, 0);
+ if (code_bytes > 8192)
+ code_bytes = 8192;
+
+ return 1;
+}
+__setup("code_bytes=", code_bytes_setup);
diff --git a/arch/x86/kernel/dumpstack.h b/arch/x86/kernel/dumpstack.h
new file mode 100644
index 000000000000..da87590b8698
--- /dev/null
+++ b/arch/x86/kernel/dumpstack.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
+ */
+
+#ifndef DUMPSTACK_H
+#define DUMPSTACK_H
+
+#ifdef CONFIG_X86_32
+#define STACKSLOTS_PER_LINE 8
+#define get_bp(bp) asm("movl %%ebp, %0" : "=r" (bp) :)
+#else
+#define STACKSLOTS_PER_LINE 4
+#define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :)
+#endif
+
+extern unsigned long
+print_context_stack(struct thread_info *tinfo,
+ unsigned long *stack, unsigned long bp,
+ const struct stacktrace_ops *ops, void *data,
+ unsigned long *end, int *graph);
+
+extern void
+show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
+ unsigned long *stack, unsigned long bp, char *log_lvl);
+
+extern void
+show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
+ unsigned long *sp, unsigned long bp, char *log_lvl);
+
+extern unsigned int code_bytes;
+extern int kstack_depth_to_print;
+
+/* The form of the top of the frame on the stack */
+struct stack_frame {
+ struct stack_frame *next_frame;
+ unsigned long return_address;
+};
+#endif
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index b3614752197b..d593cd1f58dc 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -17,69 +17,14 @@
#include <asm/stacktrace.h>
-#define STACKSLOTS_PER_LINE 8
-#define get_bp(bp) asm("movl %%ebp, %0" : "=r" (bp) :)
-
-int panic_on_unrecovered_nmi;
-int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE;
-static unsigned int code_bytes = 64;
-static int die_counter;
-
-void printk_address(unsigned long address, int reliable)
-{
- printk(" [<%p>] %s%pS\n", (void *) address,
- reliable ? "" : "? ", (void *) address);
-}
-
-static inline int valid_stack_ptr(struct thread_info *tinfo,
- void *p, unsigned int size, void *end)
-{
- void *t = tinfo;
- if (end) {
- if (p < end && p >= (end-THREAD_SIZE))
- return 1;
- else
- return 0;
- }
- return p > t && p < t + THREAD_SIZE - size;
-}
-
-/* The form of the top of the frame on the stack */
-struct stack_frame {
- struct stack_frame *next_frame;
- unsigned long return_address;
-};
-
-static inline unsigned long
-print_context_stack(struct thread_info *tinfo,
- unsigned long *stack, unsigned long bp,
- const struct stacktrace_ops *ops, void *data,
- unsigned long *end)
-{
- struct stack_frame *frame = (struct stack_frame *)bp;
-
- while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) {
- unsigned long addr;
-
- addr = *stack;
- if (__kernel_text_address(addr)) {
- if ((unsigned long) stack == bp + sizeof(long)) {
- ops->address(data, addr, 1);
- frame = frame->next_frame;
- bp = (unsigned long) frame;
- } else {
- ops->address(data, addr, bp == 0);
- }
- }
- stack++;
- }
- return bp;
-}
+#include "dumpstack.h"
void dump_trace(struct task_struct *task, struct pt_regs *regs,
unsigned long *stack, unsigned long bp,
const struct stacktrace_ops *ops, void *data)
{
+ int graph = 0;
+
if (!task)
task = current;
@@ -107,7 +52,8 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
context = (struct thread_info *)
((unsigned long)stack & (~(THREAD_SIZE - 1)));
- bp = print_context_stack(context, stack, bp, ops, data, NULL);
+ bp = print_context_stack(context, stack, bp, ops,
+ data, NULL, &graph);
stack = (unsigned long *)context->previous_esp;
if (!stack)
@@ -119,57 +65,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
}
EXPORT_SYMBOL(dump_trace);
-static void
-print_trace_warning_symbol(void *data, char *msg, unsigned long symbol)
-{
- printk(data);
- print_symbol(msg, symbol);
- printk("\n");
-}
-
-static void print_trace_warning(void *data, char *msg)
-{
- printk("%s%s\n", (char *)data, msg);
-}
-
-static int print_trace_stack(void *data, char *name)
-{
- printk("%s <%s> ", (char *)data, name);
- return 0;
-}
-
-/*
- * Print one address/symbol entries per line.
- */
-static void print_trace_address(void *data, unsigned long addr, int reliable)
-{
- touch_nmi_watchdog();
- printk(data);
- printk_address(addr, reliable);
-}
-
-static const struct stacktrace_ops print_trace_ops = {
- .warning = print_trace_warning,
- .warning_symbol = print_trace_warning_symbol,
- .stack = print_trace_stack,
- .address = print_trace_address,
-};
-
-static void
-show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
- unsigned long *stack, unsigned long bp, char *log_lvl)
-{
- printk("%sCall Trace:\n", log_lvl);
- dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl);
-}
-
-void show_trace(struct task_struct *task, struct pt_regs *regs,
- unsigned long *stack, unsigned long bp)
-{
- show_trace_log_lvl(task, regs, stack, bp, "");
-}
-
-static void
+void
show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
unsigned long *sp, unsigned long bp, char *log_lvl)
{
@@ -196,33 +92,6 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
show_trace_log_lvl(task, regs, sp, bp, log_lvl);
}
-void show_stack(struct task_struct *task, unsigned long *sp)
-{
- show_stack_log_lvl(task, NULL, sp, 0, "");
-}
-
-/*
- * The architecture-independent dump_stack generator
- */
-void dump_stack(void)
-{
- unsigned long bp = 0;
- unsigned long stack;
-
-#ifdef CONFIG_FRAME_POINTER
- if (!bp)
- get_bp(bp);
-#endif
-
- printk("Pid: %d, comm: %.20s %s %s %.*s\n",
- current->pid, current->comm, print_tainted(),
- init_utsname()->release,
- (int)strcspn(init_utsname()->version, " "),
- init_utsname()->version);
- show_trace(NULL, NULL, &stack, bp);
-}
-
-EXPORT_SYMBOL(dump_stack);
void show_registers(struct pt_regs *regs)
{
@@ -283,167 +152,3 @@ int is_valid_bugaddr(unsigned long ip)
return ud2 == 0x0b0f;
}
-static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED;
-static int die_owner = -1;
-static unsigned int die_nest_count;
-
-unsigned __kprobes long oops_begin(void)
-{
- unsigned long flags;
-
- oops_enter();
-
- if (die_owner != raw_smp_processor_id()) {
- console_verbose();
- raw_local_irq_save(flags);
- __raw_spin_lock(&die_lock);
- die_owner = smp_processor_id();
- die_nest_count = 0;
- bust_spinlocks(1);
- } else {
- raw_local_irq_save(flags);
- }
- die_nest_count++;
- return flags;
-}
-
-void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
-{
- bust_spinlocks(0);
- die_owner = -1;
- add_taint(TAINT_DIE);
- __raw_spin_unlock(&die_lock);
- raw_local_irq_restore(flags);
-
- if (!regs)
- return;
-
- if (kexec_should_crash(current))
- crash_kexec(regs);
- if (in_interrupt())
- panic("Fatal exception in interrupt");
- if (panic_on_oops)
- panic("Fatal exception");
- oops_exit();
- do_exit(signr);
-}
-
-int __kprobes __die(const char *str, struct pt_regs *regs, long err)
-{
- unsigned short ss;
- unsigned long sp;
-
- printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter);
-#ifdef CONFIG_PREEMPT
- printk("PREEMPT ");
-#endif
-#ifdef CONFIG_SMP
- printk("SMP ");
-#endif
-#ifdef CONFIG_DEBUG_PAGEALLOC
- printk("DEBUG_PAGEALLOC");
-#endif
- printk("\n");
- sysfs_printk_last_file();
- if (notify_die(DIE_OOPS, str, regs, err,
- current->thread.trap_no, SIGSEGV) == NOTIFY_STOP)
- return 1;
-
- show_registers(regs);
- /* Executive summary in case the oops scrolled away */
- sp = (unsigned long) (&regs->sp);
- savesegment(ss, ss);
- if (user_mode(regs)) {
- sp = regs->sp;
- ss = regs->ss & 0xffff;
- }
- printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip);
- print_symbol("%s", regs->ip);
- printk(" SS:ESP %04x:%08lx\n", ss, sp);
- return 0;
-}
-
-/*
- * This is gone through when something in the kernel has done something bad
- * and is about to be terminated:
- */
-void die(const char *str, struct pt_regs *regs, long err)
-{
- unsigned long flags = oops_begin();
-
- if (die_nest_count < 3) {
- report_bug(regs->ip, regs);
-
- if (__die(str, regs, err))
- regs = NULL;
- } else {
- printk(KERN_EMERG "Recursive die() failure, output suppressed\n");
- }
-
- oops_end(flags, regs, SIGSEGV);
-}
-
-static DEFINE_SPINLOCK(nmi_print_lock);
-
-void notrace __kprobes
-die_nmi(char *str, struct pt_regs *regs, int do_panic)
-{
- if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP)
- return;
-
- spin_lock(&nmi_print_lock);
- /*
- * We are in trouble anyway, lets at least try
- * to get a message out:
- */
- bust_spinlocks(1);
- printk(KERN_EMERG "%s", str);
- printk(" on CPU%d, ip %08lx, registers:\n",
- smp_processor_id(), regs->ip);
- show_registers(regs);
- if (do_panic)
- panic("Non maskable interrupt");
- console_silent();
- spin_unlock(&nmi_print_lock);
-
- /*
- * If we are in kernel we are probably nested up pretty bad
- * and might aswell get out now while we still can:
- */
- if (!user_mode_vm(regs)) {
- current->thread.trap_no = 2;
- crash_kexec(regs);
- }
-
- bust_spinlocks(0);
- do_exit(SIGSEGV);
-}
-
-static int __init oops_setup(char *s)
-{
- if (!s)
- return -EINVAL;
- if (!strcmp(s, "panic"))
- panic_on_oops = 1;
- return 0;
-}
-early_param("oops", oops_setup);
-
-static int __init kstack_setup(char *s)
-{
- if (!s)
- return -EINVAL;
- kstack_depth_to_print = simple_strtoul(s, NULL, 0);
- return 0;
-}
-early_param("kstack", kstack_setup);
-
-static int __init code_bytes_setup(char *s)
-{
- code_bytes = simple_strtoul(s, NULL, 0);
- if (code_bytes > 8192)
- code_bytes = 8192;
-
- return 1;
-}
-__setup("code_bytes=", code_bytes_setup);
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 96a5db7da8a7..c302d0707048 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -17,19 +17,7 @@
#include <asm/stacktrace.h>
-#define STACKSLOTS_PER_LINE 4
-#define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :)
-
-int panic_on_unrecovered_nmi;
-int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE;
-static unsigned int code_bytes = 64;
-static int die_counter;
-
-void printk_address(unsigned long address, int reliable)
-{
- printk(" [<%p>] %s%pS\n", (void *) address,
- reliable ? "" : "? ", (void *) address);
-}
+#include "dumpstack.h"
static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
unsigned *usedp, char **idp)
@@ -113,51 +101,6 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
* severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
*/
-static inline int valid_stack_ptr(struct thread_info *tinfo,
- void *p, unsigned int size, void *end)
-{
- void *t = tinfo;
- if (end) {
- if (p < end && p >= (end-THREAD_SIZE))
- return 1;
- else
- return 0;
- }
- return p > t && p < t + THREAD_SIZE - size;
-}
-
-/* The form of the top of the frame on the stack */
-struct stack_frame {
- struct stack_frame *next_frame;
- unsigned long return_address;
-};
-
-static inline unsigned long
-print_context_stack(struct thread_info *tinfo,
- unsigned long *stack, unsigned long bp,
- const struct stacktrace_ops *ops, void *data,
- unsigned long *end)
-{
- struct stack_frame *frame = (struct stack_frame *)bp;
-
- while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) {
- unsigned long addr;
-
- addr = *stack;
- if (__kernel_text_address(addr)) {
- if ((unsigned long) stack == bp + sizeof(long)) {
- ops->address(data, addr, 1);
- frame = frame->next_frame;
- bp = (unsigned long) frame;
- } else {
- ops->address(data, addr, bp == 0);
- }
- }
- stack++;
- }
- return bp;
-}
-
void dump_trace(struct task_struct *task, struct pt_regs *regs,
unsigned long *stack, unsigned long bp,
const struct stacktrace_ops *ops, void *data)
@@ -166,6 +109,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr;
unsigned used = 0;
struct thread_info *tinfo;
+ int graph = 0;
if (!task)
task = current;
@@ -206,7 +150,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
break;
bp = print_context_stack(tinfo, stack, bp, ops,
- data, estack_end);
+ data, estack_end, &graph);
ops->stack(data, "<EOE>");
/*
* We link to the next stack via the
@@ -225,7 +169,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
if (ops->stack(data, "IRQ") < 0)
break;
bp = print_context_stack(tinfo, stack, bp,
- ops, data, irqstack_end);
+ ops, data, irqstack_end, &graph);
/*
* We link to the next stack (which would be
* the process stack normally) the last
@@ -243,62 +187,12 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
/*
* This handles the process stack:
*/
- bp = print_context_stack(tinfo, stack, bp, ops, data, NULL);
+ bp = print_context_stack(tinfo, stack, bp, ops, data, NULL, &graph);
put_cpu();
}
EXPORT_SYMBOL(dump_trace);
-static void
-print_trace_warning_symbol(void *data, char *msg, unsigned long symbol)
-{
- printk(data);
- print_symbol(msg, symbol);
- printk("\n");
-}
-
-static void print_trace_warning(void *data, char *msg)
-{
- printk("%s%s\n", (char *)data, msg);
-}
-
-static int print_trace_stack(void *data, char *name)
-{
- printk("%s <%s> ", (char *)data, name);
- return 0;
-}
-
-/*
- * Print one address/symbol entries per line.
- */
-static void print_trace_address(void *data, unsigned long addr, int reliable)
-{
- touch_nmi_watchdog();
- printk(data);
- printk_address(addr, reliable);
-}
-
-static const struct stacktrace_ops print_trace_ops = {
- .warning = print_trace_warning,
- .warning_symbol = print_trace_warning_symbol,
- .stack = print_trace_stack,
- .address = print_trace_address,
-};
-
-static void
-show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
- unsigned long *stack, unsigned long bp, char *log_lvl)
-{
- printk("%sCall Trace:\n", log_lvl);
- dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl);
-}
-
-void show_trace(struct task_struct *task, struct pt_regs *regs,
- unsigned long *stack, unsigned long bp)
-{
- show_trace_log_lvl(task, regs, stack, bp, "");
-}
-
-static void
+void
show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
unsigned long *sp, unsigned long bp, char *log_lvl)
{
@@ -342,33 +236,6 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
show_trace_log_lvl(task, regs, sp, bp, log_lvl);
}
-void show_stack(struct task_struct *task, unsigned long *sp)
-{
- show_stack_log_lvl(task, NULL, sp, 0, "");
-}
-
-/*
- * The architecture-independent dump_stack generator
- */
-void dump_stack(void)
-{
- unsigned long bp = 0;
- unsigned long stack;
-
-#ifdef CONFIG_FRAME_POINTER
- if (!bp)
- get_bp(bp);
-#endif
-
- printk("Pid: %d, comm: %.20s %s %s %.*s\n",
- current->pid, current->comm, print_tainted(),
- init_utsname()->release,
- (int)strcspn(init_utsname()->version, " "),
- init_utsname()->version);
- show_trace(NULL, NULL, &stack, bp);
-}
-EXPORT_SYMBOL(dump_stack);
-
void show_registers(struct pt_regs *regs)
{
int i;
@@ -429,147 +296,3 @@ int is_valid_bugaddr(unsigned long ip)
return ud2 == 0x0b0f;
}
-static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED;
-static int die_owner = -1;
-static unsigned int die_nest_count;
-
-unsigned __kprobes long oops_begin(void)
-{
- int cpu;
- unsigned long flags;
-
- oops_enter();
-
- /* racy, but better than risking deadlock. */
- raw_local_irq_save(flags);
- cpu = smp_processor_id();
- if (!__raw_spin_trylock(&die_lock)) {
- if (cpu == die_owner)
- /* nested oops. should stop eventually */;
- else
- __raw_spin_lock(&die_lock);
- }
- die_nest_count++;
- die_owner = cpu;
- console_verbose();
- bust_spinlocks(1);
- return flags;
-}
-
-void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
-{
- die_owner = -1;
- bust_spinlocks(0);
- die_nest_count--;
- if (!die_nest_count)
- /* Nest count reaches zero, release the lock. */
- __raw_spin_unlock(&die_lock);
- raw_local_irq_restore(flags);
- if (!regs) {
- oops_exit();
- return;
- }
- if (in_interrupt())
- panic("Fatal exception in interrupt");
- if (panic_on_oops)
- panic("Fatal exception");
- oops_exit();
- do_exit(signr);
-}
-
-int __kprobes __die(const char *str, struct pt_regs *regs, long err)
-{
- printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter);
-#ifdef CONFIG_PREEMPT
- printk("PREEMPT ");
-#endif
-#ifdef CONFIG_SMP
- printk("SMP ");
-#endif
-#ifdef CONFIG_DEBUG_PAGEALLOC
- printk("DEBUG_PAGEALLOC");
-#endif
- printk("\n");
- sysfs_printk_last_file();
- if (notify_die(DIE_OOPS, str, regs, err,
- current->thread.trap_no, SIGSEGV) == NOTIFY_STOP)
- return 1;
-
- show_registers(regs);
- add_taint(TAINT_DIE);
- /* Executive summary in case the oops scrolled away */
- printk(KERN_ALERT "RIP ");
- printk_address(regs->ip, 1);
- printk(" RSP <%016lx>\n", regs->sp);
- if (kexec_should_crash(current))
- crash_kexec(regs);
- return 0;
-}
-
-void die(const char *str, struct pt_regs *regs, long err)
-{
- unsigned long flags = oops_begin();
-
- if (!user_mode(regs))
- report_bug(regs->ip, regs);
-
- if (__die(str, regs, err))
- regs = NULL;
- oops_end(flags, regs, SIGSEGV);
-}
-
-notrace __kprobes void
-die_nmi(char *str, struct pt_regs *regs, int do_panic)
-{
- unsigned long flags;
-
- if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP)
- return;
-
- flags = oops_begin();
- /*
- * We are in trouble anyway, lets at least try
- * to get a message out.
- */
- printk(KERN_EMERG "%s", str);
- printk(" on CPU%d, ip %08lx, registers:\n",
- smp_processor_id(), regs->ip);
- show_registers(regs);
- if (kexec_should_crash(current))
- crash_kexec(regs);
- if (do_panic || panic_on_oops)
- panic("Non maskable interrupt");
- oops_end(flags, NULL, SIGBUS);
- nmi_exit();
- local_irq_enable();
- do_exit(SIGBUS);
-}
-
-static int __init oops_setup(char *s)
-{
- if (!s)
- return -EINVAL;
- if (!strcmp(s, "panic"))
- panic_on_oops = 1;
- return 0;
-}
-early_param("oops", oops_setup);
-
-static int __init kstack_setup(char *s)
-{
- if (!s)
- return -EINVAL;
- kstack_depth_to_print = simple_strtoul(s, NULL, 0);
- return 0;
-}
-early_param("kstack", kstack_setup);
-
-static int __init code_bytes_setup(char *s)
-{
- code_bytes = simple_strtoul(s, NULL, 0);
- if (code_bytes > 8192)
- code_bytes = 8192;
-
- return 1;
-}
-__setup("code_bytes=", code_bytes_setup);
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index 3ce029ffaa55..1b894b72c0f5 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -188,20 +188,6 @@ static void __init ati_bugs_contd(int num, int slot, int func)
}
#endif
-#ifdef CONFIG_DMAR
-static void __init intel_g33_dmar(int num, int slot, int func)
-{
- struct acpi_table_header *dmar_tbl;
- acpi_status status;
-
- status = acpi_get_table(ACPI_SIG_DMAR, 0, &dmar_tbl);
- if (ACPI_SUCCESS(status)) {
- printk(KERN_INFO "BIOS BUG: DMAR advertised on Intel G31/G33 chipset -- ignoring\n");
- dmar_disabled = 1;
- }
-}
-#endif
-
#define QFLAG_APPLY_ONCE 0x1
#define QFLAG_APPLIED 0x2
#define QFLAG_DONE (QFLAG_APPLY_ONCE|QFLAG_APPLIED)
@@ -225,10 +211,6 @@ static struct chipset early_qrk[] __initdata = {
PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs },
{ PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_SBX00_SMBUS,
PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs_contd },
-#ifdef CONFIG_DMAR
- { PCI_VENDOR_ID_INTEL, 0x29c0,
- PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, intel_g33_dmar },
-#endif
{}
};
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 28b597ef9ca1..43ceb3f454bf 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -1157,6 +1157,9 @@ ENTRY(mcount)
END(mcount)
ENTRY(ftrace_caller)
+ cmpl $0, function_trace_stop
+ jne ftrace_stub
+
pushl %eax
pushl %ecx
pushl %edx
@@ -1171,6 +1174,11 @@ ftrace_call:
popl %edx
popl %ecx
popl %eax
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+.globl ftrace_graph_call
+ftrace_graph_call:
+ jmp ftrace_stub
+#endif
.globl ftrace_stub
ftrace_stub:
@@ -1180,8 +1188,18 @@ END(ftrace_caller)
#else /* ! CONFIG_DYNAMIC_FTRACE */
ENTRY(mcount)
+ cmpl $0, function_trace_stop
+ jne ftrace_stub
+
cmpl $ftrace_stub, ftrace_trace_function
jnz trace
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ cmpl $ftrace_stub, ftrace_graph_return
+ jnz ftrace_graph_caller
+
+ cmpl $ftrace_graph_entry_stub, ftrace_graph_entry
+ jnz ftrace_graph_caller
+#endif
.globl ftrace_stub
ftrace_stub:
ret
@@ -1200,12 +1218,43 @@ trace:
popl %edx
popl %ecx
popl %eax
-
jmp ftrace_stub
END(mcount)
#endif /* CONFIG_DYNAMIC_FTRACE */
#endif /* CONFIG_FUNCTION_TRACER */
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ENTRY(ftrace_graph_caller)
+ cmpl $0, function_trace_stop
+ jne ftrace_stub
+
+ pushl %eax
+ pushl %ecx
+ pushl %edx
+ movl 0xc(%esp), %edx
+ lea 0x4(%ebp), %eax
+ subl $MCOUNT_INSN_SIZE, %edx
+ call prepare_ftrace_return
+ popl %edx
+ popl %ecx
+ popl %eax
+ ret
+END(ftrace_graph_caller)
+
+.globl return_to_handler
+return_to_handler:
+ pushl $0
+ pushl %eax
+ pushl %ecx
+ pushl %edx
+ call ftrace_return_to_handler
+ movl %eax, 0xc(%esp)
+ popl %edx
+ popl %ecx
+ popl %eax
+ ret
+#endif
+
.section .rodata,"a"
#include "syscall_table_32.S"
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index b86f332c96a6..54e0bbdccb99 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -68,6 +68,8 @@ ENTRY(mcount)
END(mcount)
ENTRY(ftrace_caller)
+ cmpl $0, function_trace_stop
+ jne ftrace_stub
/* taken from glibc */
subq $0x38, %rsp
@@ -96,6 +98,12 @@ ftrace_call:
movq (%rsp), %rax
addq $0x38, %rsp
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+.globl ftrace_graph_call
+ftrace_graph_call:
+ jmp ftrace_stub
+#endif
+
.globl ftrace_stub
ftrace_stub:
retq
@@ -103,8 +111,20 @@ END(ftrace_caller)
#else /* ! CONFIG_DYNAMIC_FTRACE */
ENTRY(mcount)
+ cmpl $0, function_trace_stop
+ jne ftrace_stub
+
cmpq $ftrace_stub, ftrace_trace_function
jnz trace
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ cmpq $ftrace_stub, ftrace_graph_return
+ jnz ftrace_graph_caller
+
+ cmpq $ftrace_graph_entry_stub, ftrace_graph_entry
+ jnz ftrace_graph_caller
+#endif
+
.globl ftrace_stub
ftrace_stub:
retq
@@ -140,6 +160,69 @@ END(mcount)
#endif /* CONFIG_DYNAMIC_FTRACE */
#endif /* CONFIG_FUNCTION_TRACER */
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ENTRY(ftrace_graph_caller)
+ cmpl $0, function_trace_stop
+ jne ftrace_stub
+
+ subq $0x38, %rsp
+ movq %rax, (%rsp)
+ movq %rcx, 8(%rsp)
+ movq %rdx, 16(%rsp)
+ movq %rsi, 24(%rsp)
+ movq %rdi, 32(%rsp)
+ movq %r8, 40(%rsp)
+ movq %r9, 48(%rsp)
+
+ leaq 8(%rbp), %rdi
+ movq 0x38(%rsp), %rsi
+ subq $MCOUNT_INSN_SIZE, %rsi
+
+ call prepare_ftrace_return
+
+ movq 48(%rsp), %r9
+ movq 40(%rsp), %r8
+ movq 32(%rsp), %rdi
+ movq 24(%rsp), %rsi
+ movq 16(%rsp), %rdx
+ movq 8(%rsp), %rcx
+ movq (%rsp), %rax
+ addq $0x38, %rsp
+ retq
+END(ftrace_graph_caller)
+
+
+.globl return_to_handler
+return_to_handler:
+ subq $80, %rsp
+
+ movq %rax, (%rsp)
+ movq %rcx, 8(%rsp)
+ movq %rdx, 16(%rsp)
+ movq %rsi, 24(%rsp)
+ movq %rdi, 32(%rsp)
+ movq %r8, 40(%rsp)
+ movq %r9, 48(%rsp)
+ movq %r10, 56(%rsp)
+ movq %r11, 64(%rsp)
+
+ call ftrace_return_to_handler
+
+ movq %rax, 72(%rsp)
+ movq 64(%rsp), %r11
+ movq 56(%rsp), %r10
+ movq 48(%rsp), %r9
+ movq 40(%rsp), %r8
+ movq 32(%rsp), %rdi
+ movq 24(%rsp), %rsi
+ movq 16(%rsp), %rdx
+ movq 8(%rsp), %rcx
+ movq (%rsp), %rax
+ addq $72, %rsp
+ retq
+#endif
+
+
#ifndef CONFIG_PREEMPT
#define retint_kernel retint_restore_args
#endif
diff --git a/arch/x86/kernel/es7000_32.c b/arch/x86/kernel/es7000_32.c
index f454c78fcef6..53699c931ad4 100644
--- a/arch/x86/kernel/es7000_32.c
+++ b/arch/x86/kernel/es7000_32.c
@@ -38,8 +38,11 @@
#include <asm/io.h>
#include <asm/nmi.h>
#include <asm/smp.h>
+#include <asm/atomic.h>
#include <asm/apicdef.h>
#include <mach_mpparse.h>
+#include <asm/genapic.h>
+#include <asm/setup.h>
/*
* ES7000 chipsets
@@ -161,6 +164,43 @@ es7000_rename_gsi(int ioapic, int gsi)
return gsi;
}
+static int wakeup_secondary_cpu_via_mip(int cpu, unsigned long eip)
+{
+ unsigned long vect = 0, psaival = 0;
+
+ if (psai == NULL)
+ return -1;
+
+ vect = ((unsigned long)__pa(eip)/0x1000) << 16;
+ psaival = (0x1000000 | vect | cpu);
+
+ while (*psai & 0x1000000)
+ ;
+
+ *psai = psaival;
+
+ return 0;
+}
+
+static void noop_wait_for_deassert(atomic_t *deassert_not_used)
+{
+}
+
+static int __init es7000_update_genapic(void)
+{
+ genapic->wakeup_cpu = wakeup_secondary_cpu_via_mip;
+
+ /* MPENTIUMIII */
+ if (boot_cpu_data.x86 == 6 &&
+ (boot_cpu_data.x86_model >= 7 || boot_cpu_data.x86_model <= 11)) {
+ es7000_update_genapic_to_cluster();
+ genapic->wait_for_init_deassert = noop_wait_for_deassert;
+ genapic->wakeup_cpu = wakeup_secondary_cpu_via_mip;
+ }
+
+ return 0;
+}
+
void __init
setup_unisys(void)
{
@@ -176,6 +216,8 @@ setup_unisys(void)
else
es7000_plat = ES7000_CLASSIC;
ioapic_renumber_irq = es7000_rename_gsi;
+
+ x86_quirks->update_genapic = es7000_update_genapic;
}
/*
@@ -250,31 +292,24 @@ int __init find_unisys_acpi_oem_table(unsigned long *oem_addr)
{
struct acpi_table_header *header = NULL;
int i = 0;
- acpi_size tbl_size;
- while (ACPI_SUCCESS(acpi_get_table_with_size("OEM1", i++, &header, &tbl_size))) {
+ while (ACPI_SUCCESS(acpi_get_table("OEM1", i++, &header))) {
if (!memcmp((char *) &header->oem_id, "UNISYS", 6)) {
struct oem_table *t = (struct oem_table *)header;
oem_addrX = t->OEMTableAddr;
oem_size = t->OEMTableSize;
- early_acpi_os_unmap_memory(header, tbl_size);
*oem_addr = (unsigned long)__acpi_map_table(oem_addrX,
oem_size);
return 0;
}
- early_acpi_os_unmap_memory(header, tbl_size);
}
return -1;
}
void __init unmap_unisys_acpi_oem_table(unsigned long oem_addr)
{
- if (!oem_addr)
- return;
-
- __acpi_unmap_table((char *)oem_addr, oem_size);
}
#endif
@@ -324,26 +359,6 @@ es7000_mip_write(struct mip_reg *mip_reg)
return status;
}
-int
-es7000_start_cpu(int cpu, unsigned long eip)
-{
- unsigned long vect = 0, psaival = 0;
-
- if (psai == NULL)
- return -1;
-
- vect = ((unsigned long)__pa(eip)/0x1000) << 16;
- psaival = (0x1000000 | vect | cpu);
-
- while (*psai & 0x1000000)
- ;
-
- *psai = psaival;
-
- return 0;
-
-}
-
void __init
es7000_sw_apic(void)
{
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 50ea0ac8c9bf..1b43086b097a 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -14,14 +14,17 @@
#include <linux/uaccess.h>
#include <linux/ftrace.h>
#include <linux/percpu.h>
+#include <linux/sched.h>
#include <linux/init.h>
#include <linux/list.h>
#include <asm/ftrace.h>
+#include <linux/ftrace.h>
#include <asm/nops.h>
+#include <asm/nmi.h>
-static unsigned char ftrace_nop[MCOUNT_INSN_SIZE];
+#ifdef CONFIG_DYNAMIC_FTRACE
union ftrace_code_union {
char code[MCOUNT_INSN_SIZE];
@@ -31,18 +34,12 @@ union ftrace_code_union {
} __attribute__((packed));
};
-
static int ftrace_calc_offset(long ip, long addr)
{
return (int)(addr - ip);
}
-unsigned char *ftrace_nop_replace(void)
-{
- return ftrace_nop;
-}
-
-unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
+static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
{
static union ftrace_code_union calc;
@@ -56,7 +53,142 @@ unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
return calc.code;
}
-int
+/*
+ * Modifying code must take extra care. On an SMP machine, if
+ * the code being modified is also being executed on another CPU
+ * that CPU will have undefined results and possibly take a GPF.
+ * We use kstop_machine to stop other CPUS from exectuing code.
+ * But this does not stop NMIs from happening. We still need
+ * to protect against that. We separate out the modification of
+ * the code to take care of this.
+ *
+ * Two buffers are added: An IP buffer and a "code" buffer.
+ *
+ * 1) Put the instruction pointer into the IP buffer
+ * and the new code into the "code" buffer.
+ * 2) Set a flag that says we are modifying code
+ * 3) Wait for any running NMIs to finish.
+ * 4) Write the code
+ * 5) clear the flag.
+ * 6) Wait for any running NMIs to finish.
+ *
+ * If an NMI is executed, the first thing it does is to call
+ * "ftrace_nmi_enter". This will check if the flag is set to write
+ * and if it is, it will write what is in the IP and "code" buffers.
+ *
+ * The trick is, it does not matter if everyone is writing the same
+ * content to the code location. Also, if a CPU is executing code
+ * it is OK to write to that code location if the contents being written
+ * are the same as what exists.
+ */
+
+static atomic_t in_nmi = ATOMIC_INIT(0);
+static int mod_code_status; /* holds return value of text write */
+static int mod_code_write; /* set when NMI should do the write */
+static void *mod_code_ip; /* holds the IP to write to */
+static void *mod_code_newcode; /* holds the text to write to the IP */
+
+static unsigned nmi_wait_count;
+static atomic_t nmi_update_count = ATOMIC_INIT(0);
+
+int ftrace_arch_read_dyn_info(char *buf, int size)
+{
+ int r;
+
+ r = snprintf(buf, size, "%u %u",
+ nmi_wait_count,
+ atomic_read(&nmi_update_count));
+ return r;
+}
+
+static void ftrace_mod_code(void)
+{
+ /*
+ * Yes, more than one CPU process can be writing to mod_code_status.
+ * (and the code itself)
+ * But if one were to fail, then they all should, and if one were
+ * to succeed, then they all should.
+ */
+ mod_code_status = probe_kernel_write(mod_code_ip, mod_code_newcode,
+ MCOUNT_INSN_SIZE);
+}
+
+void ftrace_nmi_enter(void)
+{
+ atomic_inc(&in_nmi);
+ /* Must have in_nmi seen before reading write flag */
+ smp_mb();
+ if (mod_code_write) {
+ ftrace_mod_code();
+ atomic_inc(&nmi_update_count);
+ }
+}
+
+void ftrace_nmi_exit(void)
+{
+ /* Finish all executions before clearing in_nmi */
+ smp_wmb();
+ atomic_dec(&in_nmi);
+}
+
+static void wait_for_nmi(void)
+{
+ int waited = 0;
+
+ while (atomic_read(&in_nmi)) {
+ waited = 1;
+ cpu_relax();
+ }
+
+ if (waited)
+ nmi_wait_count++;
+}
+
+static int
+do_ftrace_mod_code(unsigned long ip, void *new_code)
+{
+ mod_code_ip = (void *)ip;
+ mod_code_newcode = new_code;
+
+ /* The buffers need to be visible before we let NMIs write them */
+ smp_wmb();
+
+ mod_code_write = 1;
+
+ /* Make sure write bit is visible before we wait on NMIs */
+ smp_mb();
+
+ wait_for_nmi();
+
+ /* Make sure all running NMIs have finished before we write the code */
+ smp_mb();
+
+ ftrace_mod_code();
+
+ /* Make sure the write happens before clearing the bit */
+ smp_wmb();
+
+ mod_code_write = 0;
+
+ /* make sure NMIs see the cleared bit */
+ smp_mb();
+
+ wait_for_nmi();
+
+ return mod_code_status;
+}
+
+
+
+
+static unsigned char ftrace_nop[MCOUNT_INSN_SIZE];
+
+static unsigned char *ftrace_nop_replace(void)
+{
+ return ftrace_nop;
+}
+
+static int
ftrace_modify_code(unsigned long ip, unsigned char *old_code,
unsigned char *new_code)
{
@@ -81,7 +213,7 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code,
return -EINVAL;
/* replace the text with the new text */
- if (probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE))
+ if (do_ftrace_mod_code(ip, new_code))
return -EPERM;
sync_core();
@@ -89,6 +221,29 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code,
return 0;
}
+int ftrace_make_nop(struct module *mod,
+ struct dyn_ftrace *rec, unsigned long addr)
+{
+ unsigned char *new, *old;
+ unsigned long ip = rec->ip;
+
+ old = ftrace_call_replace(ip, addr);
+ new = ftrace_nop_replace();
+
+ return ftrace_modify_code(rec->ip, old, new);
+}
+
+int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+ unsigned char *new, *old;
+ unsigned long ip = rec->ip;
+
+ old = ftrace_nop_replace();
+ new = ftrace_call_replace(ip, addr);
+
+ return ftrace_modify_code(rec->ip, old, new);
+}
+
int ftrace_update_ftrace_func(ftrace_func_t func)
{
unsigned long ip = (unsigned long)(&ftrace_call);
@@ -165,3 +320,218 @@ int __init ftrace_dyn_arch_init(void *data)
return 0;
}
+#endif
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+extern void ftrace_graph_call(void);
+
+static int ftrace_mod_jmp(unsigned long ip,
+ int old_offset, int new_offset)
+{
+ unsigned char code[MCOUNT_INSN_SIZE];
+
+ if (probe_kernel_read(code, (void *)ip, MCOUNT_INSN_SIZE))
+ return -EFAULT;
+
+ if (code[0] != 0xe9 || old_offset != *(int *)(&code[1]))
+ return -EINVAL;
+
+ *(int *)(&code[1]) = new_offset;
+
+ if (do_ftrace_mod_code(ip, &code))
+ return -EPERM;
+
+ return 0;
+}
+
+int ftrace_enable_ftrace_graph_caller(void)
+{
+ unsigned long ip = (unsigned long)(&ftrace_graph_call);
+ int old_offset, new_offset;
+
+ old_offset = (unsigned long)(&ftrace_stub) - (ip + MCOUNT_INSN_SIZE);
+ new_offset = (unsigned long)(&ftrace_graph_caller) - (ip + MCOUNT_INSN_SIZE);
+
+ return ftrace_mod_jmp(ip, old_offset, new_offset);
+}
+
+int ftrace_disable_ftrace_graph_caller(void)
+{
+ unsigned long ip = (unsigned long)(&ftrace_graph_call);
+ int old_offset, new_offset;
+
+ old_offset = (unsigned long)(&ftrace_graph_caller) - (ip + MCOUNT_INSN_SIZE);
+ new_offset = (unsigned long)(&ftrace_stub) - (ip + MCOUNT_INSN_SIZE);
+
+ return ftrace_mod_jmp(ip, old_offset, new_offset);
+}
+
+#else /* CONFIG_DYNAMIC_FTRACE */
+
+/*
+ * These functions are picked from those used on
+ * this page for dynamic ftrace. They have been
+ * simplified to ignore all traces in NMI context.
+ */
+static atomic_t in_nmi;
+
+void ftrace_nmi_enter(void)
+{
+ atomic_inc(&in_nmi);
+}
+
+void ftrace_nmi_exit(void)
+{
+ atomic_dec(&in_nmi);
+}
+
+#endif /* !CONFIG_DYNAMIC_FTRACE */
+
+/* Add a function return address to the trace stack on thread info.*/
+static int push_return_trace(unsigned long ret, unsigned long long time,
+ unsigned long func, int *depth)
+{
+ int index;
+
+ if (!current->ret_stack)
+ return -EBUSY;
+
+ /* The return trace stack is full */
+ if (current->curr_ret_stack == FTRACE_RETFUNC_DEPTH - 1) {
+ atomic_inc(&current->trace_overrun);
+ return -EBUSY;
+ }
+
+ index = ++current->curr_ret_stack;
+ barrier();
+ current->ret_stack[index].ret = ret;
+ current->ret_stack[index].func = func;
+ current->ret_stack[index].calltime = time;
+ *depth = index;
+
+ return 0;
+}
+
+/* Retrieve a function return address to the trace stack on thread info.*/
+static void pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret)
+{
+ int index;
+
+ index = current->curr_ret_stack;
+
+ if (unlikely(index < 0)) {
+ ftrace_graph_stop();
+ WARN_ON(1);
+ /* Might as well panic, otherwise we have no where to go */
+ *ret = (unsigned long)panic;
+ return;
+ }
+
+ *ret = current->ret_stack[index].ret;
+ trace->func = current->ret_stack[index].func;
+ trace->calltime = current->ret_stack[index].calltime;
+ trace->overrun = atomic_read(&current->trace_overrun);
+ trace->depth = index;
+ barrier();
+ current->curr_ret_stack--;
+
+}
+
+/*
+ * Send the trace to the ring-buffer.
+ * @return the original return address.
+ */
+unsigned long ftrace_return_to_handler(void)
+{
+ struct ftrace_graph_ret trace;
+ unsigned long ret;
+
+ pop_return_trace(&trace, &ret);
+ trace.rettime = cpu_clock(raw_smp_processor_id());
+ ftrace_graph_return(&trace);
+
+ if (unlikely(!ret)) {
+ ftrace_graph_stop();
+ WARN_ON(1);
+ /* Might as well panic. What else to do? */
+ ret = (unsigned long)panic;
+ }
+
+ return ret;
+}
+
+/*
+ * Hook the return address and push it in the stack of return addrs
+ * in current thread info.
+ */
+void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
+{
+ unsigned long old;
+ unsigned long long calltime;
+ int faulted;
+ struct ftrace_graph_ent trace;
+ unsigned long return_hooker = (unsigned long)
+ &return_to_handler;
+
+ /* Nmi's are currently unsupported */
+ if (unlikely(atomic_read(&in_nmi)))
+ return;
+
+ if (unlikely(atomic_read(&current->tracing_graph_pause)))
+ return;
+
+ /*
+ * Protect against fault, even if it shouldn't
+ * happen. This tool is too much intrusive to
+ * ignore such a protection.
+ */
+ asm volatile(
+ "1: " _ASM_MOV " (%[parent_old]), %[old]\n"
+ "2: " _ASM_MOV " %[return_hooker], (%[parent_replaced])\n"
+ " movl $0, %[faulted]\n"
+
+ ".section .fixup, \"ax\"\n"
+ "3: movl $1, %[faulted]\n"
+ ".previous\n"
+
+ _ASM_EXTABLE(1b, 3b)
+ _ASM_EXTABLE(2b, 3b)
+
+ : [parent_replaced] "=r" (parent), [old] "=r" (old),
+ [faulted] "=r" (faulted)
+ : [parent_old] "0" (parent), [return_hooker] "r" (return_hooker)
+ : "memory"
+ );
+
+ if (unlikely(faulted)) {
+ ftrace_graph_stop();
+ WARN_ON(1);
+ return;
+ }
+
+ if (unlikely(!__kernel_text_address(old))) {
+ ftrace_graph_stop();
+ *parent = old;
+ WARN_ON(1);
+ return;
+ }
+
+ calltime = cpu_clock(raw_smp_processor_id());
+
+ if (push_return_trace(old, calltime,
+ self_addr, &trace.depth) == -EBUSY) {
+ *parent = old;
+ return;
+ }
+
+ trace.func = self_addr;
+
+ /* Only trace if the calling function expects to */
+ if (!ftrace_graph_entry(&trace)) {
+ current->curr_ret_stack--;
+ *parent = old;
+ }
+}
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c
index 6c9bfc9e1e95..2bced78b0b8e 100644
--- a/arch/x86/kernel/genapic_64.c
+++ b/arch/x86/kernel/genapic_64.c
@@ -21,6 +21,7 @@
#include <asm/smp.h>
#include <asm/ipi.h>
#include <asm/genapic.h>
+#include <asm/setup.h>
extern struct genapic apic_flat;
extern struct genapic apic_physflat;
@@ -53,6 +54,9 @@ void __init setup_apic_routing(void)
genapic = &apic_physflat;
printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name);
}
+
+ if (x86_quirks->update_genapic)
+ x86_quirks->update_genapic();
}
/* Same for both flat and physical. */
diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c
index c0262791bda4..7fa5f49c2dda 100644
--- a/arch/x86/kernel/genapic_flat_64.c
+++ b/arch/x86/kernel/genapic_flat_64.c
@@ -30,12 +30,12 @@ static int flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
return 1;
}
-static cpumask_t flat_target_cpus(void)
+static const struct cpumask *flat_target_cpus(void)
{
- return cpu_online_map;
+ return cpu_online_mask;
}
-static cpumask_t flat_vector_allocation_domain(int cpu)
+static void flat_vector_allocation_domain(int cpu, struct cpumask *retmask)
{
/* Careful. Some cpus do not strictly honor the set of cpus
* specified in the interrupt destination when using lowest
@@ -45,8 +45,8 @@ static cpumask_t flat_vector_allocation_domain(int cpu)
* deliver interrupts to the wrong hyperthread when only one
* hyperthread was specified in the interrupt desitination.
*/
- cpumask_t domain = { { [0] = APIC_ALL_CPUS, } };
- return domain;
+ cpumask_clear(retmask);
+ cpumask_bits(retmask)[0] = APIC_ALL_CPUS;
}
/*
@@ -69,9 +69,8 @@ static void flat_init_apic_ldr(void)
apic_write(APIC_LDR, val);
}
-static void flat_send_IPI_mask(cpumask_t cpumask, int vector)
+static inline void _flat_send_IPI_mask(unsigned long mask, int vector)
{
- unsigned long mask = cpus_addr(cpumask)[0];
unsigned long flags;
local_irq_save(flags);
@@ -79,20 +78,41 @@ static void flat_send_IPI_mask(cpumask_t cpumask, int vector)
local_irq_restore(flags);
}
+static void flat_send_IPI_mask(const struct cpumask *cpumask, int vector)
+{
+ unsigned long mask = cpumask_bits(cpumask)[0];
+
+ _flat_send_IPI_mask(mask, vector);
+}
+
+static void flat_send_IPI_mask_allbutself(const struct cpumask *cpumask,
+ int vector)
+{
+ unsigned long mask = cpumask_bits(cpumask)[0];
+ int cpu = smp_processor_id();
+
+ if (cpu < BITS_PER_LONG)
+ clear_bit(cpu, &mask);
+ _flat_send_IPI_mask(mask, vector);
+}
+
static void flat_send_IPI_allbutself(int vector)
{
+ int cpu = smp_processor_id();
#ifdef CONFIG_HOTPLUG_CPU
int hotplug = 1;
#else
int hotplug = 0;
#endif
if (hotplug || vector == NMI_VECTOR) {
- cpumask_t allbutme = cpu_online_map;
+ if (!cpumask_equal(cpu_online_mask, cpumask_of(cpu))) {
+ unsigned long mask = cpumask_bits(cpu_online_mask)[0];
- cpu_clear(smp_processor_id(), allbutme);
+ if (cpu < BITS_PER_LONG)
+ clear_bit(cpu, &mask);
- if (!cpus_empty(allbutme))
- flat_send_IPI_mask(allbutme, vector);
+ _flat_send_IPI_mask(mask, vector);
+ }
} else if (num_online_cpus() > 1) {
__send_IPI_shortcut(APIC_DEST_ALLBUT, vector,APIC_DEST_LOGICAL);
}
@@ -101,7 +121,7 @@ static void flat_send_IPI_allbutself(int vector)
static void flat_send_IPI_all(int vector)
{
if (vector == NMI_VECTOR)
- flat_send_IPI_mask(cpu_online_map, vector);
+ flat_send_IPI_mask(cpu_online_mask, vector);
else
__send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL);
}
@@ -135,9 +155,18 @@ static int flat_apic_id_registered(void)
return physid_isset(read_xapic_id(), phys_cpu_present_map);
}
-static unsigned int flat_cpu_mask_to_apicid(cpumask_t cpumask)
+static unsigned int flat_cpu_mask_to_apicid(const struct cpumask *cpumask)
+{
+ return cpumask_bits(cpumask)[0] & APIC_ALL_CPUS;
+}
+
+static unsigned int flat_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+ const struct cpumask *andmask)
{
- return cpus_addr(cpumask)[0] & APIC_ALL_CPUS;
+ unsigned long mask1 = cpumask_bits(cpumask)[0] & APIC_ALL_CPUS;
+ unsigned long mask2 = cpumask_bits(andmask)[0] & APIC_ALL_CPUS;
+
+ return mask1 & mask2;
}
static unsigned int phys_pkg_id(int index_msb)
@@ -157,8 +186,10 @@ struct genapic apic_flat = {
.send_IPI_all = flat_send_IPI_all,
.send_IPI_allbutself = flat_send_IPI_allbutself,
.send_IPI_mask = flat_send_IPI_mask,
+ .send_IPI_mask_allbutself = flat_send_IPI_mask_allbutself,
.send_IPI_self = apic_send_IPI_self,
.cpu_mask_to_apicid = flat_cpu_mask_to_apicid,
+ .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and,
.phys_pkg_id = phys_pkg_id,
.get_apic_id = get_apic_id,
.set_apic_id = set_apic_id,
@@ -188,35 +219,39 @@ static int physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
return 0;
}
-static cpumask_t physflat_target_cpus(void)
+static const struct cpumask *physflat_target_cpus(void)
{
- return cpu_online_map;
+ return cpu_online_mask;
}
-static cpumask_t physflat_vector_allocation_domain(int cpu)
+static void physflat_vector_allocation_domain(int cpu, struct cpumask *retmask)
{
- return cpumask_of_cpu(cpu);
+ cpumask_clear(retmask);
+ cpumask_set_cpu(cpu, retmask);
}
-static void physflat_send_IPI_mask(cpumask_t cpumask, int vector)
+static void physflat_send_IPI_mask(const struct cpumask *cpumask, int vector)
{
send_IPI_mask_sequence(cpumask, vector);
}
-static void physflat_send_IPI_allbutself(int vector)
+static void physflat_send_IPI_mask_allbutself(const struct cpumask *cpumask,
+ int vector)
{
- cpumask_t allbutme = cpu_online_map;
+ send_IPI_mask_allbutself(cpumask, vector);
+}
- cpu_clear(smp_processor_id(), allbutme);
- physflat_send_IPI_mask(allbutme, vector);
+static void physflat_send_IPI_allbutself(int vector)
+{
+ send_IPI_mask_allbutself(cpu_online_mask, vector);
}
static void physflat_send_IPI_all(int vector)
{
- physflat_send_IPI_mask(cpu_online_map, vector);
+ physflat_send_IPI_mask(cpu_online_mask, vector);
}
-static unsigned int physflat_cpu_mask_to_apicid(cpumask_t cpumask)
+static unsigned int physflat_cpu_mask_to_apicid(const struct cpumask *cpumask)
{
int cpu;
@@ -224,13 +259,29 @@ static unsigned int physflat_cpu_mask_to_apicid(cpumask_t cpumask)
* We're using fixed IRQ delivery, can only return one phys APIC ID.
* May as well be the first.
*/
- cpu = first_cpu(cpumask);
+ cpu = cpumask_first(cpumask);
if ((unsigned)cpu < nr_cpu_ids)
return per_cpu(x86_cpu_to_apicid, cpu);
else
return BAD_APICID;
}
+static unsigned int
+physflat_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+ const struct cpumask *andmask)
+{
+ int cpu;
+
+ /*
+ * We're using fixed IRQ delivery, can only return one phys APIC ID.
+ * May as well be the first.
+ */
+ cpu = cpumask_any_and(cpumask, andmask);
+ if (cpu < nr_cpu_ids)
+ return per_cpu(x86_cpu_to_apicid, cpu);
+ return BAD_APICID;
+}
+
struct genapic apic_physflat = {
.name = "physical flat",
.acpi_madt_oem_check = physflat_acpi_madt_oem_check,
@@ -243,8 +294,10 @@ struct genapic apic_physflat = {
.send_IPI_all = physflat_send_IPI_all,
.send_IPI_allbutself = physflat_send_IPI_allbutself,
.send_IPI_mask = physflat_send_IPI_mask,
+ .send_IPI_mask_allbutself = physflat_send_IPI_mask_allbutself,
.send_IPI_self = apic_send_IPI_self,
.cpu_mask_to_apicid = physflat_cpu_mask_to_apicid,
+ .cpu_mask_to_apicid_and = physflat_cpu_mask_to_apicid_and,
.phys_pkg_id = phys_pkg_id,
.get_apic_id = get_apic_id,
.set_apic_id = set_apic_id,
diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c
index f6a2c8eb48a6..4716a0c9f936 100644
--- a/arch/x86/kernel/genx2apic_cluster.c
+++ b/arch/x86/kernel/genx2apic_cluster.c
@@ -22,19 +22,18 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */
-static cpumask_t x2apic_target_cpus(void)
+static const struct cpumask *x2apic_target_cpus(void)
{
- return cpumask_of_cpu(0);
+ return cpumask_of(0);
}
/*
* for now each logical cpu is in its own vector allocation domain.
*/
-static cpumask_t x2apic_vector_allocation_domain(int cpu)
+static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask)
{
- cpumask_t domain = CPU_MASK_NONE;
- cpu_set(cpu, domain);
- return domain;
+ cpumask_clear(retmask);
+ cpumask_set_cpu(cpu, retmask);
}
static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
@@ -56,32 +55,53 @@ static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
* at once. We have 16 cpu's in a cluster. This will minimize IPI register
* writes.
*/
-static void x2apic_send_IPI_mask(cpumask_t mask, int vector)
+static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector)
{
unsigned long flags;
unsigned long query_cpu;
local_irq_save(flags);
- for_each_cpu_mask(query_cpu, mask) {
- __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_logical_apicid, query_cpu),
- vector, APIC_DEST_LOGICAL);
- }
+ for_each_cpu(query_cpu, mask)
+ __x2apic_send_IPI_dest(
+ per_cpu(x86_cpu_to_logical_apicid, query_cpu),
+ vector, APIC_DEST_LOGICAL);
local_irq_restore(flags);
}
-static void x2apic_send_IPI_allbutself(int vector)
+static void x2apic_send_IPI_mask_allbutself(const struct cpumask *mask,
+ int vector)
{
- cpumask_t mask = cpu_online_map;
+ unsigned long flags;
+ unsigned long query_cpu;
+ unsigned long this_cpu = smp_processor_id();
- cpu_clear(smp_processor_id(), mask);
+ local_irq_save(flags);
+ for_each_cpu(query_cpu, mask)
+ if (query_cpu != this_cpu)
+ __x2apic_send_IPI_dest(
+ per_cpu(x86_cpu_to_logical_apicid, query_cpu),
+ vector, APIC_DEST_LOGICAL);
+ local_irq_restore(flags);
+}
+
+static void x2apic_send_IPI_allbutself(int vector)
+{
+ unsigned long flags;
+ unsigned long query_cpu;
+ unsigned long this_cpu = smp_processor_id();
- if (!cpus_empty(mask))
- x2apic_send_IPI_mask(mask, vector);
+ local_irq_save(flags);
+ for_each_online_cpu(query_cpu)
+ if (query_cpu != this_cpu)
+ __x2apic_send_IPI_dest(
+ per_cpu(x86_cpu_to_logical_apicid, query_cpu),
+ vector, APIC_DEST_LOGICAL);
+ local_irq_restore(flags);
}
static void x2apic_send_IPI_all(int vector)
{
- x2apic_send_IPI_mask(cpu_online_map, vector);
+ x2apic_send_IPI_mask(cpu_online_mask, vector);
}
static int x2apic_apic_id_registered(void)
@@ -89,7 +109,7 @@ static int x2apic_apic_id_registered(void)
return 1;
}
-static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask)
+static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask)
{
int cpu;
@@ -97,13 +117,28 @@ static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask)
* We're using fixed IRQ delivery, can only return one phys APIC ID.
* May as well be the first.
*/
- cpu = first_cpu(cpumask);
- if ((unsigned)cpu < NR_CPUS)
+ cpu = cpumask_first(cpumask);
+ if ((unsigned)cpu < nr_cpu_ids)
return per_cpu(x86_cpu_to_logical_apicid, cpu);
else
return BAD_APICID;
}
+static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+ const struct cpumask *andmask)
+{
+ int cpu;
+
+ /*
+ * We're using fixed IRQ delivery, can only return one phys APIC ID.
+ * May as well be the first.
+ */
+ cpu = cpumask_any_and(cpumask, andmask);
+ if (cpu < nr_cpu_ids)
+ return per_cpu(x86_cpu_to_apicid, cpu);
+ return BAD_APICID;
+}
+
static unsigned int get_apic_id(unsigned long x)
{
unsigned int id;
@@ -150,8 +185,10 @@ struct genapic apic_x2apic_cluster = {
.send_IPI_all = x2apic_send_IPI_all,
.send_IPI_allbutself = x2apic_send_IPI_allbutself,
.send_IPI_mask = x2apic_send_IPI_mask,
+ .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself,
.send_IPI_self = x2apic_send_IPI_self,
.cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid,
+ .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and,
.phys_pkg_id = phys_pkg_id,
.get_apic_id = get_apic_id,
.set_apic_id = set_apic_id,
diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c
index d042211768b7..b255507884f2 100644
--- a/arch/x86/kernel/genx2apic_phys.c
+++ b/arch/x86/kernel/genx2apic_phys.c
@@ -29,16 +29,15 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */
-static cpumask_t x2apic_target_cpus(void)
+static const struct cpumask *x2apic_target_cpus(void)
{
- return cpumask_of_cpu(0);
+ return cpumask_of(0);
}
-static cpumask_t x2apic_vector_allocation_domain(int cpu)
+static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask)
{
- cpumask_t domain = CPU_MASK_NONE;
- cpu_set(cpu, domain);
- return domain;
+ cpumask_clear(retmask);
+ cpumask_set_cpu(cpu, retmask);
}
static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
@@ -54,32 +53,54 @@ static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
x2apic_icr_write(cfg, apicid);
}
-static void x2apic_send_IPI_mask(cpumask_t mask, int vector)
+static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector)
{
unsigned long flags;
unsigned long query_cpu;
local_irq_save(flags);
- for_each_cpu_mask(query_cpu, mask) {
+ for_each_cpu(query_cpu, mask) {
__x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu),
vector, APIC_DEST_PHYSICAL);
}
local_irq_restore(flags);
}
-static void x2apic_send_IPI_allbutself(int vector)
+static void x2apic_send_IPI_mask_allbutself(const struct cpumask *mask,
+ int vector)
{
- cpumask_t mask = cpu_online_map;
+ unsigned long flags;
+ unsigned long query_cpu;
+ unsigned long this_cpu = smp_processor_id();
+
+ local_irq_save(flags);
+ for_each_cpu(query_cpu, mask) {
+ if (query_cpu != this_cpu)
+ __x2apic_send_IPI_dest(
+ per_cpu(x86_cpu_to_apicid, query_cpu),
+ vector, APIC_DEST_PHYSICAL);
+ }
+ local_irq_restore(flags);
+}
- cpu_clear(smp_processor_id(), mask);
+static void x2apic_send_IPI_allbutself(int vector)
+{
+ unsigned long flags;
+ unsigned long query_cpu;
+ unsigned long this_cpu = smp_processor_id();
- if (!cpus_empty(mask))
- x2apic_send_IPI_mask(mask, vector);
+ local_irq_save(flags);
+ for_each_online_cpu(query_cpu)
+ if (query_cpu != this_cpu)
+ __x2apic_send_IPI_dest(
+ per_cpu(x86_cpu_to_apicid, query_cpu),
+ vector, APIC_DEST_PHYSICAL);
+ local_irq_restore(flags);
}
static void x2apic_send_IPI_all(int vector)
{
- x2apic_send_IPI_mask(cpu_online_map, vector);
+ x2apic_send_IPI_mask(cpu_online_mask, vector);
}
static int x2apic_apic_id_registered(void)
@@ -87,7 +108,7 @@ static int x2apic_apic_id_registered(void)
return 1;
}
-static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask)
+static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask)
{
int cpu;
@@ -95,13 +116,28 @@ static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask)
* We're using fixed IRQ delivery, can only return one phys APIC ID.
* May as well be the first.
*/
- cpu = first_cpu(cpumask);
- if ((unsigned)cpu < NR_CPUS)
+ cpu = cpumask_first(cpumask);
+ if ((unsigned)cpu < nr_cpu_ids)
return per_cpu(x86_cpu_to_apicid, cpu);
else
return BAD_APICID;
}
+static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+ const struct cpumask *andmask)
+{
+ int cpu;
+
+ /*
+ * We're using fixed IRQ delivery, can only return one phys APIC ID.
+ * May as well be the first.
+ */
+ cpu = cpumask_any_and(cpumask, andmask);
+ if (cpu < nr_cpu_ids)
+ return per_cpu(x86_cpu_to_apicid, cpu);
+ return BAD_APICID;
+}
+
static unsigned int get_apic_id(unsigned long x)
{
unsigned int id;
@@ -145,8 +181,10 @@ struct genapic apic_x2apic_phys = {
.send_IPI_all = x2apic_send_IPI_all,
.send_IPI_allbutself = x2apic_send_IPI_allbutself,
.send_IPI_mask = x2apic_send_IPI_mask,
+ .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself,
.send_IPI_self = x2apic_send_IPI_self,
.cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid,
+ .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and,
.phys_pkg_id = phys_pkg_id,
.get_apic_id = get_apic_id,
.set_apic_id = set_apic_id,
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c
index 2c7dbdb98278..3984682cd849 100644
--- a/arch/x86/kernel/genx2apic_uv_x.c
+++ b/arch/x86/kernel/genx2apic_uv_x.c
@@ -75,16 +75,15 @@ EXPORT_SYMBOL(sn_rtc_cycles_per_second);
/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */
-static cpumask_t uv_target_cpus(void)
+static const struct cpumask *uv_target_cpus(void)
{
- return cpumask_of_cpu(0);
+ return cpumask_of(0);
}
-static cpumask_t uv_vector_allocation_domain(int cpu)
+static void uv_vector_allocation_domain(int cpu, struct cpumask *retmask)
{
- cpumask_t domain = CPU_MASK_NONE;
- cpu_set(cpu, domain);
- return domain;
+ cpumask_clear(retmask);
+ cpumask_set_cpu(cpu, retmask);
}
int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip)
@@ -123,28 +122,37 @@ static void uv_send_IPI_one(int cpu, int vector)
uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
}
-static void uv_send_IPI_mask(cpumask_t mask, int vector)
+static void uv_send_IPI_mask(const struct cpumask *mask, int vector)
{
unsigned int cpu;
- for_each_possible_cpu(cpu)
- if (cpu_isset(cpu, mask))
+ for_each_cpu(cpu, mask)
+ uv_send_IPI_one(cpu, vector);
+}
+
+static void uv_send_IPI_mask_allbutself(const struct cpumask *mask, int vector)
+{
+ unsigned int cpu;
+ unsigned int this_cpu = smp_processor_id();
+
+ for_each_cpu(cpu, mask)
+ if (cpu != this_cpu)
uv_send_IPI_one(cpu, vector);
}
static void uv_send_IPI_allbutself(int vector)
{
- cpumask_t mask = cpu_online_map;
-
- cpu_clear(smp_processor_id(), mask);
+ unsigned int cpu;
+ unsigned int this_cpu = smp_processor_id();
- if (!cpus_empty(mask))
- uv_send_IPI_mask(mask, vector);
+ for_each_online_cpu(cpu)
+ if (cpu != this_cpu)
+ uv_send_IPI_one(cpu, vector);
}
static void uv_send_IPI_all(int vector)
{
- uv_send_IPI_mask(cpu_online_map, vector);
+ uv_send_IPI_mask(cpu_online_mask, vector);
}
static int uv_apic_id_registered(void)
@@ -156,7 +164,7 @@ static void uv_init_apic_ldr(void)
{
}
-static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask)
+static unsigned int uv_cpu_mask_to_apicid(const struct cpumask *cpumask)
{
int cpu;
@@ -164,13 +172,28 @@ static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask)
* We're using fixed IRQ delivery, can only return one phys APIC ID.
* May as well be the first.
*/
- cpu = first_cpu(cpumask);
+ cpu = cpumask_first(cpumask);
if ((unsigned)cpu < nr_cpu_ids)
return per_cpu(x86_cpu_to_apicid, cpu);
else
return BAD_APICID;
}
+static unsigned int uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+ const struct cpumask *andmask)
+{
+ int cpu;
+
+ /*
+ * We're using fixed IRQ delivery, can only return one phys APIC ID.
+ * May as well be the first.
+ */
+ cpu = cpumask_any_and(cpumask, andmask);
+ if (cpu < nr_cpu_ids)
+ return per_cpu(x86_cpu_to_apicid, cpu);
+ return BAD_APICID;
+}
+
static unsigned int get_apic_id(unsigned long x)
{
unsigned int id;
@@ -218,8 +241,10 @@ struct genapic apic_x2apic_uv_x = {
.send_IPI_all = uv_send_IPI_all,
.send_IPI_allbutself = uv_send_IPI_allbutself,
.send_IPI_mask = uv_send_IPI_mask,
+ .send_IPI_mask_allbutself = uv_send_IPI_mask_allbutself,
.send_IPI_self = uv_send_IPI_self,
.cpu_mask_to_apicid = uv_cpu_mask_to_apicid,
+ .cpu_mask_to_apicid_and = uv_cpu_mask_to_apicid_and,
.phys_pkg_id = phys_pkg_id,
.get_apic_id = get_apic_id,
.set_apic_id = set_apic_id,
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 067d8de913f6..e76d7e272974 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -246,7 +246,7 @@ static void hpet_legacy_clockevent_register(void)
* Start hpet with the boot cpu mask and make it
* global after the IO_APIC has been initialized.
*/
- hpet_clockevent.cpumask = cpumask_of_cpu(smp_processor_id());
+ hpet_clockevent.cpumask = cpumask_of(smp_processor_id());
clockevents_register_device(&hpet_clockevent);
global_clock_event = &hpet_clockevent;
printk(KERN_DEBUG "hpet clockevent registered\n");
@@ -301,7 +301,7 @@ static void hpet_set_mode(enum clock_event_mode mode,
struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);
hpet_setup_msi_irq(hdev->irq);
disable_irq(hdev->irq);
- irq_set_affinity(hdev->irq, cpumask_of_cpu(hdev->cpu));
+ irq_set_affinity(hdev->irq, cpumask_of(hdev->cpu));
enable_irq(hdev->irq);
}
break;
@@ -449,7 +449,7 @@ static int hpet_setup_irq(struct hpet_dev *dev)
return -1;
disable_irq(dev->irq);
- irq_set_affinity(dev->irq, cpumask_of_cpu(dev->cpu));
+ irq_set_affinity(dev->irq, cpumask_of(dev->cpu));
enable_irq(dev->irq);
printk(KERN_DEBUG "hpet: %s irq %d for MSI\n",
@@ -500,7 +500,7 @@ static void init_one_hpet_msi_clockevent(struct hpet_dev *hdev, int cpu)
/* 5 usec minimum reprogramming delta. */
evt->min_delta_ns = 5000;
- evt->cpumask = cpumask_of_cpu(hdev->cpu);
+ evt->cpumask = cpumask_of(hdev->cpu);
clockevents_register_device(evt);
}
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index 1f20608d4ca8..b0f61f0dcd0a 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -58,7 +58,7 @@ void __cpuinit mxcsr_feature_mask_init(void)
stts();
}
-void __init init_thread_xstate(void)
+void __cpuinit init_thread_xstate(void)
{
if (!HAVE_HWFP) {
xstate_size = sizeof(struct i387_soft_struct);
diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c
index c1b5e3ece1f2..10f92fb532f3 100644
--- a/arch/x86/kernel/i8253.c
+++ b/arch/x86/kernel/i8253.c
@@ -114,7 +114,7 @@ void __init setup_pit_timer(void)
* Start pit with the boot cpu mask and make it global after the
* IO_APIC has been initialized.
*/
- pit_clockevent.cpumask = cpumask_of_cpu(smp_processor_id());
+ pit_clockevent.cpumask = cpumask_of(smp_processor_id());
pit_clockevent.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC,
pit_clockevent.shift);
pit_clockevent.max_delta_ns =
diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 7a3f2028e2eb..6bd51ce3ce32 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -108,93 +108,271 @@ static int __init parse_noapic(char *str)
early_param("noapic", parse_noapic);
struct irq_pin_list;
+
+/*
+ * This is performance-critical, we want to do it O(1)
+ *
+ * the indexing order of this array favors 1:1 mappings
+ * between pins and IRQs.
+ */
+
+struct irq_pin_list {
+ int apic, pin;
+ struct irq_pin_list *next;
+};
+
+static struct irq_pin_list *get_one_free_irq_2_pin(int cpu)
+{
+ struct irq_pin_list *pin;
+ int node;
+
+ node = cpu_to_node(cpu);
+
+ pin = kzalloc_node(sizeof(*pin), GFP_ATOMIC, node);
+ printk(KERN_DEBUG " alloc irq_2_pin on cpu %d node %d\n", cpu, node);
+
+ return pin;
+}
+
struct irq_cfg {
- unsigned int irq;
struct irq_pin_list *irq_2_pin;
- cpumask_t domain;
- cpumask_t old_domain;
+ cpumask_var_t domain;
+ cpumask_var_t old_domain;
unsigned move_cleanup_count;
u8 vector;
u8 move_in_progress : 1;
+#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
+ u8 move_desc_pending : 1;
+#endif
};
/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
+#ifdef CONFIG_SPARSE_IRQ
+static struct irq_cfg irq_cfgx[] = {
+#else
static struct irq_cfg irq_cfgx[NR_IRQS] = {
- [0] = { .irq = 0, .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR, },
- [1] = { .irq = 1, .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR, },
- [2] = { .irq = 2, .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR, },
- [3] = { .irq = 3, .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR, },
- [4] = { .irq = 4, .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR, },
- [5] = { .irq = 5, .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR, },
- [6] = { .irq = 6, .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR, },
- [7] = { .irq = 7, .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR, },
- [8] = { .irq = 8, .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR, },
- [9] = { .irq = 9, .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR, },
- [10] = { .irq = 10, .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, },
- [11] = { .irq = 11, .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, },
- [12] = { .irq = 12, .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, },
- [13] = { .irq = 13, .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, },
- [14] = { .irq = 14, .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, },
- [15] = { .irq = 15, .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, },
+#endif
+ [0] = { .vector = IRQ0_VECTOR, },
+ [1] = { .vector = IRQ1_VECTOR, },
+ [2] = { .vector = IRQ2_VECTOR, },
+ [3] = { .vector = IRQ3_VECTOR, },
+ [4] = { .vector = IRQ4_VECTOR, },
+ [5] = { .vector = IRQ5_VECTOR, },
+ [6] = { .vector = IRQ6_VECTOR, },
+ [7] = { .vector = IRQ7_VECTOR, },
+ [8] = { .vector = IRQ8_VECTOR, },
+ [9] = { .vector = IRQ9_VECTOR, },
+ [10] = { .vector = IRQ10_VECTOR, },
+ [11] = { .vector = IRQ11_VECTOR, },
+ [12] = { .vector = IRQ12_VECTOR, },
+ [13] = { .vector = IRQ13_VECTOR, },
+ [14] = { .vector = IRQ14_VECTOR, },
+ [15] = { .vector = IRQ15_VECTOR, },
};
-#define for_each_irq_cfg(irq, cfg) \
- for (irq = 0, cfg = irq_cfgx; irq < nr_irqs; irq++, cfg++)
+void __init arch_early_irq_init(void)
+{
+ struct irq_cfg *cfg;
+ struct irq_desc *desc;
+ int count;
+ int i;
+ cfg = irq_cfgx;
+ count = ARRAY_SIZE(irq_cfgx);
+
+ for (i = 0; i < count; i++) {
+ desc = irq_to_desc(i);
+ desc->chip_data = &cfg[i];
+ alloc_bootmem_cpumask_var(&cfg[i].domain);
+ alloc_bootmem_cpumask_var(&cfg[i].old_domain);
+ if (i < NR_IRQS_LEGACY)
+ cpumask_setall(cfg[i].domain);
+ }
+}
+
+#ifdef CONFIG_SPARSE_IRQ
static struct irq_cfg *irq_cfg(unsigned int irq)
{
- return irq < nr_irqs ? irq_cfgx + irq : NULL;
+ struct irq_cfg *cfg = NULL;
+ struct irq_desc *desc;
+
+ desc = irq_to_desc(irq);
+ if (desc)
+ cfg = desc->chip_data;
+
+ return cfg;
}
-static struct irq_cfg *irq_cfg_alloc(unsigned int irq)
+static struct irq_cfg *get_one_free_irq_cfg(int cpu)
{
- return irq_cfg(irq);
+ struct irq_cfg *cfg;
+ int node;
+
+ node = cpu_to_node(cpu);
+
+ cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node);
+ if (cfg) {
+ /* FIXME: needs alloc_cpumask_var_node() */
+ if (!alloc_cpumask_var(&cfg->domain, GFP_ATOMIC)) {
+ kfree(cfg);
+ cfg = NULL;
+ } else if (!alloc_cpumask_var(&cfg->old_domain, GFP_ATOMIC)) {
+ free_cpumask_var(cfg->domain);
+ kfree(cfg);
+ cfg = NULL;
+ } else {
+ cpumask_clear(cfg->domain);
+ cpumask_clear(cfg->old_domain);
+ }
+ }
+ printk(KERN_DEBUG " alloc irq_cfg on cpu %d node %d\n", cpu, node);
+
+ return cfg;
}
-/*
- * Rough estimation of how many shared IRQs there are, can be changed
- * anytime.
- */
-#define MAX_PLUS_SHARED_IRQS NR_IRQS
-#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS)
+void arch_init_chip_data(struct irq_desc *desc, int cpu)
+{
+ struct irq_cfg *cfg;
-/*
- * This is performance-critical, we want to do it O(1)
- *
- * the indexing order of this array favors 1:1 mappings
- * between pins and IRQs.
- */
+ cfg = desc->chip_data;
+ if (!cfg) {
+ desc->chip_data = get_one_free_irq_cfg(cpu);
+ if (!desc->chip_data) {
+ printk(KERN_ERR "can not alloc irq_cfg\n");
+ BUG_ON(1);
+ }
+ }
+}
-struct irq_pin_list {
- int apic, pin;
- struct irq_pin_list *next;
-};
+#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
-static struct irq_pin_list irq_2_pin_head[PIN_MAP_SIZE];
-static struct irq_pin_list *irq_2_pin_ptr;
+static void
+init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu)
+{
+ struct irq_pin_list *old_entry, *head, *tail, *entry;
+
+ cfg->irq_2_pin = NULL;
+ old_entry = old_cfg->irq_2_pin;
+ if (!old_entry)
+ return;
+
+ entry = get_one_free_irq_2_pin(cpu);
+ if (!entry)
+ return;
-static void __init irq_2_pin_init(void)
+ entry->apic = old_entry->apic;
+ entry->pin = old_entry->pin;
+ head = entry;
+ tail = entry;
+ old_entry = old_entry->next;
+ while (old_entry) {
+ entry = get_one_free_irq_2_pin(cpu);
+ if (!entry) {
+ entry = head;
+ while (entry) {
+ head = entry->next;
+ kfree(entry);
+ entry = head;
+ }
+ /* still use the old one */
+ return;
+ }
+ entry->apic = old_entry->apic;
+ entry->pin = old_entry->pin;
+ tail->next = entry;
+ tail = entry;
+ old_entry = old_entry->next;
+ }
+
+ tail->next = NULL;
+ cfg->irq_2_pin = head;
+}
+
+static void free_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg)
{
- struct irq_pin_list *pin = irq_2_pin_head;
- int i;
+ struct irq_pin_list *entry, *next;
- for (i = 1; i < PIN_MAP_SIZE; i++)
- pin[i-1].next = &pin[i];
+ if (old_cfg->irq_2_pin == cfg->irq_2_pin)
+ return;
- irq_2_pin_ptr = &pin[0];
+ entry = old_cfg->irq_2_pin;
+
+ while (entry) {
+ next = entry->next;
+ kfree(entry);
+ entry = next;
+ }
+ old_cfg->irq_2_pin = NULL;
}
-static struct irq_pin_list *get_one_free_irq_2_pin(void)
+void arch_init_copy_chip_data(struct irq_desc *old_desc,
+ struct irq_desc *desc, int cpu)
{
- struct irq_pin_list *pin = irq_2_pin_ptr;
+ struct irq_cfg *cfg;
+ struct irq_cfg *old_cfg;
- if (!pin)
- panic("can not get more irq_2_pin\n");
+ cfg = get_one_free_irq_cfg(cpu);
- irq_2_pin_ptr = pin->next;
- pin->next = NULL;
- return pin;
+ if (!cfg)
+ return;
+
+ desc->chip_data = cfg;
+
+ old_cfg = old_desc->chip_data;
+
+ memcpy(cfg, old_cfg, sizeof(struct irq_cfg));
+
+ init_copy_irq_2_pin(old_cfg, cfg, cpu);
+}
+
+static void free_irq_cfg(struct irq_cfg *old_cfg)
+{
+ kfree(old_cfg);
+}
+
+void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc)
+{
+ struct irq_cfg *old_cfg, *cfg;
+
+ old_cfg = old_desc->chip_data;
+ cfg = desc->chip_data;
+
+ if (old_cfg == cfg)
+ return;
+
+ if (old_cfg) {
+ free_irq_2_pin(old_cfg, cfg);
+ free_irq_cfg(old_cfg);
+ old_desc->chip_data = NULL;
+ }
+}
+
+static void set_extra_move_desc(struct irq_desc *desc, cpumask_t mask)
+{
+ struct irq_cfg *cfg = desc->chip_data;
+
+ if (!cfg->move_in_progress) {
+ /* it means that domain is not changed */
+ if (!cpus_intersects(desc->affinity, mask))
+ cfg->move_desc_pending = 1;
+ }
}
+#endif
+
+#else
+static struct irq_cfg *irq_cfg(unsigned int irq)
+{
+ return irq < nr_irqs ? irq_cfgx + irq : NULL;
+}
+
+#endif
+
+#ifndef CONFIG_NUMA_MIGRATE_IRQ_DESC
+static inline void
+set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask)
+{
+}
+#endif
struct io_apic {
unsigned int index;
@@ -237,11 +415,10 @@ static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned
writel(value, &io_apic->data);
}
-static bool io_apic_level_ack_pending(unsigned int irq)
+static bool io_apic_level_ack_pending(struct irq_cfg *cfg)
{
struct irq_pin_list *entry;
unsigned long flags;
- struct irq_cfg *cfg = irq_cfg(irq);
spin_lock_irqsave(&ioapic_lock, flags);
entry = cfg->irq_2_pin;
@@ -323,13 +500,32 @@ static void ioapic_mask_entry(int apic, int pin)
}
#ifdef CONFIG_SMP
-static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
+static void send_cleanup_vector(struct irq_cfg *cfg)
+{
+ cpumask_var_t cleanup_mask;
+
+ if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) {
+ unsigned int i;
+ cfg->move_cleanup_count = 0;
+ for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
+ cfg->move_cleanup_count++;
+ for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
+ send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR);
+ } else {
+ cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask);
+ cfg->move_cleanup_count = cpumask_weight(cleanup_mask);
+ send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
+ free_cpumask_var(cleanup_mask);
+ }
+ cfg->move_in_progress = 0;
+}
+
+static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg)
{
int apic, pin;
- struct irq_cfg *cfg;
struct irq_pin_list *entry;
+ u8 vector = cfg->vector;
- cfg = irq_cfg(irq);
entry = cfg->irq_2_pin;
for (;;) {
unsigned int reg;
@@ -359,36 +555,61 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
}
}
-static int assign_irq_vector(int irq, cpumask_t mask);
+static int
+assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask);
+
+/*
+ * Either sets desc->affinity to a valid value, and returns cpu_mask_to_apicid
+ * of that, or returns BAD_APICID and leaves desc->affinity untouched.
+ */
+static unsigned int
+set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask)
+{
+ struct irq_cfg *cfg;
+ unsigned int irq;
+
+ if (!cpumask_intersects(mask, cpu_online_mask))
+ return BAD_APICID;
-static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
+ irq = desc->irq;
+ cfg = desc->chip_data;
+ if (assign_irq_vector(irq, cfg, mask))
+ return BAD_APICID;
+
+ cpumask_and(&desc->affinity, cfg->domain, mask);
+ set_extra_move_desc(desc, mask);
+ return cpu_mask_to_apicid_and(&desc->affinity, cpu_online_mask);
+}
+
+static void
+set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
{
struct irq_cfg *cfg;
unsigned long flags;
unsigned int dest;
- cpumask_t tmp;
- struct irq_desc *desc;
+ unsigned int irq;
- cpus_and(tmp, mask, cpu_online_map);
- if (cpus_empty(tmp))
- return;
+ irq = desc->irq;
+ cfg = desc->chip_data;
- cfg = irq_cfg(irq);
- if (assign_irq_vector(irq, mask))
- return;
+ spin_lock_irqsave(&ioapic_lock, flags);
+ dest = set_desc_affinity(desc, mask);
+ if (dest != BAD_APICID) {
+ /* Only the high 8 bits are valid. */
+ dest = SET_APIC_LOGICAL_ID(dest);
+ __target_IO_APIC_irq(irq, dest, cfg);
+ }
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+}
- cpus_and(tmp, cfg->domain, mask);
- dest = cpu_mask_to_apicid(tmp);
- /*
- * Only the high 8 bits are valid.
- */
- dest = SET_APIC_LOGICAL_ID(dest);
+static void
+set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask)
+{
+ struct irq_desc *desc;
desc = irq_to_desc(irq);
- spin_lock_irqsave(&ioapic_lock, flags);
- __target_IO_APIC_irq(irq, dest, cfg->vector);
- desc->affinity = mask;
- spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ set_ioapic_affinity_irq_desc(desc, mask);
}
#endif /* CONFIG_SMP */
@@ -397,16 +618,18 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
* shared ISA-space IRQs, so we have to support them. We are super
* fast in the common case, and fast for shared ISA-space IRQs.
*/
-static void add_pin_to_irq(unsigned int irq, int apic, int pin)
+static void add_pin_to_irq_cpu(struct irq_cfg *cfg, int cpu, int apic, int pin)
{
- struct irq_cfg *cfg;
struct irq_pin_list *entry;
- /* first time to refer irq_cfg, so with new */
- cfg = irq_cfg_alloc(irq);
entry = cfg->irq_2_pin;
if (!entry) {
- entry = get_one_free_irq_2_pin();
+ entry = get_one_free_irq_2_pin(cpu);
+ if (!entry) {
+ printk(KERN_ERR "can not alloc irq_2_pin to add %d - %d\n",
+ apic, pin);
+ return;
+ }
cfg->irq_2_pin = entry;
entry->apic = apic;
entry->pin = pin;
@@ -421,7 +644,7 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin)
entry = entry->next;
}
- entry->next = get_one_free_irq_2_pin();
+ entry->next = get_one_free_irq_2_pin(cpu);
entry = entry->next;
entry->apic = apic;
entry->pin = pin;
@@ -430,11 +653,10 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin)
/*
* Reroute an IRQ to a different pin.
*/
-static void __init replace_pin_at_irq(unsigned int irq,
+static void __init replace_pin_at_irq_cpu(struct irq_cfg *cfg, int cpu,
int oldapic, int oldpin,
int newapic, int newpin)
{
- struct irq_cfg *cfg = irq_cfg(irq);
struct irq_pin_list *entry = cfg->irq_2_pin;
int replaced = 0;
@@ -451,18 +673,16 @@ static void __init replace_pin_at_irq(unsigned int irq,
/* why? call replace before add? */
if (!replaced)
- add_pin_to_irq(irq, newapic, newpin);
+ add_pin_to_irq_cpu(cfg, cpu, newapic, newpin);
}
-static inline void io_apic_modify_irq(unsigned int irq,
+static inline void io_apic_modify_irq(struct irq_cfg *cfg,
int mask_and, int mask_or,
void (*final)(struct irq_pin_list *entry))
{
int pin;
- struct irq_cfg *cfg;
struct irq_pin_list *entry;
- cfg = irq_cfg(irq);
for (entry = cfg->irq_2_pin; entry != NULL; entry = entry->next) {
unsigned int reg;
pin = entry->pin;
@@ -475,9 +695,9 @@ static inline void io_apic_modify_irq(unsigned int irq,
}
}
-static void __unmask_IO_APIC_irq(unsigned int irq)
+static void __unmask_IO_APIC_irq(struct irq_cfg *cfg)
{
- io_apic_modify_irq(irq, ~IO_APIC_REDIR_MASKED, 0, NULL);
+ io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, 0, NULL);
}
#ifdef CONFIG_X86_64
@@ -492,47 +712,64 @@ void io_apic_sync(struct irq_pin_list *entry)
readl(&io_apic->data);
}
-static void __mask_IO_APIC_irq(unsigned int irq)
+static void __mask_IO_APIC_irq(struct irq_cfg *cfg)
{
- io_apic_modify_irq(irq, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync);
+ io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync);
}
#else /* CONFIG_X86_32 */
-static void __mask_IO_APIC_irq(unsigned int irq)
+static void __mask_IO_APIC_irq(struct irq_cfg *cfg)
{
- io_apic_modify_irq(irq, ~0, IO_APIC_REDIR_MASKED, NULL);
+ io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, NULL);
}
-static void __mask_and_edge_IO_APIC_irq(unsigned int irq)
+static void __mask_and_edge_IO_APIC_irq(struct irq_cfg *cfg)
{
- io_apic_modify_irq(irq, ~IO_APIC_REDIR_LEVEL_TRIGGER,
+ io_apic_modify_irq(cfg, ~IO_APIC_REDIR_LEVEL_TRIGGER,
IO_APIC_REDIR_MASKED, NULL);
}
-static void __unmask_and_level_IO_APIC_irq(unsigned int irq)
+static void __unmask_and_level_IO_APIC_irq(struct irq_cfg *cfg)
{
- io_apic_modify_irq(irq, ~IO_APIC_REDIR_MASKED,
+ io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED,
IO_APIC_REDIR_LEVEL_TRIGGER, NULL);
}
#endif /* CONFIG_X86_32 */
-static void mask_IO_APIC_irq (unsigned int irq)
+static void mask_IO_APIC_irq_desc(struct irq_desc *desc)
{
+ struct irq_cfg *cfg = desc->chip_data;
unsigned long flags;
+ BUG_ON(!cfg);
+
spin_lock_irqsave(&ioapic_lock, flags);
- __mask_IO_APIC_irq(irq);
+ __mask_IO_APIC_irq(cfg);
spin_unlock_irqrestore(&ioapic_lock, flags);
}
-static void unmask_IO_APIC_irq (unsigned int irq)
+static void unmask_IO_APIC_irq_desc(struct irq_desc *desc)
{
+ struct irq_cfg *cfg = desc->chip_data;
unsigned long flags;
spin_lock_irqsave(&ioapic_lock, flags);
- __unmask_IO_APIC_irq(irq);
+ __unmask_IO_APIC_irq(cfg);
spin_unlock_irqrestore(&ioapic_lock, flags);
}
+static void mask_IO_APIC_irq(unsigned int irq)
+{
+ struct irq_desc *desc = irq_to_desc(irq);
+
+ mask_IO_APIC_irq_desc(desc);
+}
+static void unmask_IO_APIC_irq(unsigned int irq)
+{
+ struct irq_desc *desc = irq_to_desc(irq);
+
+ unmask_IO_APIC_irq_desc(desc);
+}
+
static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
{
struct IO_APIC_route_entry entry;
@@ -809,7 +1046,7 @@ EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
*/
static int EISA_ELCR(unsigned int irq)
{
- if (irq < 16) {
+ if (irq < NR_IRQS_LEGACY) {
unsigned int port = 0x4d0 + (irq >> 3);
return (inb(port) >> (irq & 7)) & 1;
}
@@ -1034,7 +1271,8 @@ void unlock_vector_lock(void)
spin_unlock(&vector_lock);
}
-static int __assign_irq_vector(int irq, cpumask_t mask)
+static int
+__assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
{
/*
* NOTE! The local APIC isn't very good at handling
@@ -1049,39 +1287,39 @@ static int __assign_irq_vector(int irq, cpumask_t mask)
*/
static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0;
unsigned int old_vector;
- int cpu;
- struct irq_cfg *cfg;
-
- cfg = irq_cfg(irq);
-
- /* Only try and allocate irqs on cpus that are present */
- cpus_and(mask, mask, cpu_online_map);
+ int cpu, err;
+ cpumask_var_t tmp_mask;
if ((cfg->move_in_progress) || cfg->move_cleanup_count)
return -EBUSY;
+ if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))
+ return -ENOMEM;
+
old_vector = cfg->vector;
if (old_vector) {
- cpumask_t tmp;
- cpus_and(tmp, cfg->domain, mask);
- if (!cpus_empty(tmp))
+ cpumask_and(tmp_mask, mask, cpu_online_mask);
+ cpumask_and(tmp_mask, cfg->domain, tmp_mask);
+ if (!cpumask_empty(tmp_mask)) {
+ free_cpumask_var(tmp_mask);
return 0;
+ }
}
- for_each_cpu_mask_nr(cpu, mask) {
- cpumask_t domain, new_mask;
+ /* Only try and allocate irqs on cpus that are present */
+ err = -ENOSPC;
+ for_each_cpu_and(cpu, mask, cpu_online_mask) {
int new_cpu;
int vector, offset;
- domain = vector_allocation_domain(cpu);
- cpus_and(new_mask, domain, cpu_online_map);
+ vector_allocation_domain(cpu, tmp_mask);
vector = current_vector;
offset = current_offset;
next:
vector += 8;
if (vector >= first_system_vector) {
- /* If we run out of vectors on large boxen, must share them. */
+ /* If out of vectors on large boxen, must share them. */
offset = (offset + 1) % 8;
vector = FIRST_DEVICE_VECTOR + offset;
}
@@ -1094,7 +1332,7 @@ next:
if (vector == SYSCALL_VECTOR)
goto next;
#endif
- for_each_cpu_mask_nr(new_cpu, new_mask)
+ for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask)
if (per_cpu(vector_irq, new_cpu)[vector] != -1)
goto next;
/* Found one! */
@@ -1102,44 +1340,56 @@ next:
current_offset = offset;
if (old_vector) {
cfg->move_in_progress = 1;
- cfg->old_domain = cfg->domain;
+ cpumask_copy(cfg->old_domain, cfg->domain);
}
- for_each_cpu_mask_nr(new_cpu, new_mask)
+ for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask)
per_cpu(vector_irq, new_cpu)[vector] = irq;
cfg->vector = vector;
- cfg->domain = domain;
- return 0;
+ cpumask_copy(cfg->domain, tmp_mask);
+ err = 0;
+ break;
}
- return -ENOSPC;
+ free_cpumask_var(tmp_mask);
+ return err;
}
-static int assign_irq_vector(int irq, cpumask_t mask)
+static int
+assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
{
int err;
unsigned long flags;
spin_lock_irqsave(&vector_lock, flags);
- err = __assign_irq_vector(irq, mask);
+ err = __assign_irq_vector(irq, cfg, mask);
spin_unlock_irqrestore(&vector_lock, flags);
return err;
}
-static void __clear_irq_vector(int irq)
+static void __clear_irq_vector(int irq, struct irq_cfg *cfg)
{
- struct irq_cfg *cfg;
- cpumask_t mask;
int cpu, vector;
- cfg = irq_cfg(irq);
BUG_ON(!cfg->vector);
vector = cfg->vector;
- cpus_and(mask, cfg->domain, cpu_online_map);
- for_each_cpu_mask_nr(cpu, mask)
+ for_each_cpu_and(cpu, cfg->domain, cpu_online_mask)
per_cpu(vector_irq, cpu)[vector] = -1;
cfg->vector = 0;
- cpus_clear(cfg->domain);
+ cpumask_clear(cfg->domain);
+
+ if (likely(!cfg->move_in_progress))
+ return;
+ for_each_cpu_and(cpu, cfg->old_domain, cpu_online_mask) {
+ for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS;
+ vector++) {
+ if (per_cpu(vector_irq, cpu)[vector] != irq)
+ continue;
+ per_cpu(vector_irq, cpu)[vector] = -1;
+ break;
+ }
+ }
+ cfg->move_in_progress = 0;
}
void __setup_vector_irq(int cpu)
@@ -1148,10 +1398,14 @@ void __setup_vector_irq(int cpu)
/* This function must be called with vector_lock held */
int irq, vector;
struct irq_cfg *cfg;
+ struct irq_desc *desc;
/* Mark the inuse vectors */
- for_each_irq_cfg(irq, cfg) {
- if (!cpu_isset(cpu, cfg->domain))
+ for_each_irq_desc(irq, desc) {
+ if (!desc)
+ continue;
+ cfg = desc->chip_data;
+ if (!cpumask_test_cpu(cpu, cfg->domain))
continue;
vector = cfg->vector;
per_cpu(vector_irq, cpu)[vector] = irq;
@@ -1163,7 +1417,7 @@ void __setup_vector_irq(int cpu)
continue;
cfg = irq_cfg(irq);
- if (!cpu_isset(cpu, cfg->domain))
+ if (!cpumask_test_cpu(cpu, cfg->domain))
per_cpu(vector_irq, cpu)[vector] = -1;
}
}
@@ -1201,11 +1455,8 @@ static inline int IO_APIC_irq_trigger(int irq)
}
#endif
-static void ioapic_register_intr(int irq, unsigned long trigger)
+static void ioapic_register_intr(int irq, struct irq_desc *desc, unsigned long trigger)
{
- struct irq_desc *desc;
-
- desc = irq_to_desc(irq);
if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
trigger == IOAPIC_LEVEL)
@@ -1297,23 +1548,22 @@ static int setup_ioapic_entry(int apic, int irq,
return 0;
}
-static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
+static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_desc *desc,
int trigger, int polarity)
{
struct irq_cfg *cfg;
struct IO_APIC_route_entry entry;
- cpumask_t mask;
+ unsigned int dest;
if (!IO_APIC_IRQ(irq))
return;
- cfg = irq_cfg(irq);
+ cfg = desc->chip_data;
- mask = TARGET_CPUS;
- if (assign_irq_vector(irq, mask))
+ if (assign_irq_vector(irq, cfg, TARGET_CPUS))
return;
- cpus_and(mask, cfg->domain, mask);
+ dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS);
apic_printk(APIC_VERBOSE,KERN_DEBUG
"IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> "
@@ -1323,16 +1573,15 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry,
- cpu_mask_to_apicid(mask), trigger, polarity,
- cfg->vector)) {
+ dest, trigger, polarity, cfg->vector)) {
printk("Failed to setup ioapic entry for ioapic %d, pin %d\n",
mp_ioapics[apic].mp_apicid, pin);
- __clear_irq_vector(irq);
+ __clear_irq_vector(irq, cfg);
return;
}
- ioapic_register_intr(irq, trigger);
- if (irq < 16)
+ ioapic_register_intr(irq, desc, trigger);
+ if (irq < NR_IRQS_LEGACY)
disable_8259A_irq(irq);
ioapic_write_entry(apic, pin, entry);
@@ -1342,6 +1591,9 @@ static void __init setup_IO_APIC_irqs(void)
{
int apic, pin, idx, irq;
int notcon = 0;
+ struct irq_desc *desc;
+ struct irq_cfg *cfg;
+ int cpu = boot_cpu_id;
apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
@@ -1373,9 +1625,15 @@ static void __init setup_IO_APIC_irqs(void)
if (multi_timer_check(apic, irq))
continue;
#endif
- add_pin_to_irq(irq, apic, pin);
+ desc = irq_to_desc_alloc_cpu(irq, cpu);
+ if (!desc) {
+ printk(KERN_INFO "can not get irq_desc for %d\n", irq);
+ continue;
+ }
+ cfg = desc->chip_data;
+ add_pin_to_irq_cpu(cfg, cpu, apic, pin);
- setup_IO_APIC_irq(apic, pin, irq,
+ setup_IO_APIC_irq(apic, pin, irq, desc,
irq_trigger(idx), irq_polarity(idx));
}
}
@@ -1434,6 +1692,7 @@ __apicdebuginit(void) print_IO_APIC(void)
union IO_APIC_reg_03 reg_03;
unsigned long flags;
struct irq_cfg *cfg;
+ struct irq_desc *desc;
unsigned int irq;
if (apic_verbosity == APIC_QUIET)
@@ -1523,8 +1782,13 @@ __apicdebuginit(void) print_IO_APIC(void)
}
}
printk(KERN_DEBUG "IRQ to pin mappings:\n");
- for_each_irq_cfg(irq, cfg) {
- struct irq_pin_list *entry = cfg->irq_2_pin;
+ for_each_irq_desc(irq, desc) {
+ struct irq_pin_list *entry;
+
+ if (!desc)
+ continue;
+ cfg = desc->chip_data;
+ entry = cfg->irq_2_pin;
if (!entry)
continue;
printk(KERN_DEBUG "IRQ%d ", irq);
@@ -2008,14 +2272,16 @@ static unsigned int startup_ioapic_irq(unsigned int irq)
{
int was_pending = 0;
unsigned long flags;
+ struct irq_cfg *cfg;
spin_lock_irqsave(&ioapic_lock, flags);
- if (irq < 16) {
+ if (irq < NR_IRQS_LEGACY) {
disable_8259A_irq(irq);
if (i8259A_irq_pending(irq))
was_pending = 1;
}
- __unmask_IO_APIC_irq(irq);
+ cfg = irq_cfg(irq);
+ __unmask_IO_APIC_irq(cfg);
spin_unlock_irqrestore(&ioapic_lock, flags);
return was_pending;
@@ -2029,7 +2295,7 @@ static int ioapic_retrigger_irq(unsigned int irq)
unsigned long flags;
spin_lock_irqsave(&vector_lock, flags);
- send_IPI_mask(cpumask_of_cpu(first_cpu(cfg->domain)), cfg->vector);
+ send_IPI_mask(cpumask_of(cpumask_first(cfg->domain)), cfg->vector);
spin_unlock_irqrestore(&vector_lock, flags);
return 1;
@@ -2078,35 +2344,35 @@ static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration);
* as simple as edge triggered migration and we can do the irq migration
* with a simple atomic update to IO-APIC RTE.
*/
-static void migrate_ioapic_irq(int irq, cpumask_t mask)
+static void
+migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
{
struct irq_cfg *cfg;
- struct irq_desc *desc;
- cpumask_t tmp, cleanup_mask;
struct irte irte;
int modify_ioapic_rte;
unsigned int dest;
unsigned long flags;
+ unsigned int irq;
- cpus_and(tmp, mask, cpu_online_map);
- if (cpus_empty(tmp))
+ if (!cpumask_intersects(mask, cpu_online_mask))
return;
+ irq = desc->irq;
if (get_irte(irq, &irte))
return;
- if (assign_irq_vector(irq, mask))
+ cfg = desc->chip_data;
+ if (assign_irq_vector(irq, cfg, mask))
return;
- cfg = irq_cfg(irq);
- cpus_and(tmp, cfg->domain, mask);
- dest = cpu_mask_to_apicid(tmp);
+ set_extra_move_desc(desc, mask);
+
+ dest = cpu_mask_to_apicid_and(cfg->domain, mask);
- desc = irq_to_desc(irq);
modify_ioapic_rte = desc->status & IRQ_LEVEL;
if (modify_ioapic_rte) {
spin_lock_irqsave(&ioapic_lock, flags);
- __target_IO_APIC_irq(irq, dest, cfg->vector);
+ __target_IO_APIC_irq(irq, dest, cfg);
spin_unlock_irqrestore(&ioapic_lock, flags);
}
@@ -2118,24 +2384,20 @@ static void migrate_ioapic_irq(int irq, cpumask_t mask)
*/
modify_irte(irq, &irte);
- if (cfg->move_in_progress) {
- cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
- cfg->move_cleanup_count = cpus_weight(cleanup_mask);
- send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
- cfg->move_in_progress = 0;
- }
+ if (cfg->move_in_progress)
+ send_cleanup_vector(cfg);
- desc->affinity = mask;
+ cpumask_copy(&desc->affinity, mask);
}
-static int migrate_irq_remapped_level(int irq)
+static int migrate_irq_remapped_level_desc(struct irq_desc *desc)
{
int ret = -1;
- struct irq_desc *desc = irq_to_desc(irq);
+ struct irq_cfg *cfg = desc->chip_data;
- mask_IO_APIC_irq(irq);
+ mask_IO_APIC_irq_desc(desc);
- if (io_apic_level_ack_pending(irq)) {
+ if (io_apic_level_ack_pending(cfg)) {
/*
* Interrupt in progress. Migrating irq now will change the
* vector information in the IO-APIC RTE and that will confuse
@@ -2147,14 +2409,15 @@ static int migrate_irq_remapped_level(int irq)
}
/* everthing is clear. we have right of way */
- migrate_ioapic_irq(irq, desc->pending_mask);
+ migrate_ioapic_irq_desc(desc, &desc->pending_mask);
ret = 0;
desc->status &= ~IRQ_MOVE_PENDING;
- cpus_clear(desc->pending_mask);
+ cpumask_clear(&desc->pending_mask);
unmask:
- unmask_IO_APIC_irq(irq);
+ unmask_IO_APIC_irq_desc(desc);
+
return ret;
}
@@ -2164,6 +2427,9 @@ static void ir_irq_migration(struct work_struct *work)
struct irq_desc *desc;
for_each_irq_desc(irq, desc) {
+ if (!desc)
+ continue;
+
if (desc->status & IRQ_MOVE_PENDING) {
unsigned long flags;
@@ -2175,7 +2441,7 @@ static void ir_irq_migration(struct work_struct *work)
continue;
}
- desc->chip->set_affinity(irq, desc->pending_mask);
+ desc->chip->set_affinity(irq, &desc->pending_mask);
spin_unlock_irqrestore(&desc->lock, flags);
}
}
@@ -2184,18 +2450,24 @@ static void ir_irq_migration(struct work_struct *work)
/*
* Migrates the IRQ destination in the process context.
*/
-static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
+static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
+ const struct cpumask *mask)
{
- struct irq_desc *desc = irq_to_desc(irq);
-
if (desc->status & IRQ_LEVEL) {
desc->status |= IRQ_MOVE_PENDING;
- desc->pending_mask = mask;
- migrate_irq_remapped_level(irq);
+ cpumask_copy(&desc->pending_mask, mask);
+ migrate_irq_remapped_level_desc(desc);
return;
}
- migrate_ioapic_irq(irq, mask);
+ migrate_ioapic_irq_desc(desc, mask);
+}
+static void set_ir_ioapic_affinity_irq(unsigned int irq,
+ const struct cpumask *mask)
+{
+ struct irq_desc *desc = irq_to_desc(irq);
+
+ set_ir_ioapic_affinity_irq_desc(desc, mask);
}
#endif
@@ -2215,6 +2487,9 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
struct irq_cfg *cfg;
irq = __get_cpu_var(vector_irq)[vector];
+ if (irq == -1)
+ continue;
+
desc = irq_to_desc(irq);
if (!desc)
continue;
@@ -2224,7 +2499,7 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
if (!cfg->move_cleanup_count)
goto unlock;
- if ((vector == cfg->vector) && cpu_isset(me, cfg->domain))
+ if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
goto unlock;
__get_cpu_var(vector_irq)[vector] = -1;
@@ -2236,28 +2511,44 @@ unlock:
irq_exit();
}
-static void irq_complete_move(unsigned int irq)
+static void irq_complete_move(struct irq_desc **descp)
{
- struct irq_cfg *cfg = irq_cfg(irq);
+ struct irq_desc *desc = *descp;
+ struct irq_cfg *cfg = desc->chip_data;
unsigned vector, me;
- if (likely(!cfg->move_in_progress))
+ if (likely(!cfg->move_in_progress)) {
+#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
+ if (likely(!cfg->move_desc_pending))
+ return;
+
+ /* domain is not change, but affinity is changed */
+ me = smp_processor_id();
+ if (cpu_isset(me, desc->affinity)) {
+ *descp = desc = move_irq_desc(desc, me);
+ /* get the new one */
+ cfg = desc->chip_data;
+ cfg->move_desc_pending = 0;
+ }
+#endif
return;
+ }
vector = ~get_irq_regs()->orig_ax;
me = smp_processor_id();
- if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) {
- cpumask_t cleanup_mask;
+#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
+ *descp = desc = move_irq_desc(desc, me);
+ /* get the new one */
+ cfg = desc->chip_data;
+#endif
- cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
- cfg->move_cleanup_count = cpus_weight(cleanup_mask);
- send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
- cfg->move_in_progress = 0;
- }
+ if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
+ send_cleanup_vector(cfg);
}
#else
-static inline void irq_complete_move(unsigned int irq) {}
+static inline void irq_complete_move(struct irq_desc **descp) {}
#endif
+
#ifdef CONFIG_INTR_REMAP
static void ack_x2apic_level(unsigned int irq)
{
@@ -2268,11 +2559,14 @@ static void ack_x2apic_edge(unsigned int irq)
{
ack_x2APIC_irq();
}
+
#endif
static void ack_apic_edge(unsigned int irq)
{
- irq_complete_move(irq);
+ struct irq_desc *desc = irq_to_desc(irq);
+
+ irq_complete_move(&desc);
move_native_irq(irq);
ack_APIC_irq();
}
@@ -2281,18 +2575,21 @@ atomic_t irq_mis_count;
static void ack_apic_level(unsigned int irq)
{
+ struct irq_desc *desc = irq_to_desc(irq);
+
#ifdef CONFIG_X86_32
unsigned long v;
int i;
#endif
+ struct irq_cfg *cfg;
int do_unmask_irq = 0;
- irq_complete_move(irq);
+ irq_complete_move(&desc);
#ifdef CONFIG_GENERIC_PENDING_IRQ
/* If we are moving the irq we need to mask it */
- if (unlikely(irq_to_desc(irq)->status & IRQ_MOVE_PENDING)) {
+ if (unlikely(desc->status & IRQ_MOVE_PENDING)) {
do_unmask_irq = 1;
- mask_IO_APIC_irq(irq);
+ mask_IO_APIC_irq_desc(desc);
}
#endif
@@ -2316,7 +2613,8 @@ static void ack_apic_level(unsigned int irq)
* operation to prevent an edge-triggered interrupt escaping meanwhile.
* The idea is from Manfred Spraul. --macro
*/
- i = irq_cfg(irq)->vector;
+ cfg = desc->chip_data;
+ i = cfg->vector;
v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
#endif
@@ -2355,17 +2653,18 @@ static void ack_apic_level(unsigned int irq)
* accurate and is causing problems then it is a hardware bug
* and you can go talk to the chipset vendor about it.
*/
- if (!io_apic_level_ack_pending(irq))
+ cfg = desc->chip_data;
+ if (!io_apic_level_ack_pending(cfg))
move_masked_irq(irq);
- unmask_IO_APIC_irq(irq);
+ unmask_IO_APIC_irq_desc(desc);
}
#ifdef CONFIG_X86_32
if (!(v & (1 << (i & 0x1f)))) {
atomic_inc(&irq_mis_count);
spin_lock(&ioapic_lock);
- __mask_and_edge_IO_APIC_irq(irq);
- __unmask_and_level_IO_APIC_irq(irq);
+ __mask_and_edge_IO_APIC_irq(cfg);
+ __unmask_and_level_IO_APIC_irq(cfg);
spin_unlock(&ioapic_lock);
}
#endif
@@ -2416,20 +2715,22 @@ static inline void init_IO_APIC_traps(void)
* Also, we've got to be careful not to trash gate
* 0x80, because int 0x80 is hm, kind of importantish. ;)
*/
- for_each_irq_cfg(irq, cfg) {
- if (IO_APIC_IRQ(irq) && !cfg->vector) {
+ for_each_irq_desc(irq, desc) {
+ if (!desc)
+ continue;
+
+ cfg = desc->chip_data;
+ if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) {
/*
* Hmm.. We don't have an entry for this,
* so default to an old-fashioned 8259
* interrupt if we can..
*/
- if (irq < 16)
+ if (irq < NR_IRQS_LEGACY)
make_8259A_irq(irq);
- else {
- desc = irq_to_desc(irq);
+ else
/* Strange. Oh, well.. */
desc->chip = &no_irq_chip;
- }
}
}
}
@@ -2454,7 +2755,7 @@ static void unmask_lapic_irq(unsigned int irq)
apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
}
-static void ack_lapic_irq (unsigned int irq)
+static void ack_lapic_irq(unsigned int irq)
{
ack_APIC_irq();
}
@@ -2466,11 +2767,8 @@ static struct irq_chip lapic_chip __read_mostly = {
.ack = ack_lapic_irq,
};
-static void lapic_register_intr(int irq)
+static void lapic_register_intr(int irq, struct irq_desc *desc)
{
- struct irq_desc *desc;
-
- desc = irq_to_desc(irq);
desc->status &= ~IRQ_LEVEL;
set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
"edge");
@@ -2574,7 +2872,9 @@ int timer_through_8259 __initdata;
*/
static inline void __init check_timer(void)
{
- struct irq_cfg *cfg = irq_cfg(0);
+ struct irq_desc *desc = irq_to_desc(0);
+ struct irq_cfg *cfg = desc->chip_data;
+ int cpu = boot_cpu_id;
int apic1, pin1, apic2, pin2;
unsigned long flags;
unsigned int ver;
@@ -2589,7 +2889,7 @@ static inline void __init check_timer(void)
* get/set the timer IRQ vector:
*/
disable_8259A_irq(0);
- assign_irq_vector(0, TARGET_CPUS);
+ assign_irq_vector(0, cfg, TARGET_CPUS);
/*
* As IRQ0 is to be enabled in the 8259A, the virtual
@@ -2640,10 +2940,10 @@ static inline void __init check_timer(void)
* Ok, does IRQ0 through the IOAPIC work?
*/
if (no_pin1) {
- add_pin_to_irq(0, apic1, pin1);
+ add_pin_to_irq_cpu(cfg, cpu, apic1, pin1);
setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
}
- unmask_IO_APIC_irq(0);
+ unmask_IO_APIC_irq_desc(desc);
if (timer_irq_works()) {
if (nmi_watchdog == NMI_IO_APIC) {
setup_nmi();
@@ -2669,9 +2969,9 @@ static inline void __init check_timer(void)
/*
* legacy devices should be connected to IO APIC #0
*/
- replace_pin_at_irq(0, apic1, pin1, apic2, pin2);
+ replace_pin_at_irq_cpu(cfg, cpu, apic1, pin1, apic2, pin2);
setup_timer_IRQ0_pin(apic2, pin2, cfg->vector);
- unmask_IO_APIC_irq(0);
+ unmask_IO_APIC_irq_desc(desc);
enable_8259A_irq(0);
if (timer_irq_works()) {
apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
@@ -2703,7 +3003,7 @@ static inline void __init check_timer(void)
apic_printk(APIC_QUIET, KERN_INFO
"...trying to set up timer as Virtual Wire IRQ...\n");
- lapic_register_intr(0);
+ lapic_register_intr(0, desc);
apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */
enable_8259A_irq(0);
@@ -2888,22 +3188,26 @@ unsigned int create_irq_nr(unsigned int irq_want)
unsigned int irq;
unsigned int new;
unsigned long flags;
- struct irq_cfg *cfg_new;
-
- irq_want = nr_irqs - 1;
+ struct irq_cfg *cfg_new = NULL;
+ int cpu = boot_cpu_id;
+ struct irq_desc *desc_new = NULL;
irq = 0;
spin_lock_irqsave(&vector_lock, flags);
- for (new = irq_want; new > 0; new--) {
+ for (new = irq_want; new < NR_IRQS; new++) {
if (platform_legacy_irq(new))
continue;
- cfg_new = irq_cfg(new);
- if (cfg_new && cfg_new->vector != 0)
+
+ desc_new = irq_to_desc_alloc_cpu(new, cpu);
+ if (!desc_new) {
+ printk(KERN_INFO "can not get irq_desc for %d\n", new);
+ continue;
+ }
+ cfg_new = desc_new->chip_data;
+
+ if (cfg_new->vector != 0)
continue;
- /* check if need to create one */
- if (!cfg_new)
- cfg_new = irq_cfg_alloc(new);
- if (__assign_irq_vector(new, TARGET_CPUS) == 0)
+ if (__assign_irq_vector(new, cfg_new, TARGET_CPUS) == 0)
irq = new;
break;
}
@@ -2911,15 +3215,21 @@ unsigned int create_irq_nr(unsigned int irq_want)
if (irq > 0) {
dynamic_irq_init(irq);
+ /* restore it, in case dynamic_irq_init clear it */
+ if (desc_new)
+ desc_new->chip_data = cfg_new;
}
return irq;
}
+static int nr_irqs_gsi = NR_IRQS_LEGACY;
int create_irq(void)
{
+ unsigned int irq_want;
int irq;
- irq = create_irq_nr(nr_irqs - 1);
+ irq_want = nr_irqs_gsi;
+ irq = create_irq_nr(irq_want);
if (irq == 0)
irq = -1;
@@ -2930,14 +3240,22 @@ int create_irq(void)
void destroy_irq(unsigned int irq)
{
unsigned long flags;
+ struct irq_cfg *cfg;
+ struct irq_desc *desc;
+ /* store it, in case dynamic_irq_cleanup clear it */
+ desc = irq_to_desc(irq);
+ cfg = desc->chip_data;
dynamic_irq_cleanup(irq);
+ /* connect back irq_cfg */
+ if (desc)
+ desc->chip_data = cfg;
#ifdef CONFIG_INTR_REMAP
free_irte(irq);
#endif
spin_lock_irqsave(&vector_lock, flags);
- __clear_irq_vector(irq);
+ __clear_irq_vector(irq, cfg);
spin_unlock_irqrestore(&vector_lock, flags);
}
@@ -2950,16 +3268,13 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
struct irq_cfg *cfg;
int err;
unsigned dest;
- cpumask_t tmp;
- tmp = TARGET_CPUS;
- err = assign_irq_vector(irq, tmp);
+ cfg = irq_cfg(irq);
+ err = assign_irq_vector(irq, cfg, TARGET_CPUS);
if (err)
return err;
- cfg = irq_cfg(irq);
- cpus_and(tmp, cfg->domain, tmp);
- dest = cpu_mask_to_apicid(tmp);
+ dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS);
#ifdef CONFIG_INTR_REMAP
if (irq_remapped(irq)) {
@@ -3013,64 +3328,48 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
}
#ifdef CONFIG_SMP
-static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
+static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
{
+ struct irq_desc *desc = irq_to_desc(irq);
struct irq_cfg *cfg;
struct msi_msg msg;
unsigned int dest;
- cpumask_t tmp;
- struct irq_desc *desc;
- cpus_and(tmp, mask, cpu_online_map);
- if (cpus_empty(tmp))
+ dest = set_desc_affinity(desc, mask);
+ if (dest == BAD_APICID)
return;
- if (assign_irq_vector(irq, mask))
- return;
-
- cfg = irq_cfg(irq);
- cpus_and(tmp, cfg->domain, mask);
- dest = cpu_mask_to_apicid(tmp);
+ cfg = desc->chip_data;
- read_msi_msg(irq, &msg);
+ read_msi_msg_desc(desc, &msg);
msg.data &= ~MSI_DATA_VECTOR_MASK;
msg.data |= MSI_DATA_VECTOR(cfg->vector);
msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
msg.address_lo |= MSI_ADDR_DEST_ID(dest);
- write_msi_msg(irq, &msg);
- desc = irq_to_desc(irq);
- desc->affinity = mask;
+ write_msi_msg_desc(desc, &msg);
}
-
#ifdef CONFIG_INTR_REMAP
/*
* Migrate the MSI irq to another cpumask. This migration is
* done in the process context using interrupt-remapping hardware.
*/
-static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
+static void
+ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
{
+ struct irq_desc *desc = irq_to_desc(irq);
struct irq_cfg *cfg;
unsigned int dest;
- cpumask_t tmp, cleanup_mask;
struct irte irte;
- struct irq_desc *desc;
-
- cpus_and(tmp, mask, cpu_online_map);
- if (cpus_empty(tmp))
- return;
if (get_irte(irq, &irte))
return;
- if (assign_irq_vector(irq, mask))
+ dest = set_desc_affinity(desc, mask);
+ if (dest == BAD_APICID)
return;
- cfg = irq_cfg(irq);
- cpus_and(tmp, cfg->domain, mask);
- dest = cpu_mask_to_apicid(tmp);
-
irte.vector = cfg->vector;
irte.dest_id = IRTE_DEST(dest);
@@ -3084,16 +3383,10 @@ static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
* at the new destination. So, time to cleanup the previous
* vector allocation.
*/
- if (cfg->move_in_progress) {
- cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
- cfg->move_cleanup_count = cpus_weight(cleanup_mask);
- send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
- cfg->move_in_progress = 0;
- }
-
- desc = irq_to_desc(irq);
- desc->affinity = mask;
+ if (cfg->move_in_progress)
+ send_cleanup_vector(cfg);
}
+
#endif
#endif /* CONFIG_SMP */
@@ -3152,7 +3445,7 @@ static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec)
}
#endif
-static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
+static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
{
int ret;
struct msi_msg msg;
@@ -3161,7 +3454,7 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
if (ret < 0)
return ret;
- set_irq_msi(irq, desc);
+ set_irq_msi(irq, msidesc);
write_msi_msg(irq, &msg);
#ifdef CONFIG_INTR_REMAP
@@ -3181,26 +3474,13 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
return 0;
}
-static unsigned int build_irq_for_pci_dev(struct pci_dev *dev)
-{
- unsigned int irq;
-
- irq = dev->bus->number;
- irq <<= 8;
- irq |= dev->devfn;
- irq <<= 12;
-
- return irq;
-}
-
-int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
+int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc)
{
unsigned int irq;
int ret;
unsigned int irq_want;
- irq_want = build_irq_for_pci_dev(dev) + 0x100;
-
+ irq_want = nr_irqs_gsi;
irq = create_irq_nr(irq_want);
if (irq == 0)
return -1;
@@ -3214,7 +3494,7 @@ int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
goto error;
no_ir:
#endif
- ret = setup_msi_irq(dev, desc, irq);
+ ret = setup_msi_irq(dev, msidesc, irq);
if (ret < 0) {
destroy_irq(irq);
return ret;
@@ -3232,7 +3512,7 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
{
unsigned int irq;
int ret, sub_handle;
- struct msi_desc *desc;
+ struct msi_desc *msidesc;
unsigned int irq_want;
#ifdef CONFIG_INTR_REMAP
@@ -3240,10 +3520,11 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
int index = 0;
#endif
- irq_want = build_irq_for_pci_dev(dev) + 0x100;
+ irq_want = nr_irqs_gsi;
sub_handle = 0;
- list_for_each_entry(desc, &dev->msi_list, list) {
- irq = create_irq_nr(irq_want--);
+ list_for_each_entry(msidesc, &dev->msi_list, list) {
+ irq = create_irq_nr(irq_want);
+ irq_want++;
if (irq == 0)
return -1;
#ifdef CONFIG_INTR_REMAP
@@ -3275,7 +3556,7 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
}
no_ir:
#endif
- ret = setup_msi_irq(dev, desc, irq);
+ ret = setup_msi_irq(dev, msidesc, irq);
if (ret < 0)
goto error;
sub_handle++;
@@ -3294,24 +3575,18 @@ void arch_teardown_msi_irq(unsigned int irq)
#ifdef CONFIG_DMAR
#ifdef CONFIG_SMP
-static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
+static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
{
+ struct irq_desc *desc = irq_to_desc(irq);
struct irq_cfg *cfg;
struct msi_msg msg;
unsigned int dest;
- cpumask_t tmp;
- struct irq_desc *desc;
- cpus_and(tmp, mask, cpu_online_map);
- if (cpus_empty(tmp))
+ dest = set_desc_affinity(desc, mask);
+ if (dest == BAD_APICID)
return;
- if (assign_irq_vector(irq, mask))
- return;
-
- cfg = irq_cfg(irq);
- cpus_and(tmp, cfg->domain, mask);
- dest = cpu_mask_to_apicid(tmp);
+ cfg = desc->chip_data;
dmar_msi_read(irq, &msg);
@@ -3321,9 +3596,8 @@ static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
msg.address_lo |= MSI_ADDR_DEST_ID(dest);
dmar_msi_write(irq, &msg);
- desc = irq_to_desc(irq);
- desc->affinity = mask;
}
+
#endif /* CONFIG_SMP */
struct irq_chip dmar_msi_type = {
@@ -3355,24 +3629,18 @@ int arch_setup_dmar_msi(unsigned int irq)
#ifdef CONFIG_HPET_TIMER
#ifdef CONFIG_SMP
-static void hpet_msi_set_affinity(unsigned int irq, cpumask_t mask)
+static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
{
+ struct irq_desc *desc = irq_to_desc(irq);
struct irq_cfg *cfg;
- struct irq_desc *desc;
struct msi_msg msg;
unsigned int dest;
- cpumask_t tmp;
- cpus_and(tmp, mask, cpu_online_map);
- if (cpus_empty(tmp))
+ dest = set_desc_affinity(desc, mask);
+ if (dest == BAD_APICID)
return;
- if (assign_irq_vector(irq, mask))
- return;
-
- cfg = irq_cfg(irq);
- cpus_and(tmp, cfg->domain, mask);
- dest = cpu_mask_to_apicid(tmp);
+ cfg = desc->chip_data;
hpet_msi_read(irq, &msg);
@@ -3382,9 +3650,8 @@ static void hpet_msi_set_affinity(unsigned int irq, cpumask_t mask)
msg.address_lo |= MSI_ADDR_DEST_ID(dest);
hpet_msi_write(irq, &msg);
- desc = irq_to_desc(irq);
- desc->affinity = mask;
}
+
#endif /* CONFIG_SMP */
struct irq_chip hpet_msi_type = {
@@ -3437,28 +3704,21 @@ static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
write_ht_irq_msg(irq, &msg);
}
-static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
+static void set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask)
{
+ struct irq_desc *desc = irq_to_desc(irq);
struct irq_cfg *cfg;
unsigned int dest;
- cpumask_t tmp;
- struct irq_desc *desc;
-
- cpus_and(tmp, mask, cpu_online_map);
- if (cpus_empty(tmp))
- return;
- if (assign_irq_vector(irq, mask))
+ dest = set_desc_affinity(desc, mask);
+ if (dest == BAD_APICID)
return;
- cfg = irq_cfg(irq);
- cpus_and(tmp, cfg->domain, mask);
- dest = cpu_mask_to_apicid(tmp);
+ cfg = desc->chip_data;
target_ht_irq(irq, dest, cfg->vector);
- desc = irq_to_desc(irq);
- desc->affinity = mask;
}
+
#endif
static struct irq_chip ht_irq_chip = {
@@ -3476,17 +3736,14 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
{
struct irq_cfg *cfg;
int err;
- cpumask_t tmp;
- tmp = TARGET_CPUS;
- err = assign_irq_vector(irq, tmp);
+ cfg = irq_cfg(irq);
+ err = assign_irq_vector(irq, cfg, TARGET_CPUS);
if (!err) {
struct ht_irq_msg msg;
unsigned dest;
- cfg = irq_cfg(irq);
- cpus_and(tmp, cfg->domain, tmp);
- dest = cpu_mask_to_apicid(tmp);
+ dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS);
msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
@@ -3522,7 +3779,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
unsigned long mmr_offset)
{
- const cpumask_t *eligible_cpu = get_cpu_mask(cpu);
+ const struct cpumask *eligible_cpu = cpumask_of(cpu);
struct irq_cfg *cfg;
int mmr_pnode;
unsigned long mmr_value;
@@ -3530,7 +3787,9 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
unsigned long flags;
int err;
- err = assign_irq_vector(irq, *eligible_cpu);
+ cfg = irq_cfg(irq);
+
+ err = assign_irq_vector(irq, cfg, eligible_cpu);
if (err != 0)
return err;
@@ -3539,8 +3798,6 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
irq_name);
spin_unlock_irqrestore(&vector_lock, flags);
- cfg = irq_cfg(irq);
-
mmr_value = 0;
entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
@@ -3551,7 +3808,7 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
entry->polarity = 0;
entry->trigger = 0;
entry->mask = 0;
- entry->dest = cpu_mask_to_apicid(*eligible_cpu);
+ entry->dest = cpu_mask_to_apicid(eligible_cpu);
mmr_pnode = uv_blade_to_pnode(mmr_blade);
uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
@@ -3592,29 +3849,16 @@ int __init io_apic_get_redir_entries (int ioapic)
return reg_01.bits.entries;
}
-int __init probe_nr_irqs(void)
+void __init probe_nr_irqs_gsi(void)
{
int idx;
int nr = 0;
-#ifndef CONFIG_XEN
- int nr_min = 32;
-#else
- int nr_min = NR_IRQS;
-#endif
for (idx = 0; idx < nr_ioapics; idx++)
nr += io_apic_get_redir_entries(idx) + 1;
- /* double it for hotplug and msi and nmi */
- nr <<= 1;
-
- /* something wrong ? */
- if (nr < nr_min)
- nr = nr_min;
- if (WARN_ON(nr > NR_IRQS))
- nr = NR_IRQS;
-
- return nr;
+ if (nr > nr_irqs_gsi)
+ nr_irqs_gsi = nr;
}
/* --------------------------------------------------------------------------
@@ -3713,19 +3957,31 @@ int __init io_apic_get_version(int ioapic)
int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity)
{
+ struct irq_desc *desc;
+ struct irq_cfg *cfg;
+ int cpu = boot_cpu_id;
+
if (!IO_APIC_IRQ(irq)) {
apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
ioapic);
return -EINVAL;
}
+ desc = irq_to_desc_alloc_cpu(irq, cpu);
+ if (!desc) {
+ printk(KERN_INFO "can not get irq_desc %d\n", irq);
+ return 0;
+ }
+
/*
* IRQs < 16 are already in the irq_2_pin[] map
*/
- if (irq >= 16)
- add_pin_to_irq(irq, ioapic, pin);
+ if (irq >= NR_IRQS_LEGACY) {
+ cfg = desc->chip_data;
+ add_pin_to_irq_cpu(cfg, cpu, ioapic, pin);
+ }
- setup_IO_APIC_irq(ioapic, pin, irq, triggering, polarity);
+ setup_IO_APIC_irq(ioapic, pin, irq, desc, triggering, polarity);
return 0;
}
@@ -3761,7 +4017,9 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
void __init setup_ioapic_dest(void)
{
int pin, ioapic, irq, irq_entry;
+ struct irq_desc *desc;
struct irq_cfg *cfg;
+ const struct cpumask *mask;
if (skip_ioapic_setup == 1)
return;
@@ -3777,17 +4035,31 @@ void __init setup_ioapic_dest(void)
* when you have too many devices, because at that time only boot
* cpu is online.
*/
- cfg = irq_cfg(irq);
- if (!cfg->vector)
- setup_IO_APIC_irq(ioapic, pin, irq,
+ desc = irq_to_desc(irq);
+ cfg = desc->chip_data;
+ if (!cfg->vector) {
+ setup_IO_APIC_irq(ioapic, pin, irq, desc,
irq_trigger(irq_entry),
irq_polarity(irq_entry));
+ continue;
+
+ }
+
+ /*
+ * Honour affinities which have been set in early boot
+ */
+ if (desc->status &
+ (IRQ_NO_BALANCING | IRQ_AFFINITY_SET))
+ mask = &desc->affinity;
+ else
+ mask = TARGET_CPUS;
+
#ifdef CONFIG_INTR_REMAP
- else if (intr_remapping_enabled)
- set_ir_ioapic_affinity_irq(irq, TARGET_CPUS);
-#endif
+ if (intr_remapping_enabled)
+ set_ir_ioapic_affinity_irq_desc(desc, mask);
else
- set_ioapic_affinity_irq(irq, TARGET_CPUS);
+#endif
+ set_ioapic_affinity_irq_desc(desc, mask);
}
}
@@ -3836,7 +4108,6 @@ void __init ioapic_init_mappings(void)
struct resource *ioapic_res;
int i;
- irq_2_pin_init();
ioapic_res = ioapic_setup_resources();
for (i = 0; i < nr_ioapics; i++) {
if (smp_found_config) {
diff --git a/arch/x86/kernel/ipi.c b/arch/x86/kernel/ipi.c
index f1c688e46f35..285bbf8831fa 100644
--- a/arch/x86/kernel/ipi.c
+++ b/arch/x86/kernel/ipi.c
@@ -116,18 +116,18 @@ static inline void __send_IPI_dest_field(unsigned long mask, int vector)
/*
* This is only used on smaller machines.
*/
-void send_IPI_mask_bitmask(cpumask_t cpumask, int vector)
+void send_IPI_mask_bitmask(const struct cpumask *cpumask, int vector)
{
- unsigned long mask = cpus_addr(cpumask)[0];
+ unsigned long mask = cpumask_bits(cpumask)[0];
unsigned long flags;
local_irq_save(flags);
- WARN_ON(mask & ~cpus_addr(cpu_online_map)[0]);
+ WARN_ON(mask & ~cpumask_bits(cpu_online_mask)[0]);
__send_IPI_dest_field(mask, vector);
local_irq_restore(flags);
}
-void send_IPI_mask_sequence(cpumask_t mask, int vector)
+void send_IPI_mask_sequence(const struct cpumask *mask, int vector)
{
unsigned long flags;
unsigned int query_cpu;
@@ -139,12 +139,24 @@ void send_IPI_mask_sequence(cpumask_t mask, int vector)
*/
local_irq_save(flags);
- for_each_possible_cpu(query_cpu) {
- if (cpu_isset(query_cpu, mask)) {
+ for_each_cpu(query_cpu, mask)
+ __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu), vector);
+ local_irq_restore(flags);
+}
+
+void send_IPI_mask_allbutself(const struct cpumask *mask, int vector)
+{
+ unsigned long flags;
+ unsigned int query_cpu;
+ unsigned int this_cpu = smp_processor_id();
+
+ /* See Hack comment above */
+
+ local_irq_save(flags);
+ for_each_cpu(query_cpu, mask)
+ if (query_cpu != this_cpu)
__send_IPI_dest_field(cpu_to_logical_apicid(query_cpu),
vector);
- }
- }
local_irq_restore(flags);
}
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index d1d4dc52f649..3f1d9d18df67 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -118,6 +118,9 @@ int show_interrupts(struct seq_file *p, void *v)
}
desc = irq_to_desc(i);
+ if (!desc)
+ return 0;
+
spin_lock_irqsave(&desc->lock, flags);
#ifndef CONFIG_SMP
any_count = kstat_irqs(i);
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index a51382672de0..9dc5588f336a 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -233,25 +233,28 @@ unsigned int do_IRQ(struct pt_regs *regs)
#ifdef CONFIG_HOTPLUG_CPU
#include <mach_apic.h>
-void fixup_irqs(cpumask_t map)
+/* A cpu has been removed from cpu_online_mask. Reset irq affinities. */
+void fixup_irqs(void)
{
unsigned int irq;
static int warned;
struct irq_desc *desc;
for_each_irq_desc(irq, desc) {
- cpumask_t mask;
+ const struct cpumask *affinity;
+ if (!desc)
+ continue;
if (irq == 2)
continue;
- cpus_and(mask, desc->affinity, map);
- if (any_online_cpu(mask) == NR_CPUS) {
+ affinity = &desc->affinity;
+ if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
printk("Breaking affinity for irq %i\n", irq);
- mask = map;
+ affinity = cpu_all_mask;
}
if (desc->chip->set_affinity)
- desc->chip->set_affinity(irq, mask);
+ desc->chip->set_affinity(irq, affinity);
else if (desc->action && !(warned++))
printk("Cannot set affinity for irq %i\n", irq);
}
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 60eb84eb77a0..fca2991443f5 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -83,40 +83,43 @@ asmlinkage unsigned int do_IRQ(struct pt_regs *regs)
}
#ifdef CONFIG_HOTPLUG_CPU
-void fixup_irqs(cpumask_t map)
+/* A cpu has been removed from cpu_online_mask. Reset irq affinities. */
+void fixup_irqs(void)
{
unsigned int irq;
static int warned;
struct irq_desc *desc;
for_each_irq_desc(irq, desc) {
- cpumask_t mask;
int break_affinity = 0;
int set_affinity = 1;
+ const struct cpumask *affinity;
+ if (!desc)
+ continue;
if (irq == 2)
continue;
/* interrupt's are disabled at this point */
spin_lock(&desc->lock);
+ affinity = &desc->affinity;
if (!irq_has_action(irq) ||
- cpus_equal(desc->affinity, map)) {
+ cpumask_equal(affinity, cpu_online_mask)) {
spin_unlock(&desc->lock);
continue;
}
- cpus_and(mask, desc->affinity, map);
- if (cpus_empty(mask)) {
+ if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
break_affinity = 1;
- mask = map;
+ affinity = cpu_all_mask;
}
if (desc->chip->mask)
desc->chip->mask(irq);
if (desc->chip->set_affinity)
- desc->chip->set_affinity(irq, mask);
+ desc->chip->set_affinity(irq, affinity);
else if (!(warned++))
set_affinity = 0;
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
index 845aa9803e80..6a92f47c52e7 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@ -68,8 +68,7 @@ void __init init_ISA_irqs (void)
/*
* 16 old-style INTA-cycle interrupts:
*/
- for (i = 0; i < 16; i++) {
- /* first time call this irq_desc */
+ for (i = 0; i < NR_IRQS_LEGACY; i++) {
struct irq_desc *desc = irq_to_desc(i);
desc->status = IRQ_DISABLED;
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c
index ff0235391285..40c1e62ec785 100644
--- a/arch/x86/kernel/irqinit_64.c
+++ b/arch/x86/kernel/irqinit_64.c
@@ -142,8 +142,7 @@ void __init init_ISA_irqs(void)
init_bsp_APIC();
init_8259A(0);
- for (i = 0; i < 16; i++) {
- /* first time call this irq_desc */
+ for (i = 0; i < NR_IRQS_LEGACY; i++) {
struct irq_desc *desc = irq_to_desc(i);
desc->status = IRQ_DISABLED;
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 774ac4991568..e169ae9b6a62 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -128,7 +128,7 @@ static int kvm_register_clock(char *txt)
}
#ifdef CONFIG_X86_LOCAL_APIC
-static void kvm_setup_secondary_clock(void)
+static void __cpuinit kvm_setup_secondary_clock(void)
{
/*
* Now that the first cpu already had this clocksource initialized,
diff --git a/arch/x86/kernel/mfgpt_32.c b/arch/x86/kernel/mfgpt_32.c
index 3b599518c322..c12314c9e86f 100644
--- a/arch/x86/kernel/mfgpt_32.c
+++ b/arch/x86/kernel/mfgpt_32.c
@@ -287,7 +287,7 @@ static struct clock_event_device mfgpt_clockevent = {
.set_mode = mfgpt_set_mode,
.set_next_event = mfgpt_next_event,
.rating = 250,
- .cpumask = CPU_MASK_ALL,
+ .cpumask = cpu_all_mask,
.shift = 32
};
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index f98f4e1dba09..45e3b69808ba 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -586,23 +586,23 @@ static void __init __get_smp_config(unsigned int early)
{
struct intel_mp_floating *mpf = mpf_found;
- if (x86_quirks->mach_get_smp_config) {
- if (x86_quirks->mach_get_smp_config(early))
- return;
- }
+ if (!mpf)
+ return;
+
if (acpi_lapic && early)
return;
+
/*
- * ACPI supports both logical (e.g. Hyper-Threading) and physical
- * processors, where MPS only supports physical.
+ * MPS doesn't support hyperthreading, aka only have
+ * thread 0 apic id in MPS table
*/
- if (acpi_lapic && acpi_ioapic) {
- printk(KERN_INFO "Using ACPI (MADT) for SMP configuration "
- "information\n");
+ if (acpi_lapic && acpi_ioapic)
return;
- } else if (acpi_lapic)
- printk(KERN_INFO "Using ACPI for processor (LAPIC) "
- "configuration information\n");
+
+ if (x86_quirks->mach_get_smp_config) {
+ if (x86_quirks->mach_get_smp_config(early))
+ return;
+ }
printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n",
mpf->mpf_specification);
diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c
index 4caff39078e0..0deea37a53cf 100644
--- a/arch/x86/kernel/numaq_32.c
+++ b/arch/x86/kernel/numaq_32.c
@@ -31,7 +31,7 @@
#include <asm/numaq.h>
#include <asm/topology.h>
#include <asm/processor.h>
-#include <asm/mpspec.h>
+#include <asm/genapic.h>
#include <asm/e820.h>
#include <asm/setup.h>
@@ -235,6 +235,13 @@ static int __init numaq_setup_ioapic_ids(void)
return 1;
}
+static int __init numaq_update_genapic(void)
+{
+ genapic->wakeup_cpu = wakeup_secondary_cpu_via_nmi;
+
+ return 0;
+}
+
static struct x86_quirks numaq_x86_quirks __initdata = {
.arch_pre_time_init = numaq_pre_time_init,
.arch_time_init = NULL,
@@ -250,6 +257,7 @@ static struct x86_quirks numaq_x86_quirks __initdata = {
.mpc_oem_pci_bus = mpc_oem_pci_bus,
.smp_read_mpc_oem = smp_read_mpc_oem,
.setup_ioapic_ids = numaq_setup_ioapic_ids,
+ .update_genapic = numaq_update_genapic,
};
void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem,
diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c
index 0e9f1982b1dd..95777b0faa73 100644
--- a/arch/x86/kernel/paravirt-spinlocks.c
+++ b/arch/x86/kernel/paravirt-spinlocks.c
@@ -7,7 +7,8 @@
#include <asm/paravirt.h>
-static void default_spin_lock_flags(struct raw_spinlock *lock, unsigned long flags)
+static inline void
+default_spin_lock_flags(raw_spinlock_t *lock, unsigned long flags)
{
__raw_spin_lock(lock);
}
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index e1e731d78f38..d28bbdc35e4e 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -1567,7 +1567,7 @@ static int __init calgary_parse_options(char *p)
++p;
if (*p == '\0')
break;
- bridge = simple_strtol(p, &endp, 0);
+ bridge = simple_strtoul(p, &endp, 0);
if (p == endp)
break;
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index a42b02b4df68..ba7ad83e20a8 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -123,6 +123,8 @@ static void free_iommu(unsigned long offset, int size)
spin_lock_irqsave(&iommu_bitmap_lock, flags);
iommu_area_free(iommu_gart_bitmap, offset, size);
+ if (offset >= next_bit)
+ next_bit = offset + size;
spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
}
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index c622772744d8..95d811a9594f 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -7,7 +7,9 @@
#include <linux/module.h>
#include <linux/pm.h>
#include <linux/clockchips.h>
+#include <linux/ftrace.h>
#include <asm/system.h>
+#include <asm/apic.h>
unsigned long idle_halt;
EXPORT_SYMBOL(idle_halt);
@@ -100,6 +102,9 @@ static inline int hlt_use_halt(void)
void default_idle(void)
{
if (hlt_use_halt()) {
+ struct power_trace it;
+
+ trace_power_start(&it, POWER_CSTATE, 1);
current_thread_info()->status &= ~TS_POLLING;
/*
* TS_POLLING-cleared state must be visible before we
@@ -112,6 +117,7 @@ void default_idle(void)
else
local_irq_enable();
current_thread_info()->status |= TS_POLLING;
+ trace_power_end(&it);
} else {
local_irq_enable();
/* loop is done by the caller */
@@ -122,6 +128,21 @@ void default_idle(void)
EXPORT_SYMBOL(default_idle);
#endif
+void stop_this_cpu(void *dummy)
+{
+ local_irq_disable();
+ /*
+ * Remove this CPU:
+ */
+ cpu_clear(smp_processor_id(), cpu_online_map);
+ disable_local_APIC();
+
+ for (;;) {
+ if (hlt_works(smp_processor_id()))
+ halt();
+ }
+}
+
static void do_nothing(void *unused)
{
}
@@ -154,24 +175,31 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait);
*/
void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
{
+ struct power_trace it;
+
+ trace_power_start(&it, POWER_CSTATE, (ax>>4)+1);
if (!need_resched()) {
__monitor((void *)&current_thread_info()->flags, 0, 0);
smp_mb();
if (!need_resched())
__mwait(ax, cx);
}
+ trace_power_end(&it);
}
/* Default MONITOR/MWAIT with no hints, used for default C1 state */
static void mwait_idle(void)
{
+ struct power_trace it;
if (!need_resched()) {
+ trace_power_start(&it, POWER_CSTATE, 1);
__monitor((void *)&current_thread_info()->flags, 0, 0);
smp_mb();
if (!need_resched())
__sti_mwait(0, 0);
else
local_irq_enable();
+ trace_power_end(&it);
} else
local_irq_enable();
}
@@ -183,9 +211,13 @@ static void mwait_idle(void)
*/
static void poll_idle(void)
{
+ struct power_trace it;
+
+ trace_power_start(&it, POWER_CSTATE, 0);
local_irq_enable();
while (!need_resched())
cpu_relax();
+ trace_power_end(&it);
}
/*
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 0a1302fe6d45..24c2276aa453 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -38,6 +38,7 @@
#include <linux/percpu.h>
#include <linux/prctl.h>
#include <linux/dmi.h>
+#include <linux/ftrace.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
@@ -548,7 +549,8 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
* the task-switch, and shows up in ret_from_fork in entry.S,
* for example.
*/
-struct task_struct * __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
+__notrace_funcgraph struct task_struct *
+__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
struct thread_struct *prev = &prev_p->thread,
*next = &next_p->thread;
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index c958120fb1b6..fbb321d53d34 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -39,6 +39,7 @@
#include <linux/prctl.h>
#include <linux/uaccess.h>
#include <linux/io.h>
+#include <linux/ftrace.h>
#include <asm/pgtable.h>
#include <asm/system.h>
@@ -551,8 +552,9 @@ static inline void __switch_to_xtra(struct task_struct *prev_p,
* - could test fs/gs bitsliced
*
* Kprobes not supported here. Set the probe on schedule instead.
+ * Function graph tracer not supported too.
*/
-struct task_struct *
+__notrace_funcgraph struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
struct thread_struct *prev = &prev_p->thread;
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 0a6d8c12e10d..2c8ec1ba75e6 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -668,14 +668,14 @@ static int ptrace_bts_read_record(struct task_struct *child, size_t index,
size_t bts_index, bts_end;
int error;
- error = ds_get_bts_end(child, &bts_end);
+ error = ds_get_bts_end(child->bts, &bts_end);
if (error < 0)
return error;
if (bts_end <= index)
return -EINVAL;
- error = ds_get_bts_index(child, &bts_index);
+ error = ds_get_bts_index(child->bts, &bts_index);
if (error < 0)
return error;
@@ -684,7 +684,7 @@ static int ptrace_bts_read_record(struct task_struct *child, size_t index,
if (bts_end <= bts_index)
bts_index -= bts_end;
- error = ds_access_bts(child, bts_index, &bts_record);
+ error = ds_access_bts(child->bts, bts_index, &bts_record);
if (error < 0)
return error;
@@ -705,14 +705,14 @@ static int ptrace_bts_drain(struct task_struct *child,
size_t end, i;
int error;
- error = ds_get_bts_index(child, &end);
+ error = ds_get_bts_index(child->bts, &end);
if (error < 0)
return error;
if (size < (end * sizeof(struct bts_struct)))
return -EIO;
- error = ds_access_bts(child, 0, (const void **)&raw);
+ error = ds_access_bts(child->bts, 0, (const void **)&raw);
if (error < 0)
return error;
@@ -723,18 +723,13 @@ static int ptrace_bts_drain(struct task_struct *child,
return -EFAULT;
}
- error = ds_clear_bts(child);
+ error = ds_clear_bts(child->bts);
if (error < 0)
return error;
return end;
}
-static void ptrace_bts_ovfl(struct task_struct *child)
-{
- send_sig(child->thread.bts_ovfl_signal, child, 0);
-}
-
static int ptrace_bts_config(struct task_struct *child,
long cfg_size,
const struct ptrace_bts_config __user *ucfg)
@@ -760,23 +755,45 @@ static int ptrace_bts_config(struct task_struct *child,
goto errout;
if (cfg.flags & PTRACE_BTS_O_ALLOC) {
- ds_ovfl_callback_t ovfl = NULL;
+ bts_ovfl_callback_t ovfl = NULL;
unsigned int sig = 0;
- /* we ignore the error in case we were not tracing child */
- (void)ds_release_bts(child);
+ error = -EINVAL;
+ if (cfg.size < (10 * bts_cfg.sizeof_bts))
+ goto errout;
if (cfg.flags & PTRACE_BTS_O_SIGNAL) {
if (!cfg.signal)
goto errout;
+ error = -EOPNOTSUPP;
+ goto errout;
+
sig = cfg.signal;
- ovfl = ptrace_bts_ovfl;
}
- error = ds_request_bts(child, /* base = */ NULL, cfg.size, ovfl);
- if (error < 0)
+ if (child->bts) {
+ (void)ds_release_bts(child->bts);
+ kfree(child->bts_buffer);
+
+ child->bts = NULL;
+ child->bts_buffer = NULL;
+ }
+
+ error = -ENOMEM;
+ child->bts_buffer = kzalloc(cfg.size, GFP_KERNEL);
+ if (!child->bts_buffer)
+ goto errout;
+
+ child->bts = ds_request_bts(child, child->bts_buffer, cfg.size,
+ ovfl, /* th = */ (size_t)-1);
+ if (IS_ERR(child->bts)) {
+ error = PTR_ERR(child->bts);
+ kfree(child->bts_buffer);
+ child->bts = NULL;
+ child->bts_buffer = NULL;
goto errout;
+ }
child->thread.bts_ovfl_signal = sig;
}
@@ -823,15 +840,15 @@ static int ptrace_bts_status(struct task_struct *child,
if (cfg_size < sizeof(cfg))
return -EIO;
- error = ds_get_bts_end(child, &end);
+ error = ds_get_bts_end(child->bts, &end);
if (error < 0)
return error;
- error = ds_access_bts(child, /* index = */ 0, &base);
+ error = ds_access_bts(child->bts, /* index = */ 0, &base);
if (error < 0)
return error;
- error = ds_access_bts(child, /* index = */ end, &max);
+ error = ds_access_bts(child->bts, /* index = */ end, &max);
if (error < 0)
return error;
@@ -884,10 +901,7 @@ static int ptrace_bts_write_record(struct task_struct *child,
return -EINVAL;
}
- /* The writing task will be the switched-to task on a context
- * switch. It needs to write into the switched-from task's BTS
- * buffer. */
- return ds_unchecked_write_bts(child, bts_record, bts_cfg.sizeof_bts);
+ return ds_write_bts(child->bts, bts_record, bts_cfg.sizeof_bts);
}
void ptrace_bts_take_timestamp(struct task_struct *tsk,
@@ -929,17 +943,16 @@ void __cpuinit ptrace_bts_init_intel(struct cpuinfo_x86 *c)
switch (c->x86) {
case 0x6:
switch (c->x86_model) {
+ case 0 ... 0xC:
+ /* sorry, don't know about them */
+ break;
case 0xD:
case 0xE: /* Pentium M */
bts_configure(&bts_cfg_pentium_m);
break;
- case 0xF: /* Core2 */
- case 0x1C: /* Atom */
+ default: /* Core2, Atom, ... */
bts_configure(&bts_cfg_core2);
break;
- default:
- /* sorry, don't know about them */
- break;
}
break;
case 0xF:
@@ -973,13 +986,17 @@ void ptrace_disable(struct task_struct *child)
clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
#endif
#ifdef CONFIG_X86_PTRACE_BTS
- (void)ds_release_bts(child);
+ if (child->bts) {
+ (void)ds_release_bts(child->bts);
+ kfree(child->bts_buffer);
+ child->bts_buffer = NULL;
- child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask;
- if (!child->thread.debugctlmsr)
- clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
+ child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask;
+ if (!child->thread.debugctlmsr)
+ clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
- clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
+ clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
+ }
#endif /* CONFIG_X86_PTRACE_BTS */
}
@@ -1111,9 +1128,16 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
(child, data, (struct ptrace_bts_config __user *)addr);
break;
- case PTRACE_BTS_SIZE:
- ret = ds_get_bts_index(child, /* pos = */ NULL);
+ case PTRACE_BTS_SIZE: {
+ size_t size;
+
+ ret = ds_get_bts_index(child->bts, &size);
+ if (ret == 0) {
+ BUG_ON(size != (int) size);
+ ret = (int) size;
+ }
break;
+ }
case PTRACE_BTS_GET:
ret = ptrace_bts_read_record
@@ -1121,7 +1145,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
break;
case PTRACE_BTS_CLEAR:
- ret = ds_clear_bts(child);
+ ret = ds_clear_bts(child->bts);
break;
case PTRACE_BTS_DRAIN:
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index c3cd512484e5..ba7b9a0e6063 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -39,7 +39,10 @@ int reboot_force;
static int reboot_cpu = -1;
#endif
-/* reboot=b[ios] | s[mp] | t[riple] | k[bd] | e[fi] [, [w]arm | [c]old]
+/* This is set by the PCI code if either type 1 or type 2 PCI is detected */
+bool port_cf9_safe = false;
+
+/* reboot=b[ios] | s[mp] | t[riple] | k[bd] | e[fi] [, [w]arm | [c]old] | p[ci]
warm Don't set the cold reboot flag
cold Set the cold reboot flag
bios Reboot by jumping through the BIOS (only for X86_32)
@@ -48,6 +51,7 @@ static int reboot_cpu = -1;
kbd Use the keyboard controller. cold reset (default)
acpi Use the RESET_REG in the FADT
efi Use efi reset_system runtime service
+ pci Use the so-called "PCI reset register", CF9
force Avoid anything that could hang.
*/
static int __init reboot_setup(char *str)
@@ -82,6 +86,7 @@ static int __init reboot_setup(char *str)
case 'k':
case 't':
case 'e':
+ case 'p':
reboot_type = *str;
break;
@@ -172,6 +177,15 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
DMI_MATCH(DMI_BOARD_NAME, "0KW626"),
},
},
+ { /* Handle problems with rebooting on Dell Optiplex 330 with 0KP561 */
+ .callback = set_bios_reboot,
+ .ident = "Dell OptiPlex 330",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 330"),
+ DMI_MATCH(DMI_BOARD_NAME, "0KP561"),
+ },
+ },
{ /* Handle problems with rebooting on Dell 2400's */
.callback = set_bios_reboot,
.ident = "Dell PowerEdge 2400",
@@ -398,12 +412,27 @@ static void native_machine_emergency_restart(void)
reboot_type = BOOT_KBD;
break;
-
case BOOT_EFI:
if (efi_enabled)
- efi.reset_system(reboot_mode ? EFI_RESET_WARM : EFI_RESET_COLD,
+ efi.reset_system(reboot_mode ?
+ EFI_RESET_WARM :
+ EFI_RESET_COLD,
EFI_SUCCESS, 0, NULL);
+ reboot_type = BOOT_KBD;
+ break;
+ case BOOT_CF9:
+ port_cf9_safe = true;
+ /* fall through */
+
+ case BOOT_CF9_COND:
+ if (port_cf9_safe) {
+ u8 cf9 = inb(0xcf9) & ~6;
+ outb(cf9|2, 0xcf9); /* Request hard reset */
+ udelay(50);
+ outb(cf9|6, 0xcf9); /* Actually do the reset */
+ udelay(50);
+ }
reboot_type = BOOT_KBD;
break;
}
@@ -464,6 +493,11 @@ static void native_machine_restart(char *__unused)
static void native_machine_halt(void)
{
+ /* stop other cpus and apics */
+ machine_shutdown();
+
+ /* stop this cpu */
+ stop_this_cpu(NULL);
}
static void native_machine_power_off(void)
@@ -558,10 +592,7 @@ static int crash_nmi_callback(struct notifier_block *self,
static void smp_send_nmi_allbutself(void)
{
- cpumask_t mask = cpu_online_map;
- cpu_clear(safe_smp_processor_id(), mask);
- if (!cpus_empty(mask))
- send_IPI_mask(mask, NMI_VECTOR);
+ send_IPI_allbutself(NMI_VECTOR);
}
static struct notifier_block crash_nmi_nb = {
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 0fa6790c1dd3..32fe42f26e79 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -583,7 +583,20 @@ static int __init setup_elfcorehdr(char *arg)
early_param("elfcorehdr", setup_elfcorehdr);
#endif
-static struct x86_quirks default_x86_quirks __initdata;
+static int __init default_update_genapic(void)
+{
+#ifdef CONFIG_X86_SMP
+# if defined(CONFIG_X86_GENERICARCH) || defined(CONFIG_X86_64)
+ genapic->wakeup_cpu = wakeup_secondary_cpu_via_init;
+# endif
+#endif
+
+ return 0;
+}
+
+static struct x86_quirks default_x86_quirks __initdata = {
+ .update_genapic = default_update_genapic,
+};
struct x86_quirks *x86_quirks __initdata = &default_x86_quirks;
@@ -764,7 +777,7 @@ static struct dmi_system_id __initdata bad_bios_dmi_table[] = {
.callback = dmi_low_memory_corruption,
.ident = "Phoenix BIOS",
.matches = {
- DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies, LTD"),
+ DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies"),
},
},
#endif
@@ -794,6 +807,9 @@ void __init setup_arch(char **cmdline_p)
printk(KERN_INFO "Command line: %s\n", boot_command_line);
#endif
+ /* VMI may relocate the fixmap; do this before touching ioremap area */
+ vmi_init();
+
early_cpu_init();
early_ioremap_init();
@@ -880,13 +896,8 @@ void __init setup_arch(char **cmdline_p)
check_efer();
#endif
-#if defined(CONFIG_VMI) && defined(CONFIG_X86_32)
- /*
- * Must be before kernel pagetables are setup
- * or fixmap area is touched.
- */
- vmi_init();
-#endif
+ /* Must be before kernel pagetables are setup */
+ vmi_activate();
/* after early param, so could get panic from serial */
reserve_early_setup_data();
@@ -1082,7 +1093,7 @@ void __init setup_arch(char **cmdline_p)
ioapic_init_mappings();
/* need to wait for io_apic is mapped */
- nr_irqs = probe_nr_irqs();
+ probe_nr_irqs_gsi();
kvm_guest_init();
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index ae0c0d3bb770..0b63b08e7530 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -152,6 +152,11 @@ void __init setup_per_cpu_areas(void)
old_size = PERCPU_ENOUGH_ROOM;
align = max_t(unsigned long, PAGE_SIZE, align);
size = roundup(old_size, align);
+
+ printk(KERN_INFO
+ "NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n",
+ NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids);
+
printk(KERN_INFO "PERCPU: Allocating %zd bytes of per cpu data\n",
size);
@@ -168,24 +173,24 @@ void __init setup_per_cpu_areas(void)
"cpu %d has no node %d or node-local memory\n",
cpu, node);
if (ptr)
- printk(KERN_DEBUG "per cpu data for cpu%d at %016lx\n",
+ printk(KERN_DEBUG
+ "per cpu data for cpu%d at %016lx\n",
cpu, __pa(ptr));
}
else {
ptr = __alloc_bootmem_node(NODE_DATA(node), size, align,
__pa(MAX_DMA_ADDRESS));
if (ptr)
- printk(KERN_DEBUG "per cpu data for cpu%d on node%d at %016lx\n",
- cpu, node, __pa(ptr));
+ printk(KERN_DEBUG
+ "per cpu data for cpu%d on node%d "
+ "at %016lx\n",
+ cpu, node, __pa(ptr));
}
#endif
per_cpu_offset(cpu) = ptr - __per_cpu_start;
memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
}
- printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d, nr_node_ids %d\n",
- NR_CPUS, nr_cpu_ids, nr_node_ids);
-
/* Setup percpu data maps */
setup_per_cpu_maps();
@@ -282,7 +287,7 @@ static void __cpuinit numa_set_cpumask(int cpu, int enable)
else
cpu_clear(cpu, *mask);
- cpulist_scnprintf(buf, sizeof(buf), *mask);
+ cpulist_scnprintf(buf, sizeof(buf), mask);
printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n",
enable? "numa_add_cpu":"numa_remove_cpu", cpu, node, buf);
}
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 18f9b19f5f8f..49ed667b06f3 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -118,41 +118,28 @@ static void native_smp_send_reschedule(int cpu)
WARN_ON(1);
return;
}
- send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
+ send_IPI_mask(cpumask_of(cpu), RESCHEDULE_VECTOR);
}
void native_send_call_func_single_ipi(int cpu)
{
- send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNCTION_SINGLE_VECTOR);
+ send_IPI_mask(cpumask_of(cpu), CALL_FUNCTION_SINGLE_VECTOR);
}
-void native_send_call_func_ipi(cpumask_t mask)
+void native_send_call_func_ipi(const struct cpumask *mask)
{
cpumask_t allbutself;
allbutself = cpu_online_map;
cpu_clear(smp_processor_id(), allbutself);
- if (cpus_equal(mask, allbutself) &&
+ if (cpus_equal(*mask, allbutself) &&
cpus_equal(cpu_online_map, cpu_callout_map))
send_IPI_allbutself(CALL_FUNCTION_VECTOR);
else
send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
}
-static void stop_this_cpu(void *dummy)
-{
- local_irq_disable();
- /*
- * Remove this CPU:
- */
- cpu_clear(smp_processor_id(), cpu_online_map);
- disable_local_APIC();
- if (hlt_works(smp_processor_id()))
- for (;;) halt();
- for (;;);
-}
-
/*
* this function calls the 'stop' function on all other CPUs in the system.
*/
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 7b1093397319..be9466788043 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -62,6 +62,7 @@
#include <asm/mtrr.h>
#include <asm/vmi.h>
#include <asm/genapic.h>
+#include <asm/setup.h>
#include <linux/mc146818rtc.h>
#include <mach_apic.h>
@@ -101,14 +102,8 @@ EXPORT_SYMBOL(smp_num_siblings);
/* Last level cache ID of each logical CPU */
DEFINE_PER_CPU(u16, cpu_llc_id) = BAD_APICID;
-/* bitmap of online cpus */
-cpumask_t cpu_online_map __read_mostly;
-EXPORT_SYMBOL(cpu_online_map);
-
cpumask_t cpu_callin_map;
cpumask_t cpu_callout_map;
-cpumask_t cpu_possible_map;
-EXPORT_SYMBOL(cpu_possible_map);
/* representing HT siblings of each logical CPU */
DEFINE_PER_CPU(cpumask_t, cpu_sibling_map);
@@ -294,9 +289,7 @@ static void __cpuinit start_secondary(void *unused)
* fragile that we want to limit the things done here to the
* most necessary things.
*/
-#ifdef CONFIG_VMI
vmi_bringup();
-#endif
cpu_init();
preempt_disable();
smp_callin();
@@ -536,7 +529,7 @@ static void impress_friends(void)
pr_debug("Before bogocount - setting activated=1.\n");
}
-static inline void __inquire_remote_apic(int apicid)
+void __inquire_remote_apic(int apicid)
{
unsigned i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
char *names[] = { "ID", "VERSION", "SPIV" };
@@ -575,14 +568,13 @@ static inline void __inquire_remote_apic(int apicid)
}
}
-#ifdef WAKE_SECONDARY_VIA_NMI
/*
* Poke the other CPU in the eye via NMI to wake it up. Remember that the normal
* INIT, INIT, STARTUP sequence will reset the chip hard for us, and this
* won't ... remember to clear down the APIC, etc later.
*/
-static int __devinit
-wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
+int __devinit
+wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip)
{
unsigned long send_status, accept_status = 0;
int maxlvt;
@@ -599,7 +591,7 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
* Give the other CPU some time to accept the IPI.
*/
udelay(200);
- if (APIC_INTEGRATED(apic_version[phys_apicid])) {
+ if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
maxlvt = lapic_get_maxlvt();
if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
apic_write(APIC_ESR, 0);
@@ -614,11 +606,9 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
return (send_status | accept_status);
}
-#endif /* WAKE_SECONDARY_VIA_NMI */
-#ifdef WAKE_SECONDARY_VIA_INIT
-static int __devinit
-wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
+int __devinit
+wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
{
unsigned long send_status, accept_status = 0;
int maxlvt, num_starts, j;
@@ -737,7 +727,6 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
return (send_status | accept_status);
}
-#endif /* WAKE_SECONDARY_VIA_INIT */
struct create_idle {
struct work_struct work;
@@ -1355,7 +1344,7 @@ void cpu_disable_common(void)
lock_vector_lock();
remove_cpu_from_maps(cpu);
unlock_vector_lock();
- fixup_irqs(cpu_online_map);
+ fixup_irqs();
}
int native_cpu_disable(void)
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index a03e7f6d90c3..10786af95545 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -6,6 +6,7 @@
#include <linux/sched.h>
#include <linux/stacktrace.h>
#include <linux/module.h>
+#include <linux/uaccess.h>
#include <asm/stacktrace.h>
static void save_stack_warning(void *data, char *msg)
@@ -83,3 +84,66 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
trace->entries[trace->nr_entries++] = ULONG_MAX;
}
EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
+
+/* Userspace stacktrace - based on kernel/trace/trace_sysprof.c */
+
+struct stack_frame {
+ const void __user *next_fp;
+ unsigned long ret_addr;
+};
+
+static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
+{
+ int ret;
+
+ if (!access_ok(VERIFY_READ, fp, sizeof(*frame)))
+ return 0;
+
+ ret = 1;
+ pagefault_disable();
+ if (__copy_from_user_inatomic(frame, fp, sizeof(*frame)))
+ ret = 0;
+ pagefault_enable();
+
+ return ret;
+}
+
+static inline void __save_stack_trace_user(struct stack_trace *trace)
+{
+ const struct pt_regs *regs = task_pt_regs(current);
+ const void __user *fp = (const void __user *)regs->bp;
+
+ if (trace->nr_entries < trace->max_entries)
+ trace->entries[trace->nr_entries++] = regs->ip;
+
+ while (trace->nr_entries < trace->max_entries) {
+ struct stack_frame frame;
+
+ frame.next_fp = NULL;
+ frame.ret_addr = 0;
+ if (!copy_stack_frame(fp, &frame))
+ break;
+ if ((unsigned long)fp < regs->sp)
+ break;
+ if (frame.ret_addr) {
+ trace->entries[trace->nr_entries++] =
+ frame.ret_addr;
+ }
+ if (fp == frame.next_fp)
+ break;
+ fp = frame.next_fp;
+ }
+}
+
+void save_stack_trace_user(struct stack_trace *trace)
+{
+ /*
+ * Trace user stack if we are not a kernel thread
+ */
+ if (current->mm) {
+ __save_stack_trace_user(trace);
+ }
+ if (trace->nr_entries < trace->max_entries)
+ trace->entries[trace->nr_entries++] = ULONG_MAX;
+}
+
diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c
index f4049f3513b6..174ea90d1cbd 100644
--- a/arch/x86/kernel/tlb_32.c
+++ b/arch/x86/kernel/tlb_32.c
@@ -164,7 +164,7 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
* We have to send the IPI only to
* CPUs affected.
*/
- send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR);
+ send_IPI_mask(&cpumask, INVALIDATE_TLB_VECTOR);
while (!cpus_empty(flush_cpumask))
/* nothing. lockup detection does not belong here */
diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c
index 8f919ca69494..de6f1bda0c50 100644
--- a/arch/x86/kernel/tlb_64.c
+++ b/arch/x86/kernel/tlb_64.c
@@ -191,7 +191,7 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
* We have to send the IPI only to
* CPUs affected.
*/
- send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR_START + sender);
+ send_IPI_mask(&cpumask, INVALIDATE_TLB_VECTOR_START + sender);
while (!cpus_empty(f->flush_cpumask))
cpu_relax();
diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c
index 9ffb01c31c40..1c0dfbca87c1 100644
--- a/arch/x86/kernel/tsc_sync.c
+++ b/arch/x86/kernel/tsc_sync.c
@@ -46,7 +46,9 @@ static __cpuinit void check_tsc_warp(void)
cycles_t start, now, prev, end;
int i;
+ rdtsc_barrier();
start = get_cycles();
+ rdtsc_barrier();
/*
* The measurement runs for 20 msecs:
*/
@@ -61,7 +63,9 @@ static __cpuinit void check_tsc_warp(void)
*/
__raw_spin_lock(&sync_lock);
prev = last_tsc;
+ rdtsc_barrier();
now = get_cycles();
+ rdtsc_barrier();
last_tsc = now;
__raw_spin_unlock(&sync_lock);
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index 8b6c393ab9fd..22fd6577156a 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -960,8 +960,6 @@ static inline int __init activate_vmi(void)
void __init vmi_init(void)
{
- unsigned long flags;
-
if (!vmi_rom)
probe_vmi_rom();
else
@@ -973,13 +971,21 @@ void __init vmi_init(void)
reserve_top_address(-vmi_rom->virtual_top);
- local_irq_save(flags);
- activate_vmi();
-
#ifdef CONFIG_X86_IO_APIC
/* This is virtual hardware; timer routing is wired correctly */
no_timer_check = 1;
#endif
+}
+
+void vmi_activate(void)
+{
+ unsigned long flags;
+
+ if (!vmi_rom)
+ return;
+
+ local_irq_save(flags);
+ activate_vmi();
local_irq_restore(flags & X86_EFLAGS_IF);
}
diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c
index 254ee07f8635..c4c1f9e09402 100644
--- a/arch/x86/kernel/vmiclock_32.c
+++ b/arch/x86/kernel/vmiclock_32.c
@@ -226,7 +226,7 @@ static void __devinit vmi_time_init_clockevent(void)
/* Upper bound is clockevent's use of ulong for cycle deltas. */
evt->max_delta_ns = clockevent_delta2ns(ULONG_MAX, evt);
evt->min_delta_ns = clockevent_delta2ns(1, evt);
- evt->cpumask = cpumask_of_cpu(cpu);
+ evt->cpumask = cpumask_of(cpu);
printk(KERN_WARNING "vmi: registering clock event %s. mult=%lu shift=%u\n",
evt->name, evt->mult, evt->shift);
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 0b8b6690a86d..6f3d3d4cd973 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -17,6 +17,9 @@
* want per guest time just set the kernel.vsyscall64 sysctl to 0.
*/
+/* Disable profiling for userspace code: */
+#define DISABLE_BRANCH_PROFILING
+
#include <linux/time.h>
#include <linux/init.h>
#include <linux/kernel.h>
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index b13acb75e822..15c3e6999182 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -310,7 +310,7 @@ static void __init setup_xstate_init(void)
/*
* Enable and initialize the xsave feature.
*/
-void __init xsave_cntxt_init(void)
+void __ref xsave_cntxt_init(void)
{
unsigned int eax, ebx, ecx, edx;
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index ce3251ce5504..b81125f0bdee 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -20,6 +20,8 @@ if VIRTUALIZATION
config KVM
tristate "Kernel-based Virtual Machine (KVM) support"
depends on HAVE_KVM
+ # for device assignment:
+ depends on PCI
select PREEMPT_NOTIFIERS
select MMU_NOTIFIER
select ANON_INODES
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 8772dc946823..59ebd37ad79e 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -548,8 +548,10 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm)
mutex_lock(&kvm->lock);
pit->irq_source_id = kvm_request_irq_source_id(kvm);
mutex_unlock(&kvm->lock);
- if (pit->irq_source_id < 0)
+ if (pit->irq_source_id < 0) {
+ kfree(pit);
return NULL;
+ }
mutex_init(&pit->pit_state.lock);
mutex_lock(&pit->pit_state.lock);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 2a5e64881d9b..410ddbc1aa2e 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -314,7 +314,7 @@ static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu)
if (r)
goto out;
r = mmu_topup_memory_cache(&vcpu->arch.mmu_rmap_desc_cache,
- rmap_desc_cache, 1);
+ rmap_desc_cache, 4);
if (r)
goto out;
r = mmu_topup_memory_cache_page(&vcpu->arch.mmu_page_cache, 8);
@@ -1038,13 +1038,13 @@ static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
}
rmap_write_protect(vcpu->kvm, sp->gfn);
+ kvm_unlink_unsync_page(vcpu->kvm, sp);
if (vcpu->arch.mmu.sync_page(vcpu, sp)) {
kvm_mmu_zap_page(vcpu->kvm, sp);
return 1;
}
kvm_mmu_flush_tlb(vcpu);
- kvm_unlink_unsync_page(vcpu->kvm, sp);
return 0;
}
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 613ec9aa674a..84eee43bbe74 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -331,6 +331,7 @@ static int FNAME(shadow_walk_entry)(struct kvm_shadow_walk *_sw,
r = kvm_read_guest_atomic(vcpu->kvm, gw->pte_gpa[level - 2],
&curr_pte, sizeof(curr_pte));
if (r || curr_pte != gw->ptes[level - 2]) {
+ kvm_mmu_put_page(shadow_page, sptep);
kvm_release_pfn_clean(sw->pfn);
sw->sptep = NULL;
return 1;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 2643b430d83a..a4018b01e1f9 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3149,7 +3149,9 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu)
if (cpu_has_virtual_nmis()) {
if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) {
- if (vmx_nmi_enabled(vcpu)) {
+ if (vcpu->arch.interrupt.pending) {
+ enable_nmi_window(vcpu);
+ } else if (vmx_nmi_enabled(vcpu)) {
vcpu->arch.nmi_pending = false;
vcpu->arch.nmi_injected = true;
} else {
@@ -3564,7 +3566,8 @@ static int __init vmx_init(void)
bypass_guest_pf = 0;
kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK |
VMX_EPT_WRITABLE_MASK |
- VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT);
+ VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT |
+ VMX_EPT_IGMT_BIT);
kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull,
VMX_EPT_EXECUTABLE_MASK);
kvm_enable_tdp();
diff --git a/arch/x86/kvm/vmx.h b/arch/x86/kvm/vmx.h
index 3e010d21fdd7..ec5edc339da6 100644
--- a/arch/x86/kvm/vmx.h
+++ b/arch/x86/kvm/vmx.h
@@ -352,6 +352,7 @@ enum vmcs_field {
#define VMX_EPT_READABLE_MASK 0x1ull
#define VMX_EPT_WRITABLE_MASK 0x2ull
#define VMX_EPT_EXECUTABLE_MASK 0x4ull
+#define VMX_EPT_IGMT_BIT (1ull << 6)
#define VMX_EPT_IDENTITY_PAGETABLE_ADDR 0xfffbc000ul
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index a5d8e1ace1cf..104c8220a383 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -737,7 +737,7 @@ static void lguest_time_init(void)
/* We can't set cpumask in the initializer: damn C limitations! Set it
* here and register our timer device. */
- lguest_clockevent.cpumask = cpumask_of_cpu(0);
+ lguest_clockevent.cpumask = cpumask_of(0);
clockevents_register_device(&lguest_clockevent);
/* Finally, we unblock the timer interrupt. */
diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c
index 9e68075544f6..4a20b2f9a381 100644
--- a/arch/x86/lib/usercopy_32.c
+++ b/arch/x86/lib/usercopy_32.c
@@ -39,7 +39,7 @@ static inline int __movsl_is_ok(unsigned long a1, unsigned long a2, unsigned lon
#define __do_strncpy_from_user(dst, src, count, res) \
do { \
int __d0, __d1, __d2; \
- might_sleep(); \
+ might_fault(); \
__asm__ __volatile__( \
" testl %1,%1\n" \
" jz 2f\n" \
@@ -126,7 +126,7 @@ EXPORT_SYMBOL(strncpy_from_user);
#define __do_clear_user(addr,size) \
do { \
int __d0; \
- might_sleep(); \
+ might_fault(); \
__asm__ __volatile__( \
"0: rep; stosl\n" \
" movl %2,%0\n" \
@@ -155,7 +155,7 @@ do { \
unsigned long
clear_user(void __user *to, unsigned long n)
{
- might_sleep();
+ might_fault();
if (access_ok(VERIFY_WRITE, to, n))
__do_clear_user(to, n);
return n;
@@ -197,7 +197,7 @@ long strnlen_user(const char __user *s, long n)
unsigned long mask = -__addr_ok(s);
unsigned long res, tmp;
- might_sleep();
+ might_fault();
__asm__ __volatile__(
" testl %0, %0\n"
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c
index f4df6e7c718b..64d6c84e6353 100644
--- a/arch/x86/lib/usercopy_64.c
+++ b/arch/x86/lib/usercopy_64.c
@@ -15,7 +15,7 @@
#define __do_strncpy_from_user(dst,src,count,res) \
do { \
long __d0, __d1, __d2; \
- might_sleep(); \
+ might_fault(); \
__asm__ __volatile__( \
" testq %1,%1\n" \
" jz 2f\n" \
@@ -64,7 +64,7 @@ EXPORT_SYMBOL(strncpy_from_user);
unsigned long __clear_user(void __user *addr, unsigned long size)
{
long __d0;
- might_sleep();
+ might_fault();
/* no memory constraint because it doesn't change any memory gcc knows
about */
asm volatile(
diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c
index 3c3b471ea496..bc4c7840b2a8 100644
--- a/arch/x86/mach-generic/bigsmp.c
+++ b/arch/x86/mach-generic/bigsmp.c
@@ -17,6 +17,7 @@
#include <asm/bigsmp/apic.h>
#include <asm/bigsmp/ipi.h>
#include <asm/mach-default/mach_mpparse.h>
+#include <asm/mach-default/mach_wakecpu.h>
static int dmi_bigsmp; /* can be set by dmi scanners */
@@ -41,9 +42,10 @@ static const struct dmi_system_id bigsmp_dmi_table[] = {
{ }
};
-static cpumask_t vector_allocation_domain(int cpu)
+static void vector_allocation_domain(int cpu, cpumask_t *retmask)
{
- return cpumask_of_cpu(cpu);
+ cpus_clear(*retmask);
+ cpu_set(cpu, *retmask);
}
static int probe_bigsmp(void)
diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c
index 9e835a11a13a..e63a4a76d8cd 100644
--- a/arch/x86/mach-generic/default.c
+++ b/arch/x86/mach-generic/default.c
@@ -16,6 +16,7 @@
#include <asm/mach-default/mach_apic.h>
#include <asm/mach-default/mach_ipi.h>
#include <asm/mach-default/mach_mpparse.h>
+#include <asm/mach-default/mach_wakecpu.h>
/* should be called last. */
static int probe_default(void)
diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c
index 28459cab3ddb..4ba5ccaa1584 100644
--- a/arch/x86/mach-generic/es7000.c
+++ b/arch/x86/mach-generic/es7000.c
@@ -16,7 +16,19 @@
#include <asm/es7000/apic.h>
#include <asm/es7000/ipi.h>
#include <asm/es7000/mpparse.h>
-#include <asm/es7000/wakecpu.h>
+#include <asm/mach-default/mach_wakecpu.h>
+
+void __init es7000_update_genapic_to_cluster(void)
+{
+ genapic->target_cpus = target_cpus_cluster;
+ genapic->int_delivery_mode = INT_DELIVERY_MODE_CLUSTER;
+ genapic->int_dest_mode = INT_DEST_MODE_CLUSTER;
+ genapic->no_balance_irq = NO_BALANCE_IRQ_CLUSTER;
+
+ genapic->init_apic_ldr = init_apic_ldr_cluster;
+
+ genapic->cpu_mask_to_apicid = cpu_mask_to_apicid_cluster;
+}
static int probe_es7000(void)
{
@@ -75,7 +87,7 @@ static int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
}
#endif
-static cpumask_t vector_allocation_domain(int cpu)
+static void vector_allocation_domain(int cpu, cpumask_t *retmask)
{
/* Careful. Some cpus do not strictly honor the set of cpus
* specified in the interrupt destination when using lowest
@@ -85,8 +97,7 @@ static cpumask_t vector_allocation_domain(int cpu)
* deliver interrupts to the wrong hyperthread when only one
* hyperthread was specified in the interrupt desitination.
*/
- cpumask_t domain = { { [0] = APIC_ALL_CPUS, } };
- return domain;
+ *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } };
}
struct genapic __initdata_refok apic_es7000 = APIC_INIT("es7000", probe_es7000);
diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c
index 71a309b122e6..511d7941364f 100644
--- a/arch/x86/mach-generic/numaq.c
+++ b/arch/x86/mach-generic/numaq.c
@@ -38,7 +38,7 @@ static int acpi_madt_oem_check(char *oem_id, char *oem_table_id)
return 0;
}
-static cpumask_t vector_allocation_domain(int cpu)
+static void vector_allocation_domain(int cpu, cpumask_t *retmask)
{
/* Careful. Some cpus do not strictly honor the set of cpus
* specified in the interrupt destination when using lowest
@@ -48,8 +48,7 @@ static cpumask_t vector_allocation_domain(int cpu)
* deliver interrupts to the wrong hyperthread when only one
* hyperthread was specified in the interrupt desitination.
*/
- cpumask_t domain = { { [0] = APIC_ALL_CPUS, } };
- return domain;
+ *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } };
}
struct genapic apic_numaq = APIC_INIT("NUMAQ", probe_numaq);
diff --git a/arch/x86/mach-generic/probe.c b/arch/x86/mach-generic/probe.c
index 5a7e4619e1c4..c346d9d0226f 100644
--- a/arch/x86/mach-generic/probe.c
+++ b/arch/x86/mach-generic/probe.c
@@ -15,6 +15,7 @@
#include <asm/mpspec.h>
#include <asm/apicdef.h>
#include <asm/genapic.h>
+#include <asm/setup.h>
extern struct genapic apic_numaq;
extern struct genapic apic_summit;
@@ -57,6 +58,9 @@ static int __init parse_apic(char *arg)
}
}
+ if (x86_quirks->update_genapic)
+ x86_quirks->update_genapic();
+
/* Parsed again by __setup for debug/verbose */
return 0;
}
@@ -72,12 +76,15 @@ void __init generic_bigsmp_probe(void)
* - we find more than 8 CPUs in acpi LAPIC listing with xAPIC support
*/
- if (!cmdline_apic && genapic == &apic_default)
+ if (!cmdline_apic && genapic == &apic_default) {
if (apic_bigsmp.probe()) {
genapic = &apic_bigsmp;
+ if (x86_quirks->update_genapic)
+ x86_quirks->update_genapic();
printk(KERN_INFO "Overriding APIC driver with %s\n",
genapic->name);
}
+ }
#endif
}
@@ -94,6 +101,9 @@ void __init generic_apic_probe(void)
/* Not visible without early console */
if (!apic_probe[i])
panic("Didn't find an APIC driver");
+
+ if (x86_quirks->update_genapic)
+ x86_quirks->update_genapic();
}
printk(KERN_INFO "Using APIC driver %s\n", genapic->name);
}
@@ -108,6 +118,8 @@ int __init mps_oem_check(struct mp_config_table *mpc, char *oem,
if (apic_probe[i]->mps_oem_check(mpc, oem, productid)) {
if (!cmdline_apic) {
genapic = apic_probe[i];
+ if (x86_quirks->update_genapic)
+ x86_quirks->update_genapic();
printk(KERN_INFO "Switched to APIC driver `%s'.\n",
genapic->name);
}
@@ -124,6 +136,8 @@ int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
if (apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id)) {
if (!cmdline_apic) {
genapic = apic_probe[i];
+ if (x86_quirks->update_genapic)
+ x86_quirks->update_genapic();
printk(KERN_INFO "Switched to APIC driver `%s'.\n",
genapic->name);
}
diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c
index 6272b5e69da6..2821ffc188b5 100644
--- a/arch/x86/mach-generic/summit.c
+++ b/arch/x86/mach-generic/summit.c
@@ -16,6 +16,7 @@
#include <asm/summit/apic.h>
#include <asm/summit/ipi.h>
#include <asm/summit/mpparse.h>
+#include <asm/mach-default/mach_wakecpu.h>
static int probe_summit(void)
{
@@ -23,7 +24,7 @@ static int probe_summit(void)
return 0;
}
-static cpumask_t vector_allocation_domain(int cpu)
+static void vector_allocation_domain(int cpu, cpumask_t *retmask)
{
/* Careful. Some cpus do not strictly honor the set of cpus
* specified in the interrupt destination when using lowest
@@ -33,8 +34,7 @@ static cpumask_t vector_allocation_domain(int cpu)
* deliver interrupts to the wrong hyperthread when only one
* hyperthread was specified in the interrupt desitination.
*/
- cpumask_t domain = { { [0] = APIC_ALL_CPUS, } };
- return domain;
+ *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } };
}
struct genapic apic_summit = APIC_INIT("summit", probe_summit);
diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c
index 0e331652681e..a5bc05492b1e 100644
--- a/arch/x86/mach-voyager/voyager_smp.c
+++ b/arch/x86/mach-voyager/voyager_smp.c
@@ -7,6 +7,7 @@
* This file provides all the same external entries as smp.c but uses
* the voyager hal to provide the functionality
*/
+#include <linux/cpu.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/kernel_stat.h>
@@ -62,11 +63,6 @@ static int voyager_extended_cpus = 1;
/* Used for the invalidate map that's also checked in the spinlock */
static volatile unsigned long smp_invalidate_needed;
-/* Bitmask of currently online CPUs - used by setup.c for
- /proc/cpuinfo, visible externally but still physical */
-cpumask_t cpu_online_map = CPU_MASK_NONE;
-EXPORT_SYMBOL(cpu_online_map);
-
/* Bitmask of CPUs present in the system - exported by i386_syms.c, used
* by scheduler but indexed physically */
cpumask_t phys_cpu_present_map = CPU_MASK_NONE;
@@ -217,8 +213,6 @@ static cpumask_t smp_commenced_mask = CPU_MASK_NONE;
/* This is for the new dynamic CPU boot code */
cpumask_t cpu_callin_map = CPU_MASK_NONE;
cpumask_t cpu_callout_map = CPU_MASK_NONE;
-cpumask_t cpu_possible_map = CPU_MASK_NONE;
-EXPORT_SYMBOL(cpu_possible_map);
/* The per processor IRQ masks (these are usually kept in sync) */
static __u16 vic_irq_mask[NR_CPUS] __cacheline_aligned;
@@ -678,7 +672,7 @@ void __init smp_boot_cpus(void)
/* loop over all the extended VIC CPUs and boot them. The
* Quad CPUs must be bootstrapped by their extended VIC cpu */
- for (i = 0; i < NR_CPUS; i++) {
+ for (i = 0; i < nr_cpu_ids; i++) {
if (i == boot_cpu_id || !cpu_isset(i, phys_cpu_present_map))
continue;
do_boot_cpu(i);
@@ -1790,6 +1784,17 @@ void __init smp_setup_processor_id(void)
x86_write_percpu(cpu_number, hard_smp_processor_id());
}
+static void voyager_send_call_func(cpumask_t callmask)
+{
+ __u32 mask = cpus_addr(callmask)[0] & ~(1 << smp_processor_id());
+ send_CPI(mask, VIC_CALL_FUNCTION_CPI);
+}
+
+static void voyager_send_call_func_single(int cpu)
+{
+ send_CPI(1 << cpu, VIC_CALL_FUNCTION_SINGLE_CPI);
+}
+
struct smp_ops smp_ops = {
.smp_prepare_boot_cpu = voyager_smp_prepare_boot_cpu,
.smp_prepare_cpus = voyager_smp_prepare_cpus,
@@ -1799,6 +1804,6 @@ struct smp_ops smp_ops = {
.smp_send_stop = voyager_smp_send_stop,
.smp_send_reschedule = voyager_smp_send_reschedule,
- .send_call_func_ipi = native_send_call_func_ipi,
- .send_call_func_single_ipi = native_send_call_func_single_ipi,
+ .send_call_func_ipi = voyager_send_call_func,
+ .send_call_func_single_ipi = voyager_send_call_func_single,
};
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index fea4565ff576..d8cc96a2738f 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -8,9 +8,8 @@ obj-$(CONFIG_X86_PTDUMP) += dump_pagetables.o
obj-$(CONFIG_HIGHMEM) += highmem_32.o
-obj-$(CONFIG_MMIOTRACE_HOOKS) += kmmio.o
obj-$(CONFIG_MMIOTRACE) += mmiotrace.o
-mmiotrace-y := pf_in.o mmio-mod.o
+mmiotrace-y := kmmio.o pf_in.o mmio-mod.o
obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o
obj-$(CONFIG_NUMA) += numa_$(BITS).o
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 31e8730fa246..21e996a70d68 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -53,7 +53,7 @@
static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr)
{
-#ifdef CONFIG_MMIOTRACE_HOOKS
+#ifdef CONFIG_MMIOTRACE
if (unlikely(is_kmmio_active()))
if (kmmio_handler(regs, addr) == 1)
return -1;
@@ -413,6 +413,7 @@ static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
unsigned long error_code)
{
unsigned long flags = oops_begin();
+ int sig = SIGKILL;
struct task_struct *tsk;
printk(KERN_ALERT "%s: Corrupted page table at address %lx\n",
@@ -423,8 +424,8 @@ static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
tsk->thread.trap_no = 14;
tsk->thread.error_code = error_code;
if (__die("Bad pagetable", regs, error_code))
- regs = NULL;
- oops_end(flags, regs, SIGKILL);
+ sig = 0;
+ oops_end(flags, regs, sig);
}
#endif
@@ -590,6 +591,7 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
int fault;
#ifdef CONFIG_X86_64
unsigned long flags;
+ int sig;
#endif
tsk = current;
@@ -849,11 +851,12 @@ no_context:
bust_spinlocks(0);
do_exit(SIGKILL);
#else
+ sig = SIGKILL;
if (__die("Oops", regs, error_code))
- regs = NULL;
+ sig = 0;
/* Executive summary in case the body of the oops scrolled away */
printk(KERN_EMERG "CR2: %016lx\n", address);
- oops_end(flags, regs, SIGKILL);
+ oops_end(flags, regs, sig);
#endif
/*
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index 847c164725f4..8518c678d83f 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -222,6 +222,41 @@ static void __init remap_numa_kva(void)
}
}
+#ifdef CONFIG_HIBERNATION
+/**
+ * resume_map_numa_kva - add KVA mapping to the temporary page tables created
+ * during resume from hibernation
+ * @pgd_base - temporary resume page directory
+ */
+void resume_map_numa_kva(pgd_t *pgd_base)
+{
+ int node;
+
+ for_each_online_node(node) {
+ unsigned long start_va, start_pfn, size, pfn;
+
+ start_va = (unsigned long)node_remap_start_vaddr[node];
+ start_pfn = node_remap_start_pfn[node];
+ size = node_remap_size[node];
+
+ printk(KERN_DEBUG "%s: node %d\n", __FUNCTION__, node);
+
+ for (pfn = 0; pfn < size; pfn += PTRS_PER_PTE) {
+ unsigned long vaddr = start_va + (pfn << PAGE_SHIFT);
+ pgd_t *pgd = pgd_base + pgd_index(vaddr);
+ pud_t *pud = pud_offset(pgd, vaddr);
+ pmd_t *pmd = pmd_offset(pud, vaddr);
+
+ set_pmd(pmd, pfn_pmd(start_pfn + pfn,
+ PAGE_KERNEL_LARGE_EXEC));
+
+ printk(KERN_DEBUG "%s: %08lx -> pfn %08lx\n",
+ __FUNCTION__, vaddr, start_pfn + pfn);
+ }
+ }
+}
+#endif
+
static unsigned long calculate_numa_remap_pages(void)
{
int nid;
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index cebcbf152d46..71a14f89f89e 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -278,7 +278,7 @@ void __init numa_init_array(void)
int rr, i;
rr = first_node(node_online_map);
- for (i = 0; i < NR_CPUS; i++) {
+ for (i = 0; i < nr_cpu_ids; i++) {
if (early_cpu_to_node(i) != NUMA_NO_NODE)
continue;
numa_set_node(i, rr);
@@ -549,7 +549,7 @@ void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn)
memnodemap[0] = 0;
node_set_online(0);
node_set(0, node_possible_map);
- for (i = 0; i < NR_CPUS; i++)
+ for (i = 0; i < nr_cpu_ids; i++)
numa_set_node(i, 0);
e820_register_active_regions(0, start_pfn, last_pfn);
setup_node_bootmem(0, start_pfn << PAGE_SHIFT, last_pfn << PAGE_SHIFT);
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index 51c0a2fc14fe..09737c8af074 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -382,7 +382,7 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
if (!node_online(i))
setup_node_bootmem(i, nodes[i].start, nodes[i].end);
- for (i = 0; i < NR_CPUS; i++) {
+ for (i = 0; i < nr_cpu_ids; i++) {
int node = early_cpu_to_node(i);
if (node == NUMA_NO_NODE)
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index 022cd41ea9b4..202864ad49a7 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -401,14 +401,13 @@ static int __init ppro_init(char **cpu_type)
*cpu_type = "i386/pii";
break;
case 6 ... 8:
+ case 10 ... 11:
*cpu_type = "i386/piii";
break;
case 9:
+ case 13:
*cpu_type = "i386/p6_mobile";
break;
- case 10 ... 13:
- *cpu_type = "i386/p6";
- break;
case 14:
*cpu_type = "i386/core";
break;
diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c
index 3f1b81a83e2e..e9f80c744cf3 100644
--- a/arch/x86/oprofile/op_model_ppro.c
+++ b/arch/x86/oprofile/op_model_ppro.c
@@ -69,7 +69,7 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs)
int i;
if (!reset_value) {
- reset_value = kmalloc(sizeof(unsigned) * num_counters,
+ reset_value = kmalloc(sizeof(reset_value[0]) * num_counters,
GFP_ATOMIC);
if (!reset_value)
return;
@@ -156,6 +156,8 @@ static void ppro_start(struct op_msrs const * const msrs)
unsigned int low, high;
int i;
+ if (!reset_value)
+ return;
for (i = 0; i < num_counters; ++i) {
if (reset_value[i]) {
CTRL_READ(low, high, msrs, i);
@@ -171,6 +173,8 @@ static void ppro_stop(struct op_msrs const * const msrs)
unsigned int low, high;
int i;
+ if (!reset_value)
+ return;
for (i = 0; i < num_counters; ++i) {
if (!reset_value[i])
continue;
diff --git a/arch/x86/pci/direct.c b/arch/x86/pci/direct.c
index 9915293500fb..9a5af6c8fbe9 100644
--- a/arch/x86/pci/direct.c
+++ b/arch/x86/pci/direct.c
@@ -173,7 +173,7 @@ static int pci_conf2_write(unsigned int seg, unsigned int bus,
#undef PCI_CONF2_ADDRESS
-static struct pci_raw_ops pci_direct_conf2 = {
+struct pci_raw_ops pci_direct_conf2 = {
.read = pci_conf2_read,
.write = pci_conf2_write,
};
@@ -289,6 +289,7 @@ int __init pci_direct_probe(void)
if (pci_check_type1()) {
raw_pci_ops = &pci_direct_conf1;
+ port_cf9_safe = true;
return 1;
}
release_resource(region);
@@ -305,6 +306,7 @@ int __init pci_direct_probe(void)
if (pci_check_type2()) {
raw_pci_ops = &pci_direct_conf2;
+ port_cf9_safe = true;
return 2;
}
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
index 3c27a809393b..2051dc96b8e9 100644
--- a/arch/x86/pci/fixup.c
+++ b/arch/x86/pci/fixup.c
@@ -496,21 +496,24 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SIEMENS, 0x0015,
pci_siemens_interrupt_controller);
/*
- * Regular PCI devices have 256 bytes, but AMD Family 10h Opteron ext config
- * have 4096 bytes. Even if the device is capable, that doesn't mean we can
- * access it. Maybe we don't have a way to generate extended config space
- * accesses. So check it
+ * Regular PCI devices have 256 bytes, but AMD Family 10h/11h CPUs have
+ * 4096 bytes configuration space for each function of their processor
+ * configuration space.
*/
-static void fam10h_pci_cfg_space_size(struct pci_dev *dev)
+static void amd_cpu_pci_cfg_space_size(struct pci_dev *dev)
{
dev->cfg_size = pci_cfg_space_size_ext(dev);
}
-
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1200, fam10h_pci_cfg_space_size);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1201, fam10h_pci_cfg_space_size);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1202, fam10h_pci_cfg_space_size);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1203, fam10h_pci_cfg_space_size);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1204, fam10h_pci_cfg_space_size);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1200, amd_cpu_pci_cfg_space_size);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1201, amd_cpu_pci_cfg_space_size);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1202, amd_cpu_pci_cfg_space_size);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1203, amd_cpu_pci_cfg_space_size);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1204, amd_cpu_pci_cfg_space_size);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1300, amd_cpu_pci_cfg_space_size);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1301, amd_cpu_pci_cfg_space_size);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1302, amd_cpu_pci_cfg_space_size);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1303, amd_cpu_pci_cfg_space_size);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1304, amd_cpu_pci_cfg_space_size);
/*
* SB600: Disable BAR1 on device 14.0 to avoid HPET resources from
diff --git a/arch/x86/pci/pci.h b/arch/x86/pci/pci.h
index 15b9cf6be729..1959018aac02 100644
--- a/arch/x86/pci/pci.h
+++ b/arch/x86/pci/pci.h
@@ -96,6 +96,7 @@ extern struct pci_raw_ops *raw_pci_ops;
extern struct pci_raw_ops *raw_pci_ext_ops;
extern struct pci_raw_ops pci_direct_conf1;
+extern bool port_cf9_safe;
/* arch_initcall level */
extern int pci_direct_probe(void);
diff --git a/arch/x86/power/hibernate_32.c b/arch/x86/power/hibernate_32.c
index f2b6e3f11bfc..81197c62d5b3 100644
--- a/arch/x86/power/hibernate_32.c
+++ b/arch/x86/power/hibernate_32.c
@@ -12,6 +12,7 @@
#include <asm/system.h>
#include <asm/page.h>
#include <asm/pgtable.h>
+#include <asm/mmzone.h>
/* Defined in hibernate_asm_32.S */
extern int restore_image(void);
@@ -127,6 +128,9 @@ static int resume_physical_mapping_init(pgd_t *pgd_base)
}
}
}
+
+ resume_map_numa_kva(pgd_base);
+
return 0;
}
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index 1ef0f90813d6..d9d35824c56f 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -9,6 +9,9 @@
* Also alternative() doesn't work.
*/
+/* Disable profiling for userspace code: */
+#define DISABLE_BRANCH_PROFILING
+
#include <linux/kernel.h>
#include <linux/posix-timers.h>
#include <linux/time.h>
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 688936044dc9..e59e53b11e2b 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -661,12 +661,11 @@ void xen_set_pgd(pgd_t *ptr, pgd_t val)
* For 64-bit, we must skip the Xen hole in the middle of the address
* space, just after the big x86-64 virtual hole.
*/
-static int xen_pgd_walk(struct mm_struct *mm,
- int (*func)(struct mm_struct *mm, struct page *,
- enum pt_level),
- unsigned long limit)
+static int __xen_pgd_walk(struct mm_struct *mm, pgd_t *pgd,
+ int (*func)(struct mm_struct *mm, struct page *,
+ enum pt_level),
+ unsigned long limit)
{
- pgd_t *pgd = mm->pgd;
int flush = 0;
unsigned hole_low, hole_high;
unsigned pgdidx_limit, pudidx_limit, pmdidx_limit;
@@ -753,6 +752,14 @@ out:
return flush;
}
+static int xen_pgd_walk(struct mm_struct *mm,
+ int (*func)(struct mm_struct *mm, struct page *,
+ enum pt_level),
+ unsigned long limit)
+{
+ return __xen_pgd_walk(mm, mm->pgd, func, limit);
+}
+
/* If we're using split pte locks, then take the page's lock and
return a pointer to it. Otherwise return NULL. */
static spinlock_t *xen_pte_lock(struct page *page, struct mm_struct *mm)
@@ -854,7 +861,7 @@ static void __xen_pgd_pin(struct mm_struct *mm, pgd_t *pgd)
xen_mc_batch();
- if (xen_pgd_walk(mm, xen_pin_page, USER_LIMIT)) {
+ if (__xen_pgd_walk(mm, pgd, xen_pin_page, USER_LIMIT)) {
/* re-enable interrupts for flushing */
xen_mc_issue(0);
@@ -998,7 +1005,7 @@ static void __xen_pgd_unpin(struct mm_struct *mm, pgd_t *pgd)
PT_PMD);
#endif
- xen_pgd_walk(mm, xen_unpin_page, USER_LIMIT);
+ __xen_pgd_walk(mm, pgd, xen_unpin_page, USER_LIMIT);
xen_mc_issue(0);
}
@@ -1072,7 +1079,7 @@ static void drop_other_mm_ref(void *info)
static void xen_drop_mm_ref(struct mm_struct *mm)
{
- cpumask_t mask;
+ cpumask_var_t mask;
unsigned cpu;
if (current->active_mm == mm) {
@@ -1084,7 +1091,16 @@ static void xen_drop_mm_ref(struct mm_struct *mm)
}
/* Get the "official" set of cpus referring to our pagetable. */
- mask = mm->cpu_vm_mask;
+ if (!alloc_cpumask_var(&mask, GFP_ATOMIC)) {
+ for_each_online_cpu(cpu) {
+ if (!cpumask_test_cpu(cpu, &mm->cpu_vm_mask)
+ && per_cpu(xen_current_cr3, cpu) != __pa(mm->pgd))
+ continue;
+ smp_call_function_single(cpu, drop_other_mm_ref, mm, 1);
+ }
+ return;
+ }
+ cpumask_copy(mask, &mm->cpu_vm_mask);
/* It's possible that a vcpu may have a stale reference to our
cr3, because its in lazy mode, and it hasn't yet flushed
@@ -1093,11 +1109,12 @@ static void xen_drop_mm_ref(struct mm_struct *mm)
if needed. */
for_each_online_cpu(cpu) {
if (per_cpu(xen_current_cr3, cpu) == __pa(mm->pgd))
- cpu_set(cpu, mask);
+ cpumask_set_cpu(cpu, mask);
}
- if (!cpus_empty(mask))
- smp_call_function_mask(mask, drop_other_mm_ref, mm, 1);
+ if (!cpumask_empty(mask))
+ smp_call_function_many(mask, drop_other_mm_ref, mm, 1);
+ free_cpumask_var(mask);
}
#else
static void xen_drop_mm_ref(struct mm_struct *mm)
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index d77da613b1d2..c44e2069c7c7 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -33,7 +33,7 @@
#include "xen-ops.h"
#include "mmu.h"
-cpumask_t xen_cpu_initialized_map;
+cpumask_var_t xen_cpu_initialized_map;
static DEFINE_PER_CPU(int, resched_irq);
static DEFINE_PER_CPU(int, callfunc_irq);
@@ -158,7 +158,7 @@ static void __init xen_fill_possible_map(void)
{
int i, rc;
- for (i = 0; i < NR_CPUS; i++) {
+ for (i = 0; i < nr_cpu_ids; i++) {
rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL);
if (rc >= 0) {
num_processors++;
@@ -192,11 +192,14 @@ static void __init xen_smp_prepare_cpus(unsigned int max_cpus)
if (xen_smp_intr_init(0))
BUG();
- xen_cpu_initialized_map = cpumask_of_cpu(0);
+ if (!alloc_cpumask_var(&xen_cpu_initialized_map, GFP_KERNEL))
+ panic("could not allocate xen_cpu_initialized_map\n");
+
+ cpumask_copy(xen_cpu_initialized_map, cpumask_of(0));
/* Restrict the possible_map according to max_cpus. */
while ((num_possible_cpus() > 1) && (num_possible_cpus() > max_cpus)) {
- for (cpu = NR_CPUS - 1; !cpu_possible(cpu); cpu--)
+ for (cpu = nr_cpu_ids - 1; !cpu_possible(cpu); cpu--)
continue;
cpu_clear(cpu, cpu_possible_map);
}
@@ -221,7 +224,7 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
struct vcpu_guest_context *ctxt;
struct desc_struct *gdt;
- if (cpu_test_and_set(cpu, xen_cpu_initialized_map))
+ if (cpumask_test_and_set_cpu(cpu, xen_cpu_initialized_map))
return 0;
ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
@@ -362,7 +365,7 @@ static void xen_cpu_die(unsigned int cpu)
alternatives_smp_switch(0);
}
-static void xen_play_dead(void)
+static void __cpuinit xen_play_dead(void) /* used only with CPU_HOTPLUG */
{
play_dead_common();
HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL);
@@ -408,24 +411,23 @@ static void xen_smp_send_reschedule(int cpu)
xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR);
}
-static void xen_send_IPI_mask(cpumask_t mask, enum ipi_vector vector)
+static void xen_send_IPI_mask(const struct cpumask *mask,
+ enum ipi_vector vector)
{
unsigned cpu;
- cpus_and(mask, mask, cpu_online_map);
-
- for_each_cpu_mask_nr(cpu, mask)
+ for_each_cpu_and(cpu, mask, cpu_online_mask)
xen_send_IPI_one(cpu, vector);
}
-static void xen_smp_send_call_function_ipi(cpumask_t mask)
+static void xen_smp_send_call_function_ipi(const struct cpumask *mask)
{
int cpu;
xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR);
/* Make sure other vcpus get a chance to run if they need to. */
- for_each_cpu_mask_nr(cpu, mask) {
+ for_each_cpu(cpu, mask) {
if (xen_vcpu_stolen(cpu)) {
HYPERVISOR_sched_op(SCHEDOP_yield, 0);
break;
@@ -435,7 +437,8 @@ static void xen_smp_send_call_function_ipi(cpumask_t mask)
static void xen_smp_send_call_function_single_ipi(int cpu)
{
- xen_send_IPI_mask(cpumask_of_cpu(cpu), XEN_CALL_FUNCTION_SINGLE_VECTOR);
+ xen_send_IPI_mask(cpumask_of(cpu),
+ XEN_CALL_FUNCTION_SINGLE_VECTOR);
}
static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id)
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c
index 2a234db5949b..212ffe012b76 100644
--- a/arch/x86/xen/suspend.c
+++ b/arch/x86/xen/suspend.c
@@ -35,7 +35,8 @@ void xen_post_suspend(int suspend_cancelled)
pfn_to_mfn(xen_start_info->console.domU.mfn);
} else {
#ifdef CONFIG_SMP
- xen_cpu_initialized_map = cpu_online_map;
+ BUG_ON(xen_cpu_initialized_map == NULL);
+ cpumask_copy(xen_cpu_initialized_map, cpu_online_mask);
#endif
xen_vcpu_restore();
}
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index c9f7cda48ed7..65d75a6be0ba 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -437,7 +437,7 @@ void xen_setup_timer(int cpu)
evt = &per_cpu(xen_clock_events, cpu);
memcpy(evt, xen_clockevent, sizeof(*evt));
- evt->cpumask = cpumask_of_cpu(cpu);
+ evt->cpumask = cpumask_of(cpu);
evt->irq = irq;
setup_runstate_info(cpu);
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index d7422dc2a55c..c1f8faf0a2c5 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -49,7 +49,7 @@ bool xen_vcpu_stolen(int vcpu);
void xen_mark_init_mm_pinned(void);
-void __init xen_setup_vcpu_info_placement(void);
+void xen_setup_vcpu_info_placement(void);
#ifdef CONFIG_SMP
void xen_smp_init(void);
@@ -58,7 +58,7 @@ void __init xen_init_spinlocks(void);
__cpuinit void xen_init_lock_cpu(int cpu);
void xen_uninit_lock_cpu(int cpu);
-extern cpumask_t xen_cpu_initialized_map;
+extern cpumask_var_t xen_cpu_initialized_map;
#else
static inline void xen_smp_init(void) {}
#endif
OpenPOWER on IntegriCloud