summaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--arch/x86/kernel/acpi/boot.c3
-rw-r--r--arch/x86/kernel/aperture_64.c20
-rw-r--r--arch/x86/kernel/apic/io_apic.c5
-rw-r--r--arch/x86/kernel/cpu/cyrix.c14
-rw-r--r--arch/x86/kernel/cpu/mce/amd.c62
-rw-r--r--arch/x86/kernel/cpu/mce/apei.c10
-rw-r--r--arch/x86/kernel/cpu/mce/core.c30
-rw-r--r--arch/x86/kernel/cpu/mce/severity.c5
-rw-r--r--arch/x86/kernel/cpu/microcode/core.c2
-rw-r--r--arch/x86/kernel/cpu/mshyperv.c12
-rw-r--r--arch/x86/kernel/cpu/resctrl/internal.h16
-rw-r--r--arch/x86/kernel/cpu/resctrl/monitor.c3
-rw-r--r--arch/x86/kernel/cpu/resctrl/rdtgroup.c185
-rw-r--r--arch/x86/kernel/e820.c10
-rw-r--r--arch/x86/kernel/ftrace.c42
-rw-r--r--arch/x86/kernel/hpet.c2
-rw-r--r--arch/x86/kernel/hw_breakpoint.c1
-rw-r--r--arch/x86/kernel/kexec-bzimage64.c14
-rw-r--r--arch/x86/kernel/kvmclock.c20
-rw-r--r--arch/x86/kernel/mpparse.c4
-rw-r--r--arch/x86/kernel/setup_percpu.c10
-rw-r--r--arch/x86/kernel/unwind_frame.c25
-rw-r--r--arch/x86/kernel/unwind_orc.c17
23 files changed, 343 insertions, 169 deletions
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 2624de16cd7a..8dcbf6890714 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -935,6 +935,9 @@ static int __init acpi_parse_hpet(struct acpi_table_header *table)
#define HPET_RESOURCE_NAME_SIZE 9
hpet_res = memblock_alloc(sizeof(*hpet_res) + HPET_RESOURCE_NAME_SIZE,
SMP_CACHE_BYTES);
+ if (!hpet_res)
+ panic("%s: Failed to allocate %zu bytes\n", __func__,
+ sizeof(*hpet_res) + HPET_RESOURCE_NAME_SIZE);
hpet_res->name = (void *)&hpet_res[1];
hpet_res->flags = IORESOURCE_MEM;
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 58176b56354e..294ed4392a0e 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -14,6 +14,7 @@
#define pr_fmt(fmt) "AGP: " fmt
#include <linux/kernel.h>
+#include <linux/kcore.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/memblock.h>
@@ -57,7 +58,7 @@ int fallback_aper_force __initdata;
int fix_aperture __initdata = 1;
-#ifdef CONFIG_PROC_VMCORE
+#if defined(CONFIG_PROC_VMCORE) || defined(CONFIG_PROC_KCORE)
/*
* If the first kernel maps the aperture over e820 RAM, the kdump kernel will
* use the same range because it will remain configured in the northbridge.
@@ -66,20 +67,25 @@ int fix_aperture __initdata = 1;
*/
static unsigned long aperture_pfn_start, aperture_page_count;
-static int gart_oldmem_pfn_is_ram(unsigned long pfn)
+static int gart_mem_pfn_is_ram(unsigned long pfn)
{
return likely((pfn < aperture_pfn_start) ||
(pfn >= aperture_pfn_start + aperture_page_count));
}
-static void exclude_from_vmcore(u64 aper_base, u32 aper_order)
+static void __init exclude_from_core(u64 aper_base, u32 aper_order)
{
aperture_pfn_start = aper_base >> PAGE_SHIFT;
aperture_page_count = (32 * 1024 * 1024) << aper_order >> PAGE_SHIFT;
- WARN_ON(register_oldmem_pfn_is_ram(&gart_oldmem_pfn_is_ram));
+#ifdef CONFIG_PROC_VMCORE
+ WARN_ON(register_oldmem_pfn_is_ram(&gart_mem_pfn_is_ram));
+#endif
+#ifdef CONFIG_PROC_KCORE
+ WARN_ON(register_mem_pfn_is_ram(&gart_mem_pfn_is_ram));
+#endif
}
#else
-static void exclude_from_vmcore(u64 aper_base, u32 aper_order)
+static void exclude_from_core(u64 aper_base, u32 aper_order)
{
}
#endif
@@ -474,7 +480,7 @@ out:
* may have allocated the range over its e820 RAM
* and fixed up the northbridge
*/
- exclude_from_vmcore(last_aper_base, last_aper_order);
+ exclude_from_core(last_aper_base, last_aper_order);
return 1;
}
@@ -520,7 +526,7 @@ out:
* overlap with the first kernel's memory. We can't access the
* range through vmcore even though it should be part of the dump.
*/
- exclude_from_vmcore(aper_alloc, aper_order);
+ exclude_from_core(aper_alloc, aper_order);
/* Fix up the north bridges */
for (i = 0; i < amd_nb_bus_dev_ranges[i].dev_limit; i++) {
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 264e3221d923..53aa234a6803 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -2581,6 +2581,8 @@ static struct resource * __init ioapic_setup_resources(void)
n *= nr_ioapics;
mem = memblock_alloc(n, SMP_CACHE_BYTES);
+ if (!mem)
+ panic("%s: Failed to allocate %lu bytes\n", __func__, n);
res = (void *)mem;
mem += sizeof(struct resource) * nr_ioapics;
@@ -2625,6 +2627,9 @@ fake_ioapic_page:
#endif
ioapic_phys = (unsigned long)memblock_alloc(PAGE_SIZE,
PAGE_SIZE);
+ if (!ioapic_phys)
+ panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
+ __func__, PAGE_SIZE, PAGE_SIZE);
ioapic_phys = __pa(ioapic_phys);
}
set_fixmap_nocache(idx, ioapic_phys);
diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c
index d12226f60168..1d9b8aaea06c 100644
--- a/arch/x86/kernel/cpu/cyrix.c
+++ b/arch/x86/kernel/cpu/cyrix.c
@@ -124,7 +124,7 @@ static void set_cx86_reorder(void)
setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */
/* Load/Store Serialize to mem access disable (=reorder it) */
- setCx86_old(CX86_PCR0, getCx86_old(CX86_PCR0) & ~0x80);
+ setCx86(CX86_PCR0, getCx86(CX86_PCR0) & ~0x80);
/* set load/store serialize from 1GB to 4GB */
ccr3 |= 0xe0;
setCx86(CX86_CCR3, ccr3);
@@ -135,11 +135,11 @@ static void set_cx86_memwb(void)
pr_info("Enable Memory-Write-back mode on Cyrix/NSC processor.\n");
/* CCR2 bit 2: unlock NW bit */
- setCx86_old(CX86_CCR2, getCx86_old(CX86_CCR2) & ~0x04);
+ setCx86(CX86_CCR2, getCx86(CX86_CCR2) & ~0x04);
/* set 'Not Write-through' */
write_cr0(read_cr0() | X86_CR0_NW);
/* CCR2 bit 2: lock NW bit and set WT1 */
- setCx86_old(CX86_CCR2, getCx86_old(CX86_CCR2) | 0x14);
+ setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x14);
}
/*
@@ -153,14 +153,14 @@ static void geode_configure(void)
local_irq_save(flags);
/* Suspend on halt power saving and enable #SUSP pin */
- setCx86_old(CX86_CCR2, getCx86_old(CX86_CCR2) | 0x88);
+ setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x88);
ccr3 = getCx86(CX86_CCR3);
setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */
/* FPU fast, DTE cache, Mem bypass */
- setCx86_old(CX86_CCR4, getCx86_old(CX86_CCR4) | 0x38);
+ setCx86(CX86_CCR4, getCx86(CX86_CCR4) | 0x38);
setCx86(CX86_CCR3, ccr3); /* disable MAPEN */
set_cx86_memwb();
@@ -296,7 +296,7 @@ static void init_cyrix(struct cpuinfo_x86 *c)
/* GXm supports extended cpuid levels 'ala' AMD */
if (c->cpuid_level == 2) {
/* Enable cxMMX extensions (GX1 Datasheet 54) */
- setCx86_old(CX86_CCR7, getCx86_old(CX86_CCR7) | 1);
+ setCx86(CX86_CCR7, getCx86(CX86_CCR7) | 1);
/*
* GXm : 0x30 ... 0x5f GXm datasheet 51
@@ -319,7 +319,7 @@ static void init_cyrix(struct cpuinfo_x86 *c)
if (dir1 > 7) {
dir0_msn++; /* M II */
/* Enable MMX extensions (App note 108) */
- setCx86_old(CX86_CCR7, getCx86_old(CX86_CCR7)|1);
+ setCx86(CX86_CCR7, getCx86(CX86_CCR7)|1);
} else {
/* A 6x86MX - it has the bug. */
set_cpu_bug(c, X86_BUG_COMA);
diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index 89298c83de53..e64de5149e50 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -88,11 +88,17 @@ static struct smca_bank_name smca_names[] = {
[SMCA_FP] = { "floating_point", "Floating Point Unit" },
[SMCA_L3_CACHE] = { "l3_cache", "L3 Cache" },
[SMCA_CS] = { "coherent_slave", "Coherent Slave" },
+ [SMCA_CS_V2] = { "coherent_slave", "Coherent Slave" },
[SMCA_PIE] = { "pie", "Power, Interrupts, etc." },
[SMCA_UMC] = { "umc", "Unified Memory Controller" },
[SMCA_PB] = { "param_block", "Parameter Block" },
[SMCA_PSP] = { "psp", "Platform Security Processor" },
+ [SMCA_PSP_V2] = { "psp", "Platform Security Processor" },
[SMCA_SMU] = { "smu", "System Management Unit" },
+ [SMCA_SMU_V2] = { "smu", "System Management Unit" },
+ [SMCA_MP5] = { "mp5", "Microprocessor 5 Unit" },
+ [SMCA_NBIO] = { "nbio", "Northbridge IO Unit" },
+ [SMCA_PCIE] = { "pcie", "PCI Express Unit" },
};
static u32 smca_bank_addrs[MAX_NR_BANKS][NR_BLOCKS] __ro_after_init =
@@ -138,30 +144,42 @@ static struct smca_hwid smca_hwid_mcatypes[] = {
{ SMCA_RESERVED, HWID_MCATYPE(0x00, 0x0), 0x0 },
/* ZN Core (HWID=0xB0) MCA types */
- { SMCA_LS, HWID_MCATYPE(0xB0, 0x0), 0x1FFFEF },
+ { SMCA_LS, HWID_MCATYPE(0xB0, 0x0), 0x1FFFFF },
{ SMCA_IF, HWID_MCATYPE(0xB0, 0x1), 0x3FFF },
{ SMCA_L2_CACHE, HWID_MCATYPE(0xB0, 0x2), 0xF },
{ SMCA_DE, HWID_MCATYPE(0xB0, 0x3), 0x1FF },
/* HWID 0xB0 MCATYPE 0x4 is Reserved */
- { SMCA_EX, HWID_MCATYPE(0xB0, 0x5), 0x7FF },
+ { SMCA_EX, HWID_MCATYPE(0xB0, 0x5), 0xFFF },
{ SMCA_FP, HWID_MCATYPE(0xB0, 0x6), 0x7F },
{ SMCA_L3_CACHE, HWID_MCATYPE(0xB0, 0x7), 0xFF },
/* Data Fabric MCA types */
{ SMCA_CS, HWID_MCATYPE(0x2E, 0x0), 0x1FF },
- { SMCA_PIE, HWID_MCATYPE(0x2E, 0x1), 0xF },
+ { SMCA_PIE, HWID_MCATYPE(0x2E, 0x1), 0x1F },
+ { SMCA_CS_V2, HWID_MCATYPE(0x2E, 0x2), 0x3FFF },
/* Unified Memory Controller MCA type */
- { SMCA_UMC, HWID_MCATYPE(0x96, 0x0), 0x3F },
+ { SMCA_UMC, HWID_MCATYPE(0x96, 0x0), 0xFF },
/* Parameter Block MCA type */
{ SMCA_PB, HWID_MCATYPE(0x05, 0x0), 0x1 },
/* Platform Security Processor MCA type */
{ SMCA_PSP, HWID_MCATYPE(0xFF, 0x0), 0x1 },
+ { SMCA_PSP_V2, HWID_MCATYPE(0xFF, 0x1), 0x3FFFF },
/* System Management Unit MCA type */
{ SMCA_SMU, HWID_MCATYPE(0x01, 0x0), 0x1 },
+ { SMCA_SMU_V2, HWID_MCATYPE(0x01, 0x1), 0x7FF },
+
+ /* Microprocessor 5 Unit MCA type */
+ { SMCA_MP5, HWID_MCATYPE(0x01, 0x2), 0x3FF },
+
+ /* Northbridge IO Unit MCA type */
+ { SMCA_NBIO, HWID_MCATYPE(0x18, 0x0), 0x1F },
+
+ /* PCI Express Unit MCA type */
+ { SMCA_PCIE, HWID_MCATYPE(0x46, 0x0), 0x1F },
};
struct smca_bank smca_banks[MAX_NR_BANKS];
@@ -545,6 +563,40 @@ out:
return offset;
}
+/*
+ * Turn off MC4_MISC thresholding banks on all family 0x15 models since
+ * they're not supported there.
+ */
+void disable_err_thresholding(struct cpuinfo_x86 *c)
+{
+ int i;
+ u64 hwcr;
+ bool need_toggle;
+ u32 msrs[] = {
+ 0x00000413, /* MC4_MISC0 */
+ 0xc0000408, /* MC4_MISC1 */
+ };
+
+ if (c->x86 != 0x15)
+ return;
+
+ rdmsrl(MSR_K7_HWCR, hwcr);
+
+ /* McStatusWrEn has to be set */
+ need_toggle = !(hwcr & BIT(18));
+
+ if (need_toggle)
+ wrmsrl(MSR_K7_HWCR, hwcr | BIT(18));
+
+ /* Clear CntP bit safely */
+ for (i = 0; i < ARRAY_SIZE(msrs); i++)
+ msr_clear_bit(msrs[i], 62);
+
+ /* restore old settings */
+ if (need_toggle)
+ wrmsrl(MSR_K7_HWCR, hwcr);
+}
+
/* cpu init entry point, called from mce.c with preempt off */
void mce_amd_feature_init(struct cpuinfo_x86 *c)
{
@@ -552,6 +604,8 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
unsigned int bank, block, cpu = smp_processor_id();
int offset = -1;
+ disable_err_thresholding(c);
+
for (bank = 0; bank < mca_cfg.banks; ++bank) {
if (mce_flags.smca)
smca_configure(bank, cpu);
diff --git a/arch/x86/kernel/cpu/mce/apei.c b/arch/x86/kernel/cpu/mce/apei.c
index 1d9b3ce662a0..c038e5c00a59 100644
--- a/arch/x86/kernel/cpu/mce/apei.c
+++ b/arch/x86/kernel/cpu/mce/apei.c
@@ -64,11 +64,11 @@ void apei_mce_report_mem_error(int severity, struct cper_sec_mem_err *mem_err)
EXPORT_SYMBOL_GPL(apei_mce_report_mem_error);
#define CPER_CREATOR_MCE \
- UUID_LE(0x75a574e3, 0x5052, 0x4b29, 0x8a, 0x8e, 0xbe, 0x2c, \
- 0x64, 0x90, 0xb8, 0x9d)
+ GUID_INIT(0x75a574e3, 0x5052, 0x4b29, 0x8a, 0x8e, 0xbe, 0x2c, \
+ 0x64, 0x90, 0xb8, 0x9d)
#define CPER_SECTION_TYPE_MCE \
- UUID_LE(0xfe08ffbe, 0x95e4, 0x4be7, 0xbc, 0x73, 0x40, 0x96, \
- 0x04, 0x4a, 0x38, 0xfc)
+ GUID_INIT(0xfe08ffbe, 0x95e4, 0x4be7, 0xbc, 0x73, 0x40, 0x96, \
+ 0x04, 0x4a, 0x38, 0xfc)
/*
* CPER specification (in UEFI specification 2.3 appendix N) requires
@@ -135,7 +135,7 @@ retry:
goto out;
/* try to skip other type records in storage */
else if (rc != sizeof(rcd) ||
- uuid_le_cmp(rcd.hdr.creator_id, CPER_CREATOR_MCE))
+ !guid_equal(&rcd.hdr.creator_id, &CPER_CREATOR_MCE))
goto retry;
memcpy(m, &rcd.mce, sizeof(*m));
rc = sizeof(*m);
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 6ce290c506d9..b7fb541a4873 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -1612,36 +1612,6 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
if (c->x86 == 0x15 && c->x86_model <= 0xf)
mce_flags.overflow_recov = 1;
- /*
- * Turn off MC4_MISC thresholding banks on those models since
- * they're not supported there.
- */
- if (c->x86 == 0x15 &&
- (c->x86_model >= 0x10 && c->x86_model <= 0x1f)) {
- int i;
- u64 hwcr;
- bool need_toggle;
- u32 msrs[] = {
- 0x00000413, /* MC4_MISC0 */
- 0xc0000408, /* MC4_MISC1 */
- };
-
- rdmsrl(MSR_K7_HWCR, hwcr);
-
- /* McStatusWrEn has to be set */
- need_toggle = !(hwcr & BIT(18));
-
- if (need_toggle)
- wrmsrl(MSR_K7_HWCR, hwcr | BIT(18));
-
- /* Clear CntP bit safely */
- for (i = 0; i < ARRAY_SIZE(msrs); i++)
- msr_clear_bit(msrs[i], 62);
-
- /* restore old settings */
- if (need_toggle)
- wrmsrl(MSR_K7_HWCR, hwcr);
- }
}
if (c->x86_vendor == X86_VENDOR_INTEL) {
diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c
index dc3e26e905a3..65201e180fe0 100644
--- a/arch/x86/kernel/cpu/mce/severity.c
+++ b/arch/x86/kernel/cpu/mce/severity.c
@@ -165,6 +165,11 @@ static struct severity {
SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
KERNEL
),
+ MCESEV(
+ PANIC, "Instruction fetch error in kernel",
+ SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR),
+ KERNEL
+ ),
#endif
MCESEV(
PANIC, "Action required: unknown MCACOD",
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index 97f9ada9ceda..5260185cbf7b 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -608,6 +608,8 @@ static int microcode_reload_late(void)
if (ret > 0)
microcode_check();
+ pr_info("Reload completed, microcode revision: 0x%x\n", boot_cpu_data.microcode);
+
return ret;
}
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index e81a2db42df7..3fa238a137d2 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -328,6 +328,18 @@ static void __init ms_hyperv_init_platform(void)
# ifdef CONFIG_SMP
smp_ops.smp_prepare_boot_cpu = hv_smp_prepare_boot_cpu;
# endif
+
+ /*
+ * Hyper-V doesn't provide irq remapping for IO-APIC. To enable x2apic,
+ * set x2apic destination mode to physcial mode when x2apic is available
+ * and Hyper-V IOMMU driver makes sure cpus assigned with IO-APIC irqs
+ * have 8-bit APIC id.
+ */
+# ifdef CONFIG_X86_X2APIC
+ if (x2apic_supported())
+ x2apic_phys = 1;
+# endif
+
#endif
}
diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index 822b7db634ee..e49b77283924 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -4,6 +4,7 @@
#include <linux/sched.h>
#include <linux/kernfs.h>
+#include <linux/fs_context.h>
#include <linux/jump_label.h>
#define MSR_IA32_L3_QOS_CFG 0xc81
@@ -40,6 +41,21 @@
#define RMID_VAL_ERROR BIT_ULL(63)
#define RMID_VAL_UNAVAIL BIT_ULL(62)
+
+struct rdt_fs_context {
+ struct kernfs_fs_context kfc;
+ bool enable_cdpl2;
+ bool enable_cdpl3;
+ bool enable_mba_mbps;
+};
+
+static inline struct rdt_fs_context *rdt_fc2context(struct fs_context *fc)
+{
+ struct kernfs_fs_context *kfc = fc->fs_private;
+
+ return container_of(kfc, struct rdt_fs_context, kfc);
+}
+
DECLARE_STATIC_KEY_FALSE(rdt_enable_key);
/**
diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index f33f11f69078..1573a0a6b525 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -501,11 +501,8 @@ out_unlock:
void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms)
{
unsigned long delay = msecs_to_jiffies(delay_ms);
- struct rdt_resource *r;
int cpu;
- r = &rdt_resources_all[RDT_RESOURCE_L3];
-
cpu = cpumask_any(&dom->cpu_mask);
dom->cqm_work_cpu = cpu;
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 8388adf241b2..399601eda8e4 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -24,6 +24,7 @@
#include <linux/cpu.h>
#include <linux/debugfs.h>
#include <linux/fs.h>
+#include <linux/fs_parser.h>
#include <linux/sysfs.h>
#include <linux/kernfs.h>
#include <linux/seq_buf.h>
@@ -32,6 +33,7 @@
#include <linux/sched/task.h>
#include <linux/slab.h>
#include <linux/task_work.h>
+#include <linux/user_namespace.h>
#include <uapi/linux/magic.h>
@@ -1858,46 +1860,6 @@ static void cdp_disable_all(void)
cdpl2_disable();
}
-static int parse_rdtgroupfs_options(char *data)
-{
- char *token, *o = data;
- int ret = 0;
-
- while ((token = strsep(&o, ",")) != NULL) {
- if (!*token) {
- ret = -EINVAL;
- goto out;
- }
-
- if (!strcmp(token, "cdp")) {
- ret = cdpl3_enable();
- if (ret)
- goto out;
- } else if (!strcmp(token, "cdpl2")) {
- ret = cdpl2_enable();
- if (ret)
- goto out;
- } else if (!strcmp(token, "mba_MBps")) {
- if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
- ret = set_mba_sc(true);
- else
- ret = -EINVAL;
- if (ret)
- goto out;
- } else {
- ret = -EINVAL;
- goto out;
- }
- }
-
- return 0;
-
-out:
- pr_err("Invalid mount option \"%s\"\n", token);
-
- return ret;
-}
-
/*
* We don't allow rdtgroup directories to be created anywhere
* except the root directory. Thus when looking for the rdtgroup
@@ -1969,13 +1931,27 @@ static int mkdir_mondata_all(struct kernfs_node *parent_kn,
struct rdtgroup *prgrp,
struct kernfs_node **mon_data_kn);
-static struct dentry *rdt_mount(struct file_system_type *fs_type,
- int flags, const char *unused_dev_name,
- void *data)
+static int rdt_enable_ctx(struct rdt_fs_context *ctx)
+{
+ int ret = 0;
+
+ if (ctx->enable_cdpl2)
+ ret = cdpl2_enable();
+
+ if (!ret && ctx->enable_cdpl3)
+ ret = cdpl3_enable();
+
+ if (!ret && ctx->enable_mba_mbps)
+ ret = set_mba_sc(true);
+
+ return ret;
+}
+
+static int rdt_get_tree(struct fs_context *fc)
{
+ struct rdt_fs_context *ctx = rdt_fc2context(fc);
struct rdt_domain *dom;
struct rdt_resource *r;
- struct dentry *dentry;
int ret;
cpus_read_lock();
@@ -1984,53 +1960,42 @@ static struct dentry *rdt_mount(struct file_system_type *fs_type,
* resctrl file system can only be mounted once.
*/
if (static_branch_unlikely(&rdt_enable_key)) {
- dentry = ERR_PTR(-EBUSY);
+ ret = -EBUSY;
goto out;
}
- ret = parse_rdtgroupfs_options(data);
- if (ret) {
- dentry = ERR_PTR(ret);
+ ret = rdt_enable_ctx(ctx);
+ if (ret < 0)
goto out_cdp;
- }
closid_init();
ret = rdtgroup_create_info_dir(rdtgroup_default.kn);
- if (ret) {
- dentry = ERR_PTR(ret);
- goto out_cdp;
- }
+ if (ret < 0)
+ goto out_mba;
if (rdt_mon_capable) {
ret = mongroup_create_dir(rdtgroup_default.kn,
NULL, "mon_groups",
&kn_mongrp);
- if (ret) {
- dentry = ERR_PTR(ret);
+ if (ret < 0)
goto out_info;
- }
kernfs_get(kn_mongrp);
ret = mkdir_mondata_all(rdtgroup_default.kn,
&rdtgroup_default, &kn_mondata);
- if (ret) {
- dentry = ERR_PTR(ret);
+ if (ret < 0)
goto out_mongrp;
- }
kernfs_get(kn_mondata);
rdtgroup_default.mon.mon_data_kn = kn_mondata;
}
ret = rdt_pseudo_lock_init();
- if (ret) {
- dentry = ERR_PTR(ret);
+ if (ret)
goto out_mondata;
- }
- dentry = kernfs_mount(fs_type, flags, rdt_root,
- RDTGROUP_SUPER_MAGIC, NULL);
- if (IS_ERR(dentry))
+ ret = kernfs_get_tree(fc);
+ if (ret < 0)
goto out_psl;
if (rdt_alloc_capable)
@@ -2059,14 +2024,95 @@ out_mongrp:
kernfs_remove(kn_mongrp);
out_info:
kernfs_remove(kn_info);
+out_mba:
+ if (ctx->enable_mba_mbps)
+ set_mba_sc(false);
out_cdp:
cdp_disable_all();
out:
rdt_last_cmd_clear();
mutex_unlock(&rdtgroup_mutex);
cpus_read_unlock();
+ return ret;
+}
+
+enum rdt_param {
+ Opt_cdp,
+ Opt_cdpl2,
+ Opt_mba_mpbs,
+ nr__rdt_params
+};
+
+static const struct fs_parameter_spec rdt_param_specs[] = {
+ fsparam_flag("cdp", Opt_cdp),
+ fsparam_flag("cdpl2", Opt_cdpl2),
+ fsparam_flag("mba_mpbs", Opt_mba_mpbs),
+ {}
+};
+
+static const struct fs_parameter_description rdt_fs_parameters = {
+ .name = "rdt",
+ .specs = rdt_param_specs,
+};
+
+static int rdt_parse_param(struct fs_context *fc, struct fs_parameter *param)
+{
+ struct rdt_fs_context *ctx = rdt_fc2context(fc);
+ struct fs_parse_result result;
+ int opt;
+
+ opt = fs_parse(fc, &rdt_fs_parameters, param, &result);
+ if (opt < 0)
+ return opt;
- return dentry;
+ switch (opt) {
+ case Opt_cdp:
+ ctx->enable_cdpl3 = true;
+ return 0;
+ case Opt_cdpl2:
+ ctx->enable_cdpl2 = true;
+ return 0;
+ case Opt_mba_mpbs:
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
+ return -EINVAL;
+ ctx->enable_mba_mbps = true;
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
+static void rdt_fs_context_free(struct fs_context *fc)
+{
+ struct rdt_fs_context *ctx = rdt_fc2context(fc);
+
+ kernfs_free_fs_context(fc);
+ kfree(ctx);
+}
+
+static const struct fs_context_operations rdt_fs_context_ops = {
+ .free = rdt_fs_context_free,
+ .parse_param = rdt_parse_param,
+ .get_tree = rdt_get_tree,
+};
+
+static int rdt_init_fs_context(struct fs_context *fc)
+{
+ struct rdt_fs_context *ctx;
+
+ ctx = kzalloc(sizeof(struct rdt_fs_context), GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
+
+ ctx->kfc.root = rdt_root;
+ ctx->kfc.magic = RDTGROUP_SUPER_MAGIC;
+ fc->fs_private = &ctx->kfc;
+ fc->ops = &rdt_fs_context_ops;
+ if (fc->user_ns)
+ put_user_ns(fc->user_ns);
+ fc->user_ns = get_user_ns(&init_user_ns);
+ fc->global = true;
+ return 0;
}
static int reset_all_ctrls(struct rdt_resource *r)
@@ -2239,9 +2285,10 @@ static void rdt_kill_sb(struct super_block *sb)
}
static struct file_system_type rdt_fs_type = {
- .name = "resctrl",
- .mount = rdt_mount,
- .kill_sb = rdt_kill_sb,
+ .name = "resctrl",
+ .init_fs_context = rdt_init_fs_context,
+ .parameters = &rdt_fs_parameters,
+ .kill_sb = rdt_kill_sb,
};
static int mon_addfile(struct kernfs_node *parent_kn, const char *name,
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index a687d10da417..2879e234e193 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -14,6 +14,7 @@
#include <linux/acpi.h>
#include <linux/firmware-map.h>
#include <linux/sort.h>
+#include <linux/memory_hotplug.h>
#include <asm/e820/api.h>
#include <asm/setup.h>
@@ -775,7 +776,7 @@ u64 __init e820__memblock_alloc_reserved(u64 size, u64 align)
{
u64 addr;
- addr = __memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE);
+ addr = memblock_phys_alloc(size, align);
if (addr) {
e820__range_update_kexec(addr, size, E820_TYPE_RAM, E820_TYPE_RESERVED);
pr_info("update e820_table_kexec for e820__memblock_alloc_reserved()\n");
@@ -878,6 +879,10 @@ static int __init parse_memopt(char *p)
e820__range_remove(mem_size, ULLONG_MAX - mem_size, E820_TYPE_RAM, 1);
+#ifdef CONFIG_MEMORY_HOTPLUG
+ max_mem_size = mem_size;
+#endif
+
return 0;
}
early_param("mem", parse_memopt);
@@ -1092,6 +1097,9 @@ void __init e820__reserve_resources(void)
res = memblock_alloc(sizeof(*res) * e820_table->nr_entries,
SMP_CACHE_BYTES);
+ if (!res)
+ panic("%s: Failed to allocate %zu bytes\n", __func__,
+ sizeof(*res) * e820_table->nr_entries);
e820_res = res;
for (i = 0; i < e820_table->nr_entries; i++) {
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 3e3789c8f8e1..ef49517f6bb2 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -49,7 +49,7 @@ int ftrace_arch_code_modify_post_process(void)
union ftrace_code_union {
char code[MCOUNT_INSN_SIZE];
struct {
- unsigned char e8;
+ unsigned char op;
int offset;
} __attribute__((packed));
};
@@ -59,20 +59,23 @@ static int ftrace_calc_offset(long ip, long addr)
return (int)(addr - ip);
}
-static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
+static unsigned char *
+ftrace_text_replace(unsigned char op, unsigned long ip, unsigned long addr)
{
static union ftrace_code_union calc;
- calc.e8 = 0xe8;
+ calc.op = op;
calc.offset = ftrace_calc_offset(ip + MCOUNT_INSN_SIZE, addr);
- /*
- * No locking needed, this must be called via kstop_machine
- * which in essence is like running on a uniprocessor machine.
- */
return calc.code;
}
+static unsigned char *
+ftrace_call_replace(unsigned long ip, unsigned long addr)
+{
+ return ftrace_text_replace(0xe8, ip, addr);
+}
+
static inline int
within(unsigned long addr, unsigned long start, unsigned long end)
{
@@ -665,22 +668,6 @@ int __init ftrace_dyn_arch_init(void)
return 0;
}
-#if defined(CONFIG_X86_64) || defined(CONFIG_FUNCTION_GRAPH_TRACER)
-static unsigned char *ftrace_jmp_replace(unsigned long ip, unsigned long addr)
-{
- static union ftrace_code_union calc;
-
- /* Jmp not a call (ignore the .e8) */
- calc.e8 = 0xe9;
- calc.offset = ftrace_calc_offset(ip + MCOUNT_INSN_SIZE, addr);
-
- /*
- * ftrace external locks synchronize the access to the static variable.
- */
- return calc.code;
-}
-#endif
-
/* Currently only x86_64 supports dynamic trampolines */
#ifdef CONFIG_X86_64
@@ -892,8 +879,8 @@ static void *addr_from_call(void *ptr)
return NULL;
/* Make sure this is a call */
- if (WARN_ON_ONCE(calc.e8 != 0xe8)) {
- pr_warn("Expected e8, got %x\n", calc.e8);
+ if (WARN_ON_ONCE(calc.op != 0xe8)) {
+ pr_warn("Expected e8, got %x\n", calc.op);
return NULL;
}
@@ -964,6 +951,11 @@ void arch_ftrace_trampoline_free(struct ftrace_ops *ops)
#ifdef CONFIG_DYNAMIC_FTRACE
extern void ftrace_graph_call(void);
+static unsigned char *ftrace_jmp_replace(unsigned long ip, unsigned long addr)
+{
+ return ftrace_text_replace(0xe9, ip, addr);
+}
+
static int ftrace_mod_jmp(unsigned long ip, void *func)
{
unsigned char *new;
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index dfd3aca82c61..fb32925a2e62 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -905,6 +905,8 @@ int __init hpet_enable(void)
return 0;
hpet_set_mapping();
+ if (!hpet_virt_address)
+ return 0;
/*
* Read the period and check for a sane value:
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
index ff9bfd40429e..d73083021002 100644
--- a/arch/x86/kernel/hw_breakpoint.c
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -354,6 +354,7 @@ int hw_breakpoint_arch_parse(struct perf_event *bp,
#endif
default:
WARN_ON_ONCE(1);
+ return -EINVAL;
}
/*
diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c
index 1f3b77367948..22f60dd26460 100644
--- a/arch/x86/kernel/kexec-bzimage64.c
+++ b/arch/x86/kernel/kexec-bzimage64.c
@@ -538,9 +538,17 @@ static int bzImage64_cleanup(void *loader_data)
#ifdef CONFIG_KEXEC_BZIMAGE_VERIFY_SIG
static int bzImage64_verify_sig(const char *kernel, unsigned long kernel_len)
{
- return verify_pefile_signature(kernel, kernel_len,
- VERIFY_USE_SECONDARY_KEYRING,
- VERIFYING_KEXEC_PE_SIGNATURE);
+ int ret;
+
+ ret = verify_pefile_signature(kernel, kernel_len,
+ VERIFY_USE_SECONDARY_KEYRING,
+ VERIFYING_KEXEC_PE_SIGNATURE);
+ if (ret == -ENOKEY && IS_ENABLED(CONFIG_INTEGRITY_PLATFORM_KEYRING)) {
+ ret = verify_pefile_signature(kernel, kernel_len,
+ VERIFY_USE_PLATFORM_KEYRING,
+ VERIFYING_KEXEC_PE_SIGNATURE);
+ }
+ return ret;
}
#endif
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index e811d4d1c824..904494b924c1 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -104,12 +104,8 @@ static u64 kvm_sched_clock_read(void)
static inline void kvm_sched_clock_init(bool stable)
{
- if (!stable) {
- pv_ops.time.sched_clock = kvm_clock_read;
+ if (!stable)
clear_sched_clock_stable();
- return;
- }
-
kvm_sched_clock_offset = kvm_clock_read();
pv_ops.time.sched_clock = kvm_sched_clock_read;
@@ -355,6 +351,20 @@ void __init kvmclock_init(void)
machine_ops.crash_shutdown = kvm_crash_shutdown;
#endif
kvm_get_preset_lpj();
+
+ /*
+ * X86_FEATURE_NONSTOP_TSC is TSC runs at constant rate
+ * with P/T states and does not stop in deep C-states.
+ *
+ * Invariant TSC exposed by host means kvmclock is not necessary:
+ * can use TSC as clocksource.
+ *
+ */
+ if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC) &&
+ boot_cpu_has(X86_FEATURE_NONSTOP_TSC) &&
+ !check_tsc_unstable())
+ kvm_clock.rating = 299;
+
clocksource_register_hz(&kvm_clock, NSEC_PER_SEC);
pv_info.name = "KVM";
}
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 3482460d984d..1bfe5c6e6cfe 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -598,8 +598,8 @@ static int __init smp_scan_config(unsigned long base, unsigned long length)
mpf_base = base;
mpf_found = true;
- pr_info("found SMP MP-table at [mem %#010lx-%#010lx] mapped at [%p]\n",
- base, base + sizeof(*mpf) - 1, mpf);
+ pr_info("found SMP MP-table at [mem %#010lx-%#010lx]\n",
+ base, base + sizeof(*mpf) - 1);
memblock_reserve(base, sizeof(*mpf));
if (mpf->physptr)
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 13af08827eef..4bf46575568a 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -106,22 +106,22 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size,
void *ptr;
if (!node_online(node) || !NODE_DATA(node)) {
- ptr = memblock_alloc_from_nopanic(size, align, goal);
+ ptr = memblock_alloc_from(size, align, goal);
pr_info("cpu %d has no node %d or node-local memory\n",
cpu, node);
pr_debug("per cpu data for cpu%d %lu bytes at %016lx\n",
cpu, size, __pa(ptr));
} else {
- ptr = memblock_alloc_try_nid_nopanic(size, align, goal,
- MEMBLOCK_ALLOC_ACCESSIBLE,
- node);
+ ptr = memblock_alloc_try_nid(size, align, goal,
+ MEMBLOCK_ALLOC_ACCESSIBLE,
+ node);
pr_debug("per cpu data for cpu%d %lu bytes on node%d at %016lx\n",
cpu, size, node, __pa(ptr));
}
return ptr;
#else
- return memblock_alloc_from_nopanic(size, align, goal);
+ return memblock_alloc_from(size, align, goal);
#endif
}
diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c
index 3dc26f95d46e..9b9fd4826e7a 100644
--- a/arch/x86/kernel/unwind_frame.c
+++ b/arch/x86/kernel/unwind_frame.c
@@ -320,10 +320,14 @@ bool unwind_next_frame(struct unwind_state *state)
}
/* Get the next frame pointer: */
- if (state->regs)
+ if (state->next_bp) {
+ next_bp = state->next_bp;
+ state->next_bp = NULL;
+ } else if (state->regs) {
next_bp = (unsigned long *)state->regs->bp;
- else
+ } else {
next_bp = (unsigned long *)READ_ONCE_TASK_STACK(state->task, *state->bp);
+ }
/* Move to the next frame if it's safe: */
if (!update_stack_state(state, next_bp))
@@ -398,6 +402,21 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
bp = get_frame_pointer(task, regs);
+ /*
+ * If we crash with IP==0, the last successfully executed instruction
+ * was probably an indirect function call with a NULL function pointer.
+ * That means that SP points into the middle of an incomplete frame:
+ * *SP is a return pointer, and *(SP-sizeof(unsigned long)) is where we
+ * would have written a frame pointer if we hadn't crashed.
+ * Pretend that the frame is complete and that BP points to it, but save
+ * the real BP so that we can use it when looking for the next frame.
+ */
+ if (regs && regs->ip == 0 &&
+ (unsigned long *)kernel_stack_pointer(regs) >= first_frame) {
+ state->next_bp = bp;
+ bp = ((unsigned long *)kernel_stack_pointer(regs)) - 1;
+ }
+
/* Initialize stack info and make sure the frame data is accessible: */
get_stack_info(bp, state->task, &state->stack_info,
&state->stack_mask);
@@ -410,7 +429,7 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
*/
while (!unwind_done(state) &&
(!on_stack(&state->stack_info, first_frame, sizeof(long)) ||
- state->bp < first_frame))
+ (state->next_bp == NULL && state->bp < first_frame)))
unwind_next_frame(state);
}
EXPORT_SYMBOL_GPL(__unwind_start);
diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
index 26038eacf74a..89be1be1790c 100644
--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -113,6 +113,20 @@ static struct orc_entry *orc_ftrace_find(unsigned long ip)
}
#endif
+/*
+ * If we crash with IP==0, the last successfully executed instruction
+ * was probably an indirect function call with a NULL function pointer,
+ * and we don't have unwind information for NULL.
+ * This hardcoded ORC entry for IP==0 allows us to unwind from a NULL function
+ * pointer into its parent and then continue normally from there.
+ */
+static struct orc_entry null_orc_entry = {
+ .sp_offset = sizeof(long),
+ .sp_reg = ORC_REG_SP,
+ .bp_reg = ORC_REG_UNDEFINED,
+ .type = ORC_TYPE_CALL
+};
+
static struct orc_entry *orc_find(unsigned long ip)
{
static struct orc_entry *orc;
@@ -120,6 +134,9 @@ static struct orc_entry *orc_find(unsigned long ip)
if (!orc_init)
return NULL;
+ if (ip == 0)
+ return &null_orc_entry;
+
/* For non-init vmlinux addresses, use the fast lookup table: */
if (ip >= LOOKUP_START_IP && ip < LOOKUP_STOP_IP) {
unsigned int idx, start, stop;
OpenPOWER on IntegriCloud