diff options
Diffstat (limited to 'arch/x86/kernel/cpu')
-rw-r--r-- | arch/x86/kernel/cpu/Makefile | 7 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/bugs.c | 37 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mce/Makefile (renamed from arch/x86/kernel/cpu/mcheck/Makefile) | 10 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mce/amd.c (renamed from arch/x86/kernel/cpu/mcheck/mce_amd.c) | 2 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mce/apei.c (renamed from arch/x86/kernel/cpu/mcheck/mce-apei.c) | 2 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mce/core.c (renamed from arch/x86/kernel/cpu/mcheck/mce.c) | 4 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mce/dev-mcelog.c (renamed from arch/x86/kernel/cpu/mcheck/dev-mcelog.c) | 4 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mce/genpool.c (renamed from arch/x86/kernel/cpu/mcheck/mce-genpool.c) | 2 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mce/inject.c (renamed from arch/x86/kernel/cpu/mcheck/mce-inject.c) | 2 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mce/intel.c (renamed from arch/x86/kernel/cpu/mcheck/mce_intel.c) | 2 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mce/internal.h (renamed from arch/x86/kernel/cpu/mcheck/mce-internal.h) | 3 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mce/p5.c (renamed from arch/x86/kernel/cpu/mcheck/p5.c) | 2 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mce/severity.c (renamed from arch/x86/kernel/cpu/mcheck/mce-severity.c) | 2 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mce/therm_throt.c (renamed from arch/x86/kernel/cpu/mcheck/therm_throt.c) | 2 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mce/threshold.c (renamed from arch/x86/kernel/cpu/mcheck/threshold.c) | 2 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mce/winchip.c (renamed from arch/x86/kernel/cpu/mcheck/winchip.c) | 2 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/microcode/amd.c | 470 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mtrr/if.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/resctrl/Makefile | 4 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/resctrl/core.c (renamed from arch/x86/kernel/cpu/intel_rdt.c) | 186 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/resctrl/ctrlmondata.c (renamed from arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c) | 111 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/resctrl/internal.h (renamed from arch/x86/kernel/cpu/intel_rdt.h) | 55 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/resctrl/monitor.c (renamed from arch/x86/kernel/cpu/intel_rdt_monitor.c) | 16 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/resctrl/pseudo_lock.c (renamed from arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c) | 40 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/resctrl/pseudo_lock_event.h (renamed from arch/x86/kernel/cpu/intel_rdt_pseudo_lock_event.h) | 2 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/resctrl/rdtgroup.c (renamed from arch/x86/kernel/cpu/intel_rdt_rdtgroup.c) | 61 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/scattered.c | 7 |
27 files changed, 709 insertions, 330 deletions
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 1f5d2291c31e..ac78f90aea56 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -36,13 +36,10 @@ obj-$(CONFIG_CPU_SUP_CENTAUR) += centaur.o obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o -obj-$(CONFIG_INTEL_RDT) += intel_rdt.o intel_rdt_rdtgroup.o intel_rdt_monitor.o -obj-$(CONFIG_INTEL_RDT) += intel_rdt_ctrlmondata.o intel_rdt_pseudo_lock.o -CFLAGS_intel_rdt_pseudo_lock.o = -I$(src) - -obj-$(CONFIG_X86_MCE) += mcheck/ +obj-$(CONFIG_X86_MCE) += mce/ obj-$(CONFIG_MTRR) += mtrr/ obj-$(CONFIG_MICROCODE) += microcode/ +obj-$(CONFIG_RESCTRL) += resctrl/ obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 500278f5308e..77bf22546ddd 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -54,7 +54,7 @@ static u64 __ro_after_init x86_spec_ctrl_mask = SPEC_CTRL_IBRS; u64 __ro_after_init x86_amd_ls_cfg_base; u64 __ro_after_init x86_amd_ls_cfg_ssbd_mask; -/* Control conditional STIPB in switch_to() */ +/* Control conditional STIBP in switch_to() */ DEFINE_STATIC_KEY_FALSE(switch_to_cond_stibp); /* Control conditional IBPB in switch_mm() */ DEFINE_STATIC_KEY_FALSE(switch_mm_cond_ibpb); @@ -262,10 +262,11 @@ enum spectre_v2_user_cmd { }; static const char * const spectre_v2_user_strings[] = { - [SPECTRE_V2_USER_NONE] = "User space: Vulnerable", - [SPECTRE_V2_USER_STRICT] = "User space: Mitigation: STIBP protection", - [SPECTRE_V2_USER_PRCTL] = "User space: Mitigation: STIBP via prctl", - [SPECTRE_V2_USER_SECCOMP] = "User space: Mitigation: STIBP via seccomp and prctl", + [SPECTRE_V2_USER_NONE] = "User space: Vulnerable", + [SPECTRE_V2_USER_STRICT] = "User space: Mitigation: STIBP protection", + [SPECTRE_V2_USER_STRICT_PREFERRED] = "User space: Mitigation: STIBP always-on protection", + [SPECTRE_V2_USER_PRCTL] = "User space: Mitigation: STIBP via prctl", + [SPECTRE_V2_USER_SECCOMP] = "User space: Mitigation: STIBP via seccomp and prctl", }; static const struct { @@ -355,6 +356,15 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) break; } + /* + * At this point, an STIBP mode other than "off" has been set. + * If STIBP support is not being forced, check if STIBP always-on + * is preferred. + */ + if (mode != SPECTRE_V2_USER_STRICT && + boot_cpu_has(X86_FEATURE_AMD_STIBP_ALWAYS_ON)) + mode = SPECTRE_V2_USER_STRICT_PREFERRED; + /* Initialize Indirect Branch Prediction Barrier */ if (boot_cpu_has(X86_FEATURE_IBPB)) { setup_force_cpu_cap(X86_FEATURE_USE_IBPB); @@ -379,12 +389,12 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) "always-on" : "conditional"); } - /* If enhanced IBRS is enabled no STIPB required */ + /* If enhanced IBRS is enabled no STIBP required */ if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) return; /* - * If SMT is not possible or STIBP is not available clear the STIPB + * If SMT is not possible or STIBP is not available clear the STIBP * mode. */ if (!smt_possible || !boot_cpu_has(X86_FEATURE_STIBP)) @@ -610,6 +620,7 @@ void arch_smt_update(void) case SPECTRE_V2_USER_NONE: break; case SPECTRE_V2_USER_STRICT: + case SPECTRE_V2_USER_STRICT_PREFERRED: update_stibp_strict(); break; case SPECTRE_V2_USER_PRCTL: @@ -812,7 +823,8 @@ static int ib_prctl_set(struct task_struct *task, unsigned long ctrl) * Indirect branch speculation is always disabled in strict * mode. */ - if (spectre_v2_user == SPECTRE_V2_USER_STRICT) + if (spectre_v2_user == SPECTRE_V2_USER_STRICT || + spectre_v2_user == SPECTRE_V2_USER_STRICT_PREFERRED) return -EPERM; task_clear_spec_ib_disable(task); task_update_spec_tif(task); @@ -825,7 +837,8 @@ static int ib_prctl_set(struct task_struct *task, unsigned long ctrl) */ if (spectre_v2_user == SPECTRE_V2_USER_NONE) return -EPERM; - if (spectre_v2_user == SPECTRE_V2_USER_STRICT) + if (spectre_v2_user == SPECTRE_V2_USER_STRICT || + spectre_v2_user == SPECTRE_V2_USER_STRICT_PREFERRED) return 0; task_set_spec_ib_disable(task); if (ctrl == PR_SPEC_FORCE_DISABLE) @@ -896,6 +909,7 @@ static int ib_prctl_get(struct task_struct *task) return PR_SPEC_PRCTL | PR_SPEC_DISABLE; return PR_SPEC_PRCTL | PR_SPEC_ENABLE; case SPECTRE_V2_USER_STRICT: + case SPECTRE_V2_USER_STRICT_PREFERRED: return PR_SPEC_DISABLE; default: return PR_SPEC_NOT_AFFECTED; @@ -1002,7 +1016,8 @@ static void __init l1tf_select_mitigation(void) #endif half_pa = (u64)l1tf_pfn_limit() << PAGE_SHIFT; - if (e820__mapped_any(half_pa, ULLONG_MAX - half_pa, E820_TYPE_RAM)) { + if (l1tf_mitigation != L1TF_MITIGATION_OFF && + e820__mapped_any(half_pa, ULLONG_MAX - half_pa, E820_TYPE_RAM)) { pr_warn("System has more than MAX_PA/2 memory. L1TF mitigation not effective.\n"); pr_info("You may make it effective by booting the kernel with mem=%llu parameter.\n", half_pa); @@ -1088,6 +1103,8 @@ static char *stibp_state(void) return ", STIBP: disabled"; case SPECTRE_V2_USER_STRICT: return ", STIBP: forced"; + case SPECTRE_V2_USER_STRICT_PREFERRED: + return ", STIBP: always-on"; case SPECTRE_V2_USER_PRCTL: case SPECTRE_V2_USER_SECCOMP: if (static_key_enabled(&switch_to_cond_stibp)) diff --git a/arch/x86/kernel/cpu/mcheck/Makefile b/arch/x86/kernel/cpu/mce/Makefile index bcc7c54c7041..9f020c994154 100644 --- a/arch/x86/kernel/cpu/mcheck/Makefile +++ b/arch/x86/kernel/cpu/mce/Makefile @@ -1,14 +1,16 @@ # SPDX-License-Identifier: GPL-2.0 -obj-y = mce.o mce-severity.o mce-genpool.o +obj-y = core.o severity.o genpool.o obj-$(CONFIG_X86_ANCIENT_MCE) += winchip.o p5.o -obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o -obj-$(CONFIG_X86_MCE_AMD) += mce_amd.o +obj-$(CONFIG_X86_MCE_INTEL) += intel.o +obj-$(CONFIG_X86_MCE_AMD) += amd.o obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o + +mce-inject-y := inject.o obj-$(CONFIG_X86_MCE_INJECT) += mce-inject.o obj-$(CONFIG_X86_THERMAL_VECTOR) += therm_throt.o -obj-$(CONFIG_ACPI_APEI) += mce-apei.o +obj-$(CONFIG_ACPI_APEI) += apei.o obj-$(CONFIG_X86_MCELOG_LEGACY) += dev-mcelog.o diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mce/amd.c index e12454e21b8a..4a2fb59a372e 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mce/amd.c @@ -28,7 +28,7 @@ #include <asm/msr.h> #include <asm/trace/irq_vectors.h> -#include "mce-internal.h" +#include "internal.h" #define NR_BLOCKS 5 #define THRESHOLD_MAX 0xFFF diff --git a/arch/x86/kernel/cpu/mcheck/mce-apei.c b/arch/x86/kernel/cpu/mce/apei.c index 2eee85379689..1d9b3ce662a0 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-apei.c +++ b/arch/x86/kernel/cpu/mce/apei.c @@ -36,7 +36,7 @@ #include <acpi/ghes.h> #include <asm/mce.h> -#include "mce-internal.h" +#include "internal.h" void apei_mce_report_mem_error(int severity, struct cper_sec_mem_err *mem_err) { diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mce/core.c index 36d2696c9563..ad8f62a0c706 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -8,8 +8,6 @@ * Author: Andi Kleen */ -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - #include <linux/thread_info.h> #include <linux/capability.h> #include <linux/miscdevice.h> @@ -52,7 +50,7 @@ #include <asm/msr.h> #include <asm/reboot.h> -#include "mce-internal.h" +#include "internal.h" static DEFINE_MUTEX(mce_log_mutex); diff --git a/arch/x86/kernel/cpu/mcheck/dev-mcelog.c b/arch/x86/kernel/cpu/mce/dev-mcelog.c index 27f394ac983f..9690ec5c8051 100644 --- a/arch/x86/kernel/cpu/mcheck/dev-mcelog.c +++ b/arch/x86/kernel/cpu/mce/dev-mcelog.c @@ -8,14 +8,12 @@ * Author: Andi Kleen */ -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - #include <linux/miscdevice.h> #include <linux/slab.h> #include <linux/kmod.h> #include <linux/poll.h> -#include "mce-internal.h" +#include "internal.h" static BLOCKING_NOTIFIER_HEAD(mce_injector_chain); diff --git a/arch/x86/kernel/cpu/mcheck/mce-genpool.c b/arch/x86/kernel/cpu/mce/genpool.c index 217cd4449bc9..3395549c51d3 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-genpool.c +++ b/arch/x86/kernel/cpu/mce/genpool.c @@ -10,7 +10,7 @@ #include <linux/mm.h> #include <linux/genalloc.h> #include <linux/llist.h> -#include "mce-internal.h" +#include "internal.h" /* * printk() is not safe in MCE context. This is a lock-less memory allocator diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mce/inject.c index 1fc424c40a31..8492ef7d9015 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-inject.c +++ b/arch/x86/kernel/cpu/mce/inject.c @@ -38,7 +38,7 @@ #include <asm/nmi.h> #include <asm/smp.h> -#include "mce-internal.h" +#include "internal.h" /* * Collect all the MCi_XXX settings diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mce/intel.c index d05be307d081..e43eb6732630 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mce/intel.c @@ -18,7 +18,7 @@ #include <asm/msr.h> #include <asm/mce.h> -#include "mce-internal.h" +#include "internal.h" /* * Support for Intel Correct Machine Check Interrupts. This allows diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mce/internal.h index ceb67cd5918f..af5eab1e65e2 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-internal.h +++ b/arch/x86/kernel/cpu/mce/internal.h @@ -2,6 +2,9 @@ #ifndef __X86_MCE_INTERNAL_H__ #define __X86_MCE_INTERNAL_H__ +#undef pr_fmt +#define pr_fmt(fmt) "mce: " fmt + #include <linux/device.h> #include <asm/mce.h> diff --git a/arch/x86/kernel/cpu/mcheck/p5.c b/arch/x86/kernel/cpu/mce/p5.c index 5cddf831720f..4ae6df556526 100644 --- a/arch/x86/kernel/cpu/mcheck/p5.c +++ b/arch/x86/kernel/cpu/mce/p5.c @@ -14,6 +14,8 @@ #include <asm/mce.h> #include <asm/msr.h> +#include "internal.h" + /* By default disabled */ int mce_p5_enabled __read_mostly; diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mce/severity.c index 44396d521987..dc3e26e905a3 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-severity.c +++ b/arch/x86/kernel/cpu/mce/severity.c @@ -16,7 +16,7 @@ #include <asm/mce.h> #include <linux/uaccess.h> -#include "mce-internal.h" +#include "internal.h" /* * Grade an mce by severity. In general the most severe ones are processed diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mce/therm_throt.c index 2da67b70ba98..df01ff8513a5 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mce/therm_throt.c @@ -30,6 +30,8 @@ #include <asm/msr.h> #include <asm/trace/irq_vectors.h> +#include "internal.h" + /* How long to wait between reporting thermal events */ #define CHECK_INTERVAL (300 * HZ) diff --git a/arch/x86/kernel/cpu/mcheck/threshold.c b/arch/x86/kernel/cpu/mce/threshold.c index 2b584b319eff..10586a85c23f 100644 --- a/arch/x86/kernel/cpu/mcheck/threshold.c +++ b/arch/x86/kernel/cpu/mce/threshold.c @@ -10,6 +10,8 @@ #include <asm/mce.h> #include <asm/trace/irq_vectors.h> +#include "internal.h" + static void default_threshold_interrupt(void) { pr_err("Unexpected threshold interrupt at vector %x\n", diff --git a/arch/x86/kernel/cpu/mcheck/winchip.c b/arch/x86/kernel/cpu/mce/winchip.c index 3b45b270a865..a30ea13cccc2 100644 --- a/arch/x86/kernel/cpu/mcheck/winchip.c +++ b/arch/x86/kernel/cpu/mce/winchip.c @@ -13,6 +13,8 @@ #include <asm/mce.h> #include <asm/msr.h> +#include "internal.h" + /* Machine check handler for WinChip C6: */ static void winchip_machine_check(struct pt_regs *regs, long error_code) { diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 07b5fc00b188..51adde0a0f1a 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -5,7 +5,7 @@ * CPUs and later. * * Copyright (C) 2008-2011 Advanced Micro Devices Inc. - * 2013-2016 Borislav Petkov <bp@alien8.de> + * 2013-2018 Borislav Petkov <bp@alien8.de> * * Author: Peter Oruba <peter.oruba@amd.com> * @@ -38,7 +38,10 @@ #include <asm/cpu.h> #include <asm/msr.h> -static struct equiv_cpu_entry *equiv_cpu_table; +static struct equiv_cpu_table { + unsigned int num_entries; + struct equiv_cpu_entry *entry; +} equiv_table; /* * This points to the current valid container of microcode patches which we will @@ -63,13 +66,225 @@ static u8 amd_ucode_patch[PATCH_MAX_SIZE]; static const char ucode_path[] __maybe_unused = "kernel/x86/microcode/AuthenticAMD.bin"; -static u16 find_equiv_id(struct equiv_cpu_entry *equiv_table, u32 sig) +static u16 find_equiv_id(struct equiv_cpu_table *et, u32 sig) { - for (; equiv_table && equiv_table->installed_cpu; equiv_table++) { - if (sig == equiv_table->installed_cpu) - return equiv_table->equiv_cpu; + unsigned int i; + + if (!et || !et->num_entries) + return 0; + + for (i = 0; i < et->num_entries; i++) { + struct equiv_cpu_entry *e = &et->entry[i]; + + if (sig == e->installed_cpu) + return e->equiv_cpu; + + e++; + } + return 0; +} + +/* + * Check whether there is a valid microcode container file at the beginning + * of @buf of size @buf_size. Set @early to use this function in the early path. + */ +static bool verify_container(const u8 *buf, size_t buf_size, bool early) +{ + u32 cont_magic; + + if (buf_size <= CONTAINER_HDR_SZ) { + if (!early) + pr_debug("Truncated microcode container header.\n"); + + return false; + } + + cont_magic = *(const u32 *)buf; + if (cont_magic != UCODE_MAGIC) { + if (!early) + pr_debug("Invalid magic value (0x%08x).\n", cont_magic); + + return false; + } + + return true; +} + +/* + * Check whether there is a valid, non-truncated CPU equivalence table at the + * beginning of @buf of size @buf_size. Set @early to use this function in the + * early path. + */ +static bool verify_equivalence_table(const u8 *buf, size_t buf_size, bool early) +{ + const u32 *hdr = (const u32 *)buf; + u32 cont_type, equiv_tbl_len; + + if (!verify_container(buf, buf_size, early)) + return false; + + cont_type = hdr[1]; + if (cont_type != UCODE_EQUIV_CPU_TABLE_TYPE) { + if (!early) + pr_debug("Wrong microcode container equivalence table type: %u.\n", + cont_type); + + return false; + } + + buf_size -= CONTAINER_HDR_SZ; + + equiv_tbl_len = hdr[2]; + if (equiv_tbl_len < sizeof(struct equiv_cpu_entry) || + buf_size < equiv_tbl_len) { + if (!early) + pr_debug("Truncated equivalence table.\n"); + + return false; + } + + return true; +} + +/* + * Check whether there is a valid, non-truncated microcode patch section at the + * beginning of @buf of size @buf_size. Set @early to use this function in the + * early path. + * + * On success, @sh_psize returns the patch size according to the section header, + * to the caller. + */ +static bool +__verify_patch_section(const u8 *buf, size_t buf_size, u32 *sh_psize, bool early) +{ + u32 p_type, p_size; + const u32 *hdr; + + if (buf_size < SECTION_HDR_SIZE) { + if (!early) + pr_debug("Truncated patch section.\n"); + + return false; + } + + hdr = (const u32 *)buf; + p_type = hdr[0]; + p_size = hdr[1]; + + if (p_type != UCODE_UCODE_TYPE) { + if (!early) + pr_debug("Invalid type field (0x%x) in container file section header.\n", + p_type); + + return false; + } + + if (p_size < sizeof(struct microcode_header_amd)) { + if (!early) + pr_debug("Patch of size %u too short.\n", p_size); + + return false; + } + + *sh_psize = p_size; + + return true; +} + +/* + * Check whether the passed remaining file @buf_size is large enough to contain + * a patch of the indicated @sh_psize (and also whether this size does not + * exceed the per-family maximum). @sh_psize is the size read from the section + * header. + */ +static unsigned int __verify_patch_size(u8 family, u32 sh_psize, size_t buf_size) +{ + u32 max_size; + + if (family >= 0x15) + return min_t(u32, sh_psize, buf_size); + +#define F1XH_MPB_MAX_SIZE 2048 +#define F14H_MPB_MAX_SIZE 1824 + + switch (family) { + case 0x10 ... 0x12: + max_size = F1XH_MPB_MAX_SIZE; + break; + case 0x14: + max_size = F14H_MPB_MAX_SIZE; + break; + default: + WARN(1, "%s: WTF family: 0x%x\n", __func__, family); + return 0; + break; + } + + if (sh_psize > min_t(u32, buf_size, max_size)) + return 0; + + return sh_psize; +} + +/* + * Verify the patch in @buf. + * + * Returns: + * negative: on error + * positive: patch is not for this family, skip it + * 0: success + */ +static int +verify_patch(u8 family, const u8 *buf, size_t buf_size, u32 *patch_size, bool early) +{ + struct microcode_header_amd *mc_hdr; + unsigned int ret; + u32 sh_psize; + u16 proc_id; + u8 patch_fam; + + if (!__verify_patch_section(buf, buf_size, &sh_psize, early)) + return -1; + + /* + * The section header length is not included in this indicated size + * but is present in the leftover file length so we need to subtract + * it before passing this value to the function below. + */ + buf_size -= SECTION_HDR_SIZE; + + /* + * Check if the remaining buffer is big enough to contain a patch of + * size sh_psize, as the section claims. + */ + if (buf_size < sh_psize) { + if (!early) + pr_debug("Patch of size %u truncated.\n", sh_psize); + + return -1; + } + + ret = __verify_patch_size(family, sh_psize, buf_size); + if (!ret) { + if (!early) + pr_debug("Per-family patch size mismatch.\n"); + return -1; + } + + *patch_size = sh_psize; + + mc_hdr = (struct microcode_header_amd *)(buf + SECTION_HDR_SIZE); + if (mc_hdr->nb_dev_id || mc_hdr->sb_dev_id) { + if (!early) + pr_err("Patch-ID 0x%08x: chipset-specific code unsupported.\n", mc_hdr->patch_id); + return -1; } + proc_id = mc_hdr->processor_rev_id; + patch_fam = 0xf + (proc_id >> 12); + if (patch_fam != family) + return 1; + return 0; } @@ -80,26 +295,28 @@ static u16 find_equiv_id(struct equiv_cpu_entry *equiv_table, u32 sig) * Returns the amount of bytes consumed while scanning. @desc contains all the * data we're going to use in later stages of the application. */ -static ssize_t parse_container(u8 *ucode, ssize_t size, struct cont_desc *desc) +static size_t parse_container(u8 *ucode, size_t size, struct cont_desc *desc) { - struct equiv_cpu_entry *eq; - ssize_t orig_size = size; + struct equiv_cpu_table table; + size_t orig_size = size; u32 *hdr = (u32 *)ucode; u16 eq_id; u8 *buf; - /* Am I looking at an equivalence table header? */ - if (hdr[0] != UCODE_MAGIC || - hdr[1] != UCODE_EQUIV_CPU_TABLE_TYPE || - hdr[2] == 0) - return CONTAINER_HDR_SZ; + if (!verify_equivalence_table(ucode, size, true)) + return 0; buf = ucode; - eq = (struct equiv_cpu_entry *)(buf + CONTAINER_HDR_SZ); + table.entry = (struct equiv_cpu_entry *)(buf + CONTAINER_HDR_SZ); + table.num_entries = hdr[2] / sizeof(struct equiv_cpu_entry); - /* Find the equivalence ID of our CPU in this table: */ - eq_id = find_equiv_id(eq, desc->cpuid_1_eax); + /* + * Find the equivalence ID of our CPU in this table. Even if this table + * doesn't contain a patch for the CPU, scan through the whole container + * so that it can be skipped in case there are other containers appended. + */ + eq_id = find_equiv_id(&table, desc->cpuid_1_eax); buf += hdr[2] + CONTAINER_HDR_SZ; size -= hdr[2] + CONTAINER_HDR_SZ; @@ -111,29 +328,29 @@ static ssize_t parse_container(u8 *ucode, ssize_t size, struct cont_desc *desc) while (size > 0) { struct microcode_amd *mc; u32 patch_size; + int ret; + + ret = verify_patch(x86_family(desc->cpuid_1_eax), buf, size, &patch_size, true); + if (ret < 0) { + /* + * Patch verification failed, skip to the next + * container, if there's one: + */ + goto out; + } else if (ret > 0) { + goto skip; + } - hdr = (u32 *)buf; - - if (hdr[0] != UCODE_UCODE_TYPE) - break; - - /* Sanity-check patch size. */ - patch_size = hdr[1]; - if (patch_size > PATCH_MAX_SIZE) - break; - - /* Skip patch section header: */ - buf += SECTION_HDR_SIZE; - size -= SECTION_HDR_SIZE; - - mc = (struct microcode_amd *)buf; + mc = (struct microcode_amd *)(buf + SECTION_HDR_SIZE); if (eq_id == mc->hdr.processor_rev_id) { desc->psize = patch_size; desc->mc = mc; } - buf += patch_size; - size -= patch_size; +skip: + /* Skip patch section header too: */ + buf += patch_size + SECTION_HDR_SIZE; + size -= patch_size + SECTION_HDR_SIZE; } /* @@ -150,6 +367,7 @@ static ssize_t parse_container(u8 *ucode, ssize_t size, struct cont_desc *desc) return 0; } +out: return orig_size - size; } @@ -159,15 +377,18 @@ static ssize_t parse_container(u8 *ucode, ssize_t size, struct cont_desc *desc) */ static void scan_containers(u8 *ucode, size_t size, struct cont_desc *desc) { - ssize_t rem = size; - - while (rem >= 0) { - ssize_t s = parse_container(ucode, rem, desc); + while (size) { + size_t s = parse_container(ucode, size, desc); if (!s) return; - ucode += s; - rem -= s; + /* catch wraparound */ + if (size >= s) { + ucode += s; + size -= s; + } else { + return; + } } } @@ -364,21 +585,7 @@ void reload_ucode_amd(void) static u16 __find_equiv_id(unsigned int cpu) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - return find_equiv_id(equiv_cpu_table, uci->cpu_sig.sig); -} - -static u32 find_cpu_family_by_equiv_cpu(u16 equiv_cpu) -{ - int i = 0; - - BUG_ON(!equiv_cpu_table); - - while (equiv_cpu_table[i].equiv_cpu != 0) { - if (equiv_cpu == equiv_cpu_table[i].equiv_cpu) - return equiv_cpu_table[i].installed_cpu; - i++; - } - return 0; + return find_equiv_id(&equiv_table, uci->cpu_sig.sig); } /* @@ -461,43 +668,6 @@ static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig) return 0; } -static unsigned int verify_patch_size(u8 family, u32 patch_size, - unsigned int size) -{ - u32 max_size; - -#define F1XH_MPB_MAX_SIZE 2048 -#define F14H_MPB_MAX_SIZE 1824 -#define F15H_MPB_MAX_SIZE 4096 -#define F16H_MPB_MAX_SIZE 3458 -#define F17H_MPB_MAX_SIZE 3200 - - switch (family) { - case 0x14: - max_size = F14H_MPB_MAX_SIZE; - break; - case 0x15: - max_size = F15H_MPB_MAX_SIZE; - break; - case 0x16: - max_size = F16H_MPB_MAX_SIZE; - break; - case 0x17: - max_size = F17H_MPB_MAX_SIZE; - break; - default: - max_size = F1XH_MPB_MAX_SIZE; - break; - } - - if (patch_size > min_t(u32, size, max_size)) { - pr_err("patch size mismatch\n"); - return 0; - } - - return patch_size; -} - static enum ucode_state apply_microcode_amd(int cpu) { struct cpuinfo_x86 *c = &cpu_data(cpu); @@ -548,34 +718,34 @@ out: return ret; } -static int install_equiv_cpu_table(const u8 *buf) +static size_t install_equiv_cpu_table(const u8 *buf, size_t buf_size) { - unsigned int *ibuf = (unsigned int *)buf; - unsigned int type = ibuf[1]; - unsigned int size = ibuf[2]; + u32 equiv_tbl_len; + const u32 *hdr; - if (type != UCODE_EQUIV_CPU_TABLE_TYPE || !size) { - pr_err("empty section/" - "invalid type field in container file section header\n"); - return -EINVAL; - } + if (!verify_equivalence_table(buf, buf_size, false)) + return 0; + + hdr = (const u32 *)buf; + equiv_tbl_len = hdr[2]; - equiv_cpu_table = vmalloc(size); - if (!equiv_cpu_table) { + equiv_table.entry = vmalloc(equiv_tbl_len); + if (!equiv_table.entry) { pr_err("failed to allocate equivalent CPU table\n"); - return -ENOMEM; + return 0; } - memcpy(equiv_cpu_table, buf + CONTAINER_HDR_SZ, size); + memcpy(equiv_table.entry, buf + CONTAINER_HDR_SZ, equiv_tbl_len); + equiv_table.num_entries = equiv_tbl_len / sizeof(struct equiv_cpu_entry); /* add header length */ - return size + CONTAINER_HDR_SZ; + return equiv_tbl_len + CONTAINER_HDR_SZ; } static void free_equiv_cpu_table(void) { - vfree(equiv_cpu_table); - equiv_cpu_table = NULL; + vfree(equiv_table.entry); + memset(&equiv_table, 0, sizeof(equiv_table)); } static void cleanup(void) @@ -585,47 +755,23 @@ static void cleanup(void) } /* - * We return the current size even if some of the checks failed so that + * Return a non-negative value even if some of the checks failed so that * we can skip over the next patch. If we return a negative value, we * signal a grave error like a memory allocation has failed and the * driver cannot continue functioning normally. In such cases, we tear * down everything we've used up so far and exit. */ -static int verify_and_add_patch(u8 family, u8 *fw, unsigned int leftover) +static int verify_and_add_patch(u8 family, u8 *fw, unsigned int leftover, + unsigned int *patch_size) { struct microcode_header_amd *mc_hdr; struct ucode_patch *patch; - unsigned int patch_size, crnt_size, ret; - u32 proc_fam; u16 proc_id; + int ret; - patch_size = *(u32 *)(fw + 4); - crnt_size = patch_size + SECTION_HDR_SIZE; - mc_hdr = (struct microcode_header_amd *)(fw + SECTION_HDR_SIZE); - proc_id = mc_hdr->processor_rev_id; - - proc_fam = find_cpu_family_by_equiv_cpu(proc_id); - if (!proc_fam) { - pr_err("No patch family for equiv ID: 0x%04x\n", proc_id); - return crnt_size; - } - - /* check if patch is for the current family */ - proc_fam = ((proc_fam >> 8) & 0xf) + ((proc_fam >> 20) & 0xff); - if (proc_fam != family) - return crnt_size; - - if (mc_hdr->nb_dev_id || mc_hdr->sb_dev_id) { - pr_err("Patch-ID 0x%08x: chipset-specific code unsupported.\n", - mc_hdr->patch_id); - return crnt_size; - } - - ret = verify_patch_size(family, patch_size, leftover); - if (!ret) { - pr_err("Patch-ID 0x%08x: size mismatch.\n", mc_hdr->patch_id); - return crnt_size; - } + ret = verify_patch(family, fw, leftover, patch_size, false); + if (ret) + return ret; patch = kzalloc(sizeof(*patch), GFP_KERNEL); if (!patch) { @@ -633,13 +779,16 @@ static int verify_and_add_patch(u8 family, u8 *fw, unsigned int leftover) return -EINVAL; } - patch->data = kmemdup(fw + SECTION_HDR_SIZE, patch_size, GFP_KERNEL); + patch->data = kmemdup(fw + SECTION_HDR_SIZE, *patch_size, GFP_KERNEL); if (!patch->data) { pr_err("Patch data allocation failure.\n"); kfree(patch); return -EINVAL; } + mc_hdr = (struct microcode_header_amd *)(fw + SECTION_HDR_SIZE); + proc_id = mc_hdr->processor_rev_id; + INIT_LIST_HEAD(&patch->plist); patch->patch_id = mc_hdr->patch_id; patch->equiv_cpu = proc_id; @@ -650,39 +799,38 @@ static int verify_and_add_patch(u8 family, u8 *fw, unsigned int leftover) /* ... and add to cache. */ update_cache(patch); - return crnt_size; + return 0; } static enum ucode_state __load_microcode_amd(u8 family, const u8 *data, size_t size) { - enum ucode_state ret = UCODE_ERROR; - unsigned int leftover; u8 *fw = (u8 *)data; - int crnt_size = 0; - int offset; + size_t offset; - offset = install_equiv_cpu_table(data); - if (offset < 0) { - pr_err("failed to create equivalent cpu table\n"); - return ret; - } - fw += offset; - leftover = size - offset; + offset = install_equiv_cpu_table(data, size); + if (!offset) + return UCODE_ERROR; + + fw += offset; + size -= offset; if (*(u32 *)fw != UCODE_UCODE_TYPE) { pr_err("invalid type field in container file section header\n"); free_equiv_cpu_table(); - return ret; + return UCODE_ERROR; } - while (leftover) { - crnt_size = verify_and_add_patch(family, fw, leftover); - if (crnt_size < 0) - return ret; + while (size > 0) { + unsigned int crnt_size = 0; + int ret; - fw += crnt_size; - leftover -= crnt_size; + ret = verify_and_add_patch(family, fw, size, &crnt_size); + if (ret < 0) + return UCODE_ERROR; + + fw += crnt_size + SECTION_HDR_SIZE; + size -= (crnt_size + SECTION_HDR_SIZE); } return UCODE_OK; @@ -761,10 +909,8 @@ static enum ucode_state request_microcode_amd(int cpu, struct device *device, } ret = UCODE_ERROR; - if (*(u32 *)fw->data != UCODE_MAGIC) { - pr_err("invalid magic value (0x%08x)\n", *(u32 *)fw->data); + if (!verify_container(fw->data, fw->size, false)) goto fw_release; - } ret = load_microcode_amd(bsp, c->x86, fw->data, fw->size); diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c index 2e173d47b450..4d36dcc1cf87 100644 --- a/arch/x86/kernel/cpu/mtrr/if.c +++ b/arch/x86/kernel/cpu/mtrr/if.c @@ -165,6 +165,8 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg) struct mtrr_gentry gentry; void __user *arg = (void __user *) __arg; + memset(&gentry, 0, sizeof(gentry)); + switch (cmd) { case MTRRIOC_ADD_ENTRY: case MTRRIOC_SET_ENTRY: diff --git a/arch/x86/kernel/cpu/resctrl/Makefile b/arch/x86/kernel/cpu/resctrl/Makefile new file mode 100644 index 000000000000..6895049ceef7 --- /dev/null +++ b/arch/x86/kernel/cpu/resctrl/Makefile @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_RESCTRL) += core.o rdtgroup.o monitor.o +obj-$(CONFIG_RESCTRL) += ctrlmondata.o pseudo_lock.o +CFLAGS_pseudo_lock.o = -I$(src) diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/resctrl/core.c index 44272b7107ad..c3a9dc63edf2 100644 --- a/arch/x86/kernel/cpu/intel_rdt.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -22,7 +22,7 @@ * Software Developer Manual June 2016, volume 3, section 17.17. */ -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#define pr_fmt(fmt) "resctrl: " fmt #include <linux/slab.h> #include <linux/err.h> @@ -30,22 +30,19 @@ #include <linux/cpuhotplug.h> #include <asm/intel-family.h> -#include <asm/intel_rdt_sched.h> -#include "intel_rdt.h" - -#define MBA_IS_LINEAR 0x4 -#define MBA_MAX_MBPS U32_MAX +#include <asm/resctrl_sched.h> +#include "internal.h" /* Mutex to protect rdtgroup access. */ DEFINE_MUTEX(rdtgroup_mutex); /* - * The cached intel_pqr_state is strictly per CPU and can never be + * The cached resctrl_pqr_state is strictly per CPU and can never be * updated from a remote CPU. Functions which modify the state * are called with interrupts disabled and no preemption, which * is sufficient for the protection. */ -DEFINE_PER_CPU(struct intel_pqr_state, pqr_state); +DEFINE_PER_CPU(struct resctrl_pqr_state, pqr_state); /* * Used to store the max resource name width and max resource data width @@ -60,9 +57,13 @@ int max_name_width, max_data_width; bool rdt_alloc_capable; static void -mba_wrmsr(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r); +mba_wrmsr_intel(struct rdt_domain *d, struct msr_param *m, + struct rdt_resource *r); static void cat_wrmsr(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r); +static void +mba_wrmsr_amd(struct rdt_domain *d, struct msr_param *m, + struct rdt_resource *r); #define domain_init(id) LIST_HEAD_INIT(rdt_resources_all[id].domains) @@ -72,7 +73,7 @@ struct rdt_resource rdt_resources_all[] = { .rid = RDT_RESOURCE_L3, .name = "L3", .domains = domain_init(RDT_RESOURCE_L3), - .msr_base = IA32_L3_CBM_BASE, + .msr_base = MSR_IA32_L3_CBM_BASE, .msr_update = cat_wrmsr, .cache_level = 3, .cache = { @@ -89,7 +90,7 @@ struct rdt_resource rdt_resources_all[] = { .rid = RDT_RESOURCE_L3DATA, .name = "L3DATA", .domains = domain_init(RDT_RESOURCE_L3DATA), - .msr_base = IA32_L3_CBM_BASE, + .msr_base = MSR_IA32_L3_CBM_BASE, .msr_update = cat_wrmsr, .cache_level = 3, .cache = { @@ -106,7 +107,7 @@ struct rdt_resource rdt_resources_all[] = { .rid = RDT_RESOURCE_L3CODE, .name = "L3CODE", .domains = domain_init(RDT_RESOURCE_L3CODE), - .msr_base = IA32_L3_CBM_BASE, + .msr_base = MSR_IA32_L3_CBM_BASE, .msr_update = cat_wrmsr, .cache_level = 3, .cache = { @@ -123,7 +124,7 @@ struct rdt_resource rdt_resources_all[] = { .rid = RDT_RESOURCE_L2, .name = "L2", .domains = domain_init(RDT_RESOURCE_L2), - .msr_base = IA32_L2_CBM_BASE, + .msr_base = MSR_IA32_L2_CBM_BASE, .msr_update = cat_wrmsr, .cache_level = 2, .cache = { @@ -140,7 +141,7 @@ struct rdt_resource rdt_resources_all[] = { .rid = RDT_RESOURCE_L2DATA, .name = "L2DATA", .domains = domain_init(RDT_RESOURCE_L2DATA), - .msr_base = IA32_L2_CBM_BASE, + .msr_base = MSR_IA32_L2_CBM_BASE, .msr_update = cat_wrmsr, .cache_level = 2, .cache = { @@ -157,7 +158,7 @@ struct rdt_resource rdt_resources_all[] = { .rid = RDT_RESOURCE_L2CODE, .name = "L2CODE", .domains = domain_init(RDT_RESOURCE_L2CODE), - .msr_base = IA32_L2_CBM_BASE, + .msr_base = MSR_IA32_L2_CBM_BASE, .msr_update = cat_wrmsr, .cache_level = 2, .cache = { @@ -174,10 +175,7 @@ struct rdt_resource rdt_resources_all[] = { .rid = RDT_RESOURCE_MBA, .name = "MB", .domains = domain_init(RDT_RESOURCE_MBA), - .msr_base = IA32_MBA_THRTL_BASE, - .msr_update = mba_wrmsr, .cache_level = 3, - .parse_ctrlval = parse_bw, .format_str = "%d=%*u", .fflags = RFTYPE_RES_MB, }, @@ -211,9 +209,10 @@ static inline void cache_alloc_hsw_probe(void) struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3]; u32 l, h, max_cbm = BIT_MASK(20) - 1; - if (wrmsr_safe(IA32_L3_CBM_BASE, max_cbm, 0)) + if (wrmsr_safe(MSR_IA32_L3_CBM_BASE, max_cbm, 0)) return; - rdmsr(IA32_L3_CBM_BASE, l, h); + + rdmsr(MSR_IA32_L3_CBM_BASE, l, h); /* If all the bits were set in MSR, return success */ if (l != max_cbm) @@ -259,7 +258,7 @@ static inline bool rdt_get_mb_table(struct rdt_resource *r) return false; } -static bool rdt_get_mem_config(struct rdt_resource *r) +static bool __get_mem_config_intel(struct rdt_resource *r) { union cpuid_0x10_3_eax eax; union cpuid_0x10_x_edx edx; @@ -285,6 +284,30 @@ static bool rdt_get_mem_config(struct rdt_resource *r) return true; } +static bool __rdt_get_mem_config_amd(struct rdt_resource *r) +{ + union cpuid_0x10_3_eax eax; + union cpuid_0x10_x_edx edx; + u32 ebx, ecx; + + cpuid_count(0x80000020, 1, &eax.full, &ebx, &ecx, &edx.full); + r->num_closid = edx.split.cos_max + 1; + r->default_ctrl = MAX_MBA_BW_AMD; + + /* AMD does not use delay */ + r->membw.delay_linear = false; + + r->membw.min_bw = 0; + r->membw.bw_gran = 1; + /* Max value is 2048, Data width should be 4 in decimal */ + r->data_width = 4; + + r->alloc_capable = true; + r->alloc_enabled = true; + + return true; +} + static void rdt_get_cache_alloc_cfg(int idx, struct rdt_resource *r) { union cpuid_0x10_1_eax eax; @@ -344,6 +367,15 @@ static int get_cache_id(int cpu, int level) return -1; } +static void +mba_wrmsr_amd(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r) +{ + unsigned int i; + + for (i = m->low; i < m->high; i++) + wrmsrl(r->msr_base + i, d->ctrl_val[i]); +} + /* * Map the memory b/w percentage value to delay values * that can be written to QOS_MSRs. @@ -359,7 +391,8 @@ u32 delay_bw_map(unsigned long bw, struct rdt_resource *r) } static void -mba_wrmsr(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r) +mba_wrmsr_intel(struct rdt_domain *d, struct msr_param *m, + struct rdt_resource *r) { unsigned int i; @@ -421,7 +454,7 @@ struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id, struct list_head *l; if (id < 0) - return ERR_PTR(id); + return ERR_PTR(-ENODEV); list_for_each(l, &r->domains) { d = list_entry(l, struct rdt_domain, list); @@ -639,7 +672,7 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r) static void clear_closid_rmid(int cpu) { - struct intel_pqr_state *state = this_cpu_ptr(&pqr_state); + struct resctrl_pqr_state *state = this_cpu_ptr(&pqr_state); state->default_closid = 0; state->default_rmid = 0; @@ -648,7 +681,7 @@ static void clear_closid_rmid(int cpu) wrmsr(IA32_PQR_ASSOC, 0, 0); } -static int intel_rdt_online_cpu(unsigned int cpu) +static int resctrl_online_cpu(unsigned int cpu) { struct rdt_resource *r; @@ -674,7 +707,7 @@ static void clear_childcpus(struct rdtgroup *r, unsigned int cpu) } } -static int intel_rdt_offline_cpu(unsigned int cpu) +static int resctrl_offline_cpu(unsigned int cpu) { struct rdtgroup *rdtgrp; struct rdt_resource *r; @@ -794,6 +827,19 @@ static bool __init rdt_cpu_has(int flag) return ret; } +static __init bool get_mem_config(void) +{ + if (!rdt_cpu_has(X86_FEATURE_MBA)) + return false; + + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) + return __get_mem_config_intel(&rdt_resources_all[RDT_RESOURCE_MBA]); + else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) + return __rdt_get_mem_config_amd(&rdt_resources_all[RDT_RESOURCE_MBA]); + + return false; +} + static __init bool get_rdt_alloc_resources(void) { bool ret = false; @@ -818,10 +864,9 @@ static __init bool get_rdt_alloc_resources(void) ret = true; } - if (rdt_cpu_has(X86_FEATURE_MBA)) { - if (rdt_get_mem_config(&rdt_resources_all[RDT_RESOURCE_MBA])) - ret = true; - } + if (get_mem_config()) + ret = true; + return ret; } @@ -840,7 +885,7 @@ static __init bool get_rdt_mon_resources(void) return !rdt_get_mon_l3_config(&rdt_resources_all[RDT_RESOURCE_L3]); } -static __init void rdt_quirks(void) +static __init void __check_quirks_intel(void) { switch (boot_cpu_data.x86_model) { case INTEL_FAM6_HASWELL_X: @@ -855,30 +900,91 @@ static __init void rdt_quirks(void) } } +static __init void check_quirks(void) +{ + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) + __check_quirks_intel(); +} + static __init bool get_rdt_resources(void) { - rdt_quirks(); rdt_alloc_capable = get_rdt_alloc_resources(); rdt_mon_capable = get_rdt_mon_resources(); return (rdt_mon_capable || rdt_alloc_capable); } +static __init void rdt_init_res_defs_intel(void) +{ + struct rdt_resource *r; + + for_each_rdt_resource(r) { + if (r->rid == RDT_RESOURCE_L3 || + r->rid == RDT_RESOURCE_L3DATA || + r->rid == RDT_RESOURCE_L3CODE || + r->rid == RDT_RESOURCE_L2 || + r->rid == RDT_RESOURCE_L2DATA || + r->rid == RDT_RESOURCE_L2CODE) + r->cbm_validate = cbm_validate_intel; + else if (r->rid == RDT_RESOURCE_MBA) { + r->msr_base = MSR_IA32_MBA_THRTL_BASE; + r->msr_update = mba_wrmsr_intel; + r->parse_ctrlval = parse_bw_intel; + } + } +} + +static __init void rdt_init_res_defs_amd(void) +{ + struct rdt_resource *r; + + for_each_rdt_resource(r) { + if (r->rid == RDT_RESOURCE_L3 || + r->rid == RDT_RESOURCE_L3DATA || + r->rid == RDT_RESOURCE_L3CODE || + r->rid == RDT_RESOURCE_L2 || + r->rid == RDT_RESOURCE_L2DATA || + r->rid == RDT_RESOURCE_L2CODE) + r->cbm_validate = cbm_validate_amd; + else if (r->rid == RDT_RESOURCE_MBA) { + r->msr_base = MSR_IA32_MBA_BW_BASE; + r->msr_update = mba_wrmsr_amd; + r->parse_ctrlval = parse_bw_amd; + } + } +} + +static __init void rdt_init_res_defs(void) +{ + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) + rdt_init_res_defs_intel(); + else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) + rdt_init_res_defs_amd(); +} + static enum cpuhp_state rdt_online; -static int __init intel_rdt_late_init(void) +static int __init resctrl_late_init(void) { struct rdt_resource *r; int state, ret; + /* + * Initialize functions(or definitions) that are different + * between vendors here. + */ + rdt_init_res_defs(); + + check_quirks(); + if (!get_rdt_resources()) return -ENODEV; rdt_init_padding(); state = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, - "x86/rdt/cat:online:", - intel_rdt_online_cpu, intel_rdt_offline_cpu); + "x86/resctrl/cat:online:", + resctrl_online_cpu, resctrl_offline_cpu); if (state < 0) return state; @@ -890,20 +996,20 @@ static int __init intel_rdt_late_init(void) rdt_online = state; for_each_alloc_capable_rdt_resource(r) - pr_info("Intel RDT %s allocation detected\n", r->name); + pr_info("%s allocation detected\n", r->name); for_each_mon_capable_rdt_resource(r) - pr_info("Intel RDT %s monitoring detected\n", r->name); + pr_info("%s monitoring detected\n", r->name); return 0; } -late_initcall(intel_rdt_late_init); +late_initcall(resctrl_late_init); -static void __exit intel_rdt_exit(void) +static void __exit resctrl_exit(void) { cpuhp_remove_state(rdt_online); rdtgroup_exit(); } -__exitcall(intel_rdt_exit); +__exitcall(resctrl_exit); diff --git a/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c index 27937458c231..2dbd990a2eb7 100644 --- a/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c +++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c @@ -23,10 +23,58 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/cpu.h> #include <linux/kernfs.h> #include <linux/seq_file.h> #include <linux/slab.h> -#include "intel_rdt.h" +#include "internal.h" + +/* + * Check whether MBA bandwidth percentage value is correct. The value is + * checked against the minimum and maximum bandwidth values specified by + * the hardware. The allocated bandwidth percentage is rounded to the next + * control step available on the hardware. + */ +static bool bw_validate_amd(char *buf, unsigned long *data, + struct rdt_resource *r) +{ + unsigned long bw; + int ret; + + ret = kstrtoul(buf, 10, &bw); + if (ret) { + rdt_last_cmd_printf("Non-decimal digit in MB value %s\n", buf); + return false; + } + + if (bw < r->membw.min_bw || bw > r->default_ctrl) { + rdt_last_cmd_printf("MB value %ld out of range [%d,%d]\n", bw, + r->membw.min_bw, r->default_ctrl); + return false; + } + + *data = roundup(bw, (unsigned long)r->membw.bw_gran); + return true; +} + +int parse_bw_amd(struct rdt_parse_data *data, struct rdt_resource *r, + struct rdt_domain *d) +{ + unsigned long bw_val; + + if (d->have_new_ctrl) { + rdt_last_cmd_printf("Duplicate domain %d\n", d->id); + return -EINVAL; + } + + if (!bw_validate_amd(data->buf, &bw_val, r)) + return -EINVAL; + + d->new_ctrl = bw_val; + d->have_new_ctrl = true; + + return 0; +} /* * Check whether MBA bandwidth percentage value is correct. The value is @@ -64,13 +112,13 @@ static bool bw_validate(char *buf, unsigned long *data, struct rdt_resource *r) return true; } -int parse_bw(struct rdt_parse_data *data, struct rdt_resource *r, - struct rdt_domain *d) +int parse_bw_intel(struct rdt_parse_data *data, struct rdt_resource *r, + struct rdt_domain *d) { unsigned long bw_val; if (d->have_new_ctrl) { - rdt_last_cmd_printf("duplicate domain %d\n", d->id); + rdt_last_cmd_printf("Duplicate domain %d\n", d->id); return -EINVAL; } @@ -88,7 +136,7 @@ int parse_bw(struct rdt_parse_data *data, struct rdt_resource *r, * are allowed (e.g. FFFFH, 0FF0H, 003CH, etc.). * Additionally Haswell requires at least two bits set. */ -static bool cbm_validate(char *buf, u32 *data, struct rdt_resource *r) +bool cbm_validate_intel(char *buf, u32 *data, struct rdt_resource *r) { unsigned long first_bit, zero_bit, val; unsigned int cbm_len = r->cache.cbm_len; @@ -96,12 +144,12 @@ static bool cbm_validate(char *buf, u32 *data, struct rdt_resource *r) ret = kstrtoul(buf, 16, &val); if (ret) { - rdt_last_cmd_printf("non-hex character in mask %s\n", buf); + rdt_last_cmd_printf("Non-hex character in the mask %s\n", buf); return false; } if (val == 0 || val > r->default_ctrl) { - rdt_last_cmd_puts("mask out of range\n"); + rdt_last_cmd_puts("Mask out of range\n"); return false; } @@ -109,12 +157,12 @@ static bool cbm_validate(char *buf, u32 *data, struct rdt_resource *r) zero_bit = find_next_zero_bit(&val, cbm_len, first_bit); if (find_next_bit(&val, cbm_len, zero_bit) < cbm_len) { - rdt_last_cmd_printf("mask %lx has non-consecutive 1-bits\n", val); + rdt_last_cmd_printf("The mask %lx has non-consecutive 1-bits\n", val); return false; } if ((zero_bit - first_bit) < r->cache.min_cbm_bits) { - rdt_last_cmd_printf("Need at least %d bits in mask\n", + rdt_last_cmd_printf("Need at least %d bits in the mask\n", r->cache.min_cbm_bits); return false; } @@ -124,6 +172,30 @@ static bool cbm_validate(char *buf, u32 *data, struct rdt_resource *r) } /* + * Check whether a cache bit mask is valid. AMD allows non-contiguous + * bitmasks + */ +bool cbm_validate_amd(char *buf, u32 *data, struct rdt_resource *r) +{ + unsigned long val; + int ret; + + ret = kstrtoul(buf, 16, &val); + if (ret) { + rdt_last_cmd_printf("Non-hex character in the mask %s\n", buf); + return false; + } + + if (val > r->default_ctrl) { + rdt_last_cmd_puts("Mask out of range\n"); + return false; + } + + *data = val; + return true; +} + +/* * Read one cache bit mask (hex). Check that it is valid for the current * resource type. */ @@ -134,7 +206,7 @@ int parse_cbm(struct rdt_parse_data *data, struct rdt_resource *r, u32 cbm_val; if (d->have_new_ctrl) { - rdt_last_cmd_printf("duplicate domain %d\n", d->id); + rdt_last_cmd_printf("Duplicate domain %d\n", d->id); return -EINVAL; } @@ -144,17 +216,17 @@ int parse_cbm(struct rdt_parse_data *data, struct rdt_resource *r, */ if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP && rdtgroup_pseudo_locked_in_hierarchy(d)) { - rdt_last_cmd_printf("pseudo-locked region in hierarchy\n"); + rdt_last_cmd_puts("Pseudo-locked region in hierarchy\n"); return -EINVAL; } - if (!cbm_validate(data->buf, &cbm_val, r)) + if (!r->cbm_validate(data->buf, &cbm_val, r)) return -EINVAL; if ((rdtgrp->mode == RDT_MODE_EXCLUSIVE || rdtgrp->mode == RDT_MODE_SHAREABLE) && rdtgroup_cbm_overlaps_pseudo_locked(d, cbm_val)) { - rdt_last_cmd_printf("CBM overlaps with pseudo-locked region\n"); + rdt_last_cmd_puts("CBM overlaps with pseudo-locked region\n"); return -EINVAL; } @@ -163,14 +235,14 @@ int parse_cbm(struct rdt_parse_data *data, struct rdt_resource *r, * either is exclusive. */ if (rdtgroup_cbm_overlaps(r, d, cbm_val, rdtgrp->closid, true)) { - rdt_last_cmd_printf("overlaps with exclusive group\n"); + rdt_last_cmd_puts("Overlaps with exclusive group\n"); return -EINVAL; } if (rdtgroup_cbm_overlaps(r, d, cbm_val, rdtgrp->closid, false)) { if (rdtgrp->mode == RDT_MODE_EXCLUSIVE || rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { - rdt_last_cmd_printf("overlaps with other group\n"); + rdt_last_cmd_puts("Overlaps with other group\n"); return -EINVAL; } } @@ -292,7 +364,7 @@ static int rdtgroup_parse_resource(char *resname, char *tok, if (!strcmp(resname, r->name) && rdtgrp->closid < r->num_closid) return parse_line(tok, r, rdtgrp); } - rdt_last_cmd_printf("unknown/unsupported resource name '%s'\n", resname); + rdt_last_cmd_printf("Unknown or unsupported resource name '%s'\n", resname); return -EINVAL; } @@ -310,9 +382,11 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of, return -EINVAL; buf[nbytes - 1] = '\0'; + cpus_read_lock(); rdtgrp = rdtgroup_kn_lock_live(of->kn); if (!rdtgrp) { rdtgroup_kn_unlock(of->kn); + cpus_read_unlock(); return -ENOENT; } rdt_last_cmd_clear(); @@ -323,7 +397,7 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of, */ if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) { ret = -EINVAL; - rdt_last_cmd_puts("resource group is pseudo-locked\n"); + rdt_last_cmd_puts("Resource group is pseudo-locked\n"); goto out; } @@ -367,6 +441,7 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of, out: rdtgroup_kn_unlock(of->kn); + cpus_read_unlock(); return ret ?: nbytes; } @@ -463,7 +538,7 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg) r = &rdt_resources_all[resid]; d = rdt_find_domain(r, domid, NULL); - if (!d) { + if (IS_ERR_OR_NULL(d)) { ret = -ENOENT; goto out; } diff --git a/arch/x86/kernel/cpu/intel_rdt.h b/arch/x86/kernel/cpu/resctrl/internal.h index 3736f6dc9545..822b7db634ee 100644 --- a/arch/x86/kernel/cpu/intel_rdt.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -1,20 +1,24 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_INTEL_RDT_H -#define _ASM_X86_INTEL_RDT_H +#ifndef _ASM_X86_RESCTRL_INTERNAL_H +#define _ASM_X86_RESCTRL_INTERNAL_H #include <linux/sched.h> #include <linux/kernfs.h> #include <linux/jump_label.h> -#define IA32_L3_QOS_CFG 0xc81 -#define IA32_L2_QOS_CFG 0xc82 -#define IA32_L3_CBM_BASE 0xc90 -#define IA32_L2_CBM_BASE 0xd10 -#define IA32_MBA_THRTL_BASE 0xd50 +#define MSR_IA32_L3_QOS_CFG 0xc81 +#define MSR_IA32_L2_QOS_CFG 0xc82 +#define MSR_IA32_L3_CBM_BASE 0xc90 +#define MSR_IA32_L2_CBM_BASE 0xd10 +#define MSR_IA32_MBA_THRTL_BASE 0xd50 +#define MSR_IA32_MBA_BW_BASE 0xc0000200 -#define L3_QOS_CDP_ENABLE 0x01ULL +#define MSR_IA32_QM_CTR 0x0c8e +#define MSR_IA32_QM_EVTSEL 0x0c8d -#define L2_QOS_CDP_ENABLE 0x01ULL +#define L3_QOS_CDP_ENABLE 0x01ULL + +#define L2_QOS_CDP_ENABLE 0x01ULL /* * Event IDs are used to program IA32_QM_EVTSEL before reading event @@ -29,6 +33,9 @@ #define MBM_CNTR_WIDTH 24 #define MBM_OVERFLOW_INTERVAL 1000 #define MAX_MBA_BW 100u +#define MBA_IS_LINEAR 0x4 +#define MBA_MAX_MBPS U32_MAX +#define MAX_MBA_BW_AMD 0x800 #define RMID_VAL_ERROR BIT_ULL(63) #define RMID_VAL_UNAVAIL BIT_ULL(62) @@ -69,7 +76,7 @@ struct rmid_read { u64 val; }; -extern unsigned int intel_cqm_threshold; +extern unsigned int resctrl_cqm_threshold; extern bool rdt_alloc_capable; extern bool rdt_mon_capable; extern unsigned int rdt_mon_features; @@ -391,9 +398,9 @@ struct rdt_parse_data { * struct rdt_resource - attributes of an RDT resource * @rid: The index of the resource * @alloc_enabled: Is allocation enabled on this machine - * @mon_enabled: Is monitoring enabled for this feature + * @mon_enabled: Is monitoring enabled for this feature * @alloc_capable: Is allocation available on this machine - * @mon_capable: Is monitor feature available on this machine + * @mon_capable: Is monitor feature available on this machine * @name: Name to use in "schemata" file * @num_closid: Number of CLOSIDs available * @cache_level: Which cache level defines scope of this resource @@ -405,10 +412,11 @@ struct rdt_parse_data { * @cache: Cache allocation related data * @format_str: Per resource format string to show domain value * @parse_ctrlval: Per resource function pointer to parse control values - * @evt_list: List of monitoring events - * @num_rmid: Number of RMIDs available - * @mon_scale: cqm counter * mon_scale = occupancy in bytes - * @fflags: flags to choose base and info files + * @cbm_validate Cache bitmask validate function + * @evt_list: List of monitoring events + * @num_rmid: Number of RMIDs available + * @mon_scale: cqm counter * mon_scale = occupancy in bytes + * @fflags: flags to choose base and info files */ struct rdt_resource { int rid; @@ -431,6 +439,7 @@ struct rdt_resource { int (*parse_ctrlval)(struct rdt_parse_data *data, struct rdt_resource *r, struct rdt_domain *d); + bool (*cbm_validate)(char *buf, u32 *data, struct rdt_resource *r); struct list_head evt_list; int num_rmid; unsigned int mon_scale; @@ -439,8 +448,10 @@ struct rdt_resource { int parse_cbm(struct rdt_parse_data *data, struct rdt_resource *r, struct rdt_domain *d); -int parse_bw(struct rdt_parse_data *data, struct rdt_resource *r, - struct rdt_domain *d); +int parse_bw_intel(struct rdt_parse_data *data, struct rdt_resource *r, + struct rdt_domain *d); +int parse_bw_amd(struct rdt_parse_data *data, struct rdt_resource *r, + struct rdt_domain *d); extern struct mutex rdtgroup_mutex; @@ -463,6 +474,10 @@ enum { RDT_NUM_RESOURCES, }; +#define for_each_rdt_resource(r) \ + for (r = rdt_resources_all; r < rdt_resources_all + RDT_NUM_RESOURCES;\ + r++) + #define for_each_capable_rdt_resource(r) \ for (r = rdt_resources_all; r < rdt_resources_all + RDT_NUM_RESOURCES;\ r++) \ @@ -567,5 +582,7 @@ void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms); void cqm_handle_limbo(struct work_struct *work); bool has_busy_rmid(struct rdt_resource *r, struct rdt_domain *d); void __check_limbo(struct rdt_domain *d, bool force_free); +bool cbm_validate_intel(char *buf, u32 *data, struct rdt_resource *r); +bool cbm_validate_amd(char *buf, u32 *data, struct rdt_resource *r); -#endif /* _ASM_X86_INTEL_RDT_H */ +#endif /* _ASM_X86_RESCTRL_INTERNAL_H */ diff --git a/arch/x86/kernel/cpu/intel_rdt_monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index b0f3aed76b75..f33f11f69078 100644 --- a/arch/x86/kernel/cpu/intel_rdt_monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -26,10 +26,7 @@ #include <linux/module.h> #include <linux/slab.h> #include <asm/cpu_device_id.h> -#include "intel_rdt.h" - -#define MSR_IA32_QM_CTR 0x0c8e -#define MSR_IA32_QM_EVTSEL 0x0c8d +#include "internal.h" struct rmid_entry { u32 rmid; @@ -73,7 +70,7 @@ unsigned int rdt_mon_features; * This is the threshold cache occupancy at which we will consider an * RMID available for re-allocation. */ -unsigned int intel_cqm_threshold; +unsigned int resctrl_cqm_threshold; static inline struct rmid_entry *__rmid_entry(u32 rmid) { @@ -107,7 +104,7 @@ static bool rmid_dirty(struct rmid_entry *entry) { u64 val = __rmid_read(entry->rmid, QOS_L3_OCCUP_EVENT_ID); - return val >= intel_cqm_threshold; + return val >= resctrl_cqm_threshold; } /* @@ -187,7 +184,7 @@ static void add_rmid_to_limbo(struct rmid_entry *entry) list_for_each_entry(d, &r->domains, list) { if (cpumask_test_cpu(cpu, &d->cpu_mask)) { val = __rmid_read(entry->rmid, QOS_L3_OCCUP_EVENT_ID); - if (val <= intel_cqm_threshold) + if (val <= resctrl_cqm_threshold) continue; } @@ -625,6 +622,7 @@ static void l3_mon_evt_init(struct rdt_resource *r) int rdt_get_mon_l3_config(struct rdt_resource *r) { + unsigned int cl_size = boot_cpu_data.x86_cache_size; int ret; r->mon_scale = boot_cpu_data.x86_cache_occ_scale; @@ -637,10 +635,10 @@ int rdt_get_mon_l3_config(struct rdt_resource *r) * * For a 35MB LLC and 56 RMIDs, this is ~1.8% of the LLC. */ - intel_cqm_threshold = boot_cpu_data.x86_cache_size * 1024 / r->num_rmid; + resctrl_cqm_threshold = cl_size * 1024 / r->num_rmid; /* h/w works in units of "boot_cpu_data.x86_cache_occ_scale" */ - intel_cqm_threshold /= r->mon_scale; + resctrl_cqm_threshold /= r->mon_scale; ret = dom_data_init(r); if (ret) diff --git a/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c index 815b4e92522c..14bed6af8377 100644 --- a/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c +++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c @@ -24,14 +24,14 @@ #include <asm/cacheflush.h> #include <asm/intel-family.h> -#include <asm/intel_rdt_sched.h> +#include <asm/resctrl_sched.h> #include <asm/perf_event.h> #include "../../events/perf_event.h" /* For X86_CONFIG() */ -#include "intel_rdt.h" +#include "internal.h" #define CREATE_TRACE_POINTS -#include "intel_rdt_pseudo_lock_event.h" +#include "pseudo_lock_event.h" /* * MSR_MISC_FEATURE_CONTROL register enables the modification of hardware @@ -213,7 +213,7 @@ static int pseudo_lock_cstates_constrain(struct pseudo_lock_region *plr) for_each_cpu(cpu, &plr->d->cpu_mask) { pm_req = kzalloc(sizeof(*pm_req), GFP_KERNEL); if (!pm_req) { - rdt_last_cmd_puts("fail allocating mem for PM QoS\n"); + rdt_last_cmd_puts("Failure to allocate memory for PM QoS\n"); ret = -ENOMEM; goto out_err; } @@ -222,7 +222,7 @@ static int pseudo_lock_cstates_constrain(struct pseudo_lock_region *plr) DEV_PM_QOS_RESUME_LATENCY, 30); if (ret < 0) { - rdt_last_cmd_printf("fail to add latency req cpu%d\n", + rdt_last_cmd_printf("Failed to add latency req CPU%d\n", cpu); kfree(pm_req); ret = -1; @@ -289,7 +289,7 @@ static int pseudo_lock_region_init(struct pseudo_lock_region *plr) plr->cpu = cpumask_first(&plr->d->cpu_mask); if (!cpu_online(plr->cpu)) { - rdt_last_cmd_printf("cpu %u associated with cache not online\n", + rdt_last_cmd_printf("CPU %u associated with cache not online\n", plr->cpu); ret = -ENODEV; goto out_region; @@ -307,7 +307,7 @@ static int pseudo_lock_region_init(struct pseudo_lock_region *plr) } ret = -1; - rdt_last_cmd_puts("unable to determine cache line size\n"); + rdt_last_cmd_puts("Unable to determine cache line size\n"); out_region: pseudo_lock_region_clear(plr); return ret; @@ -361,14 +361,14 @@ static int pseudo_lock_region_alloc(struct pseudo_lock_region *plr) * KMALLOC_MAX_SIZE. */ if (plr->size > KMALLOC_MAX_SIZE) { - rdt_last_cmd_puts("requested region exceeds maximum size\n"); + rdt_last_cmd_puts("Requested region exceeds maximum size\n"); ret = -E2BIG; goto out_region; } plr->kmem = kzalloc(plr->size, GFP_KERNEL); if (!plr->kmem) { - rdt_last_cmd_puts("unable to allocate memory\n"); + rdt_last_cmd_puts("Unable to allocate memory\n"); ret = -ENOMEM; goto out_region; } @@ -665,7 +665,7 @@ int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp) * default closid associated with it. */ if (rdtgrp == &rdtgroup_default) { - rdt_last_cmd_puts("cannot pseudo-lock default group\n"); + rdt_last_cmd_puts("Cannot pseudo-lock default group\n"); return -EINVAL; } @@ -707,17 +707,17 @@ int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp) */ prefetch_disable_bits = get_prefetch_disable_bits(); if (prefetch_disable_bits == 0) { - rdt_last_cmd_puts("pseudo-locking not supported\n"); + rdt_last_cmd_puts("Pseudo-locking not supported\n"); return -EINVAL; } if (rdtgroup_monitor_in_progress(rdtgrp)) { - rdt_last_cmd_puts("monitoring in progress\n"); + rdt_last_cmd_puts("Monitoring in progress\n"); return -EINVAL; } if (rdtgroup_tasks_assigned(rdtgrp)) { - rdt_last_cmd_puts("tasks assigned to resource group\n"); + rdt_last_cmd_puts("Tasks assigned to resource group\n"); return -EINVAL; } @@ -727,13 +727,13 @@ int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp) } if (rdtgroup_locksetup_user_restrict(rdtgrp)) { - rdt_last_cmd_puts("unable to modify resctrl permissions\n"); + rdt_last_cmd_puts("Unable to modify resctrl permissions\n"); return -EIO; } ret = pseudo_lock_init(rdtgrp); if (ret) { - rdt_last_cmd_puts("unable to init pseudo-lock region\n"); + rdt_last_cmd_puts("Unable to init pseudo-lock region\n"); goto out_release; } @@ -770,7 +770,7 @@ int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp) if (rdt_mon_capable) { ret = alloc_rmid(); if (ret < 0) { - rdt_last_cmd_puts("out of RMIDs\n"); + rdt_last_cmd_puts("Out of RMIDs\n"); return ret; } rdtgrp->mon.rmid = ret; @@ -1304,7 +1304,7 @@ int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp) "pseudo_lock/%u", plr->cpu); if (IS_ERR(thread)) { ret = PTR_ERR(thread); - rdt_last_cmd_printf("locking thread returned error %d\n", ret); + rdt_last_cmd_printf("Locking thread returned error %d\n", ret); goto out_cstates; } @@ -1322,13 +1322,13 @@ int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp) * the cleared, but not freed, plr struct resulting in an * empty pseudo-locking loop. */ - rdt_last_cmd_puts("locking thread interrupted\n"); + rdt_last_cmd_puts("Locking thread interrupted\n"); goto out_cstates; } ret = pseudo_lock_minor_get(&new_minor); if (ret < 0) { - rdt_last_cmd_puts("unable to obtain a new minor number\n"); + rdt_last_cmd_puts("Unable to obtain a new minor number\n"); goto out_cstates; } @@ -1360,7 +1360,7 @@ int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp) if (IS_ERR(dev)) { ret = PTR_ERR(dev); - rdt_last_cmd_printf("failed to create character device: %d\n", + rdt_last_cmd_printf("Failed to create character device: %d\n", ret); goto out_debugfs; } diff --git a/arch/x86/kernel/cpu/intel_rdt_pseudo_lock_event.h b/arch/x86/kernel/cpu/resctrl/pseudo_lock_event.h index 2c041e6d9f05..428ebbd4270b 100644 --- a/arch/x86/kernel/cpu/intel_rdt_pseudo_lock_event.h +++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock_event.h @@ -39,5 +39,5 @@ TRACE_EVENT(pseudo_lock_l3, #undef TRACE_INCLUDE_PATH #define TRACE_INCLUDE_PATH . -#define TRACE_INCLUDE_FILE intel_rdt_pseudo_lock_event +#define TRACE_INCLUDE_FILE pseudo_lock_event #include <trace/define_trace.h> diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index f27b8115ffa2..8388adf241b2 100644 --- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -35,8 +35,8 @@ #include <uapi/linux/magic.h> -#include <asm/intel_rdt_sched.h> -#include "intel_rdt.h" +#include <asm/resctrl_sched.h> +#include "internal.h" DEFINE_STATIC_KEY_FALSE(rdt_enable_key); DEFINE_STATIC_KEY_FALSE(rdt_mon_enable_key); @@ -298,7 +298,7 @@ static int rdtgroup_cpus_show(struct kernfs_open_file *of, } /* - * This is safe against intel_rdt_sched_in() called from __switch_to() + * This is safe against resctrl_sched_in() called from __switch_to() * because __switch_to() is executed with interrupts disabled. A local call * from update_closid_rmid() is proteced against __switch_to() because * preemption is disabled. @@ -317,7 +317,7 @@ static void update_cpu_closid_rmid(void *info) * executing task might have its own closid selected. Just reuse * the context switch code. */ - intel_rdt_sched_in(); + resctrl_sched_in(); } /* @@ -345,7 +345,7 @@ static int cpus_mon_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask, /* Check whether cpus belong to parent ctrl group */ cpumask_andnot(tmpmask, newmask, &prgrp->cpu_mask); if (cpumask_weight(tmpmask)) { - rdt_last_cmd_puts("can only add CPUs to mongroup that belong to parent\n"); + rdt_last_cmd_puts("Can only add CPUs to mongroup that belong to parent\n"); return -EINVAL; } @@ -470,14 +470,14 @@ static ssize_t rdtgroup_cpus_write(struct kernfs_open_file *of, rdt_last_cmd_clear(); if (!rdtgrp) { ret = -ENOENT; - rdt_last_cmd_puts("directory was removed\n"); + rdt_last_cmd_puts("Directory was removed\n"); goto unlock; } if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED || rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { ret = -EINVAL; - rdt_last_cmd_puts("pseudo-locking in progress\n"); + rdt_last_cmd_puts("Pseudo-locking in progress\n"); goto unlock; } @@ -487,7 +487,7 @@ static ssize_t rdtgroup_cpus_write(struct kernfs_open_file *of, ret = cpumask_parse(buf, newmask); if (ret) { - rdt_last_cmd_puts("bad cpu list/mask\n"); + rdt_last_cmd_puts("Bad CPU list/mask\n"); goto unlock; } @@ -495,7 +495,7 @@ static ssize_t rdtgroup_cpus_write(struct kernfs_open_file *of, cpumask_andnot(tmpmask, newmask, cpu_online_mask); if (cpumask_weight(tmpmask)) { ret = -EINVAL; - rdt_last_cmd_puts("can only assign online cpus\n"); + rdt_last_cmd_puts("Can only assign online CPUs\n"); goto unlock; } @@ -542,7 +542,7 @@ static void move_myself(struct callback_head *head) preempt_disable(); /* update PQR_ASSOC MSR to make resource group go into effect */ - intel_rdt_sched_in(); + resctrl_sched_in(); preempt_enable(); kfree(callback); @@ -574,7 +574,7 @@ static int __rdtgroup_move_task(struct task_struct *tsk, */ atomic_dec(&rdtgrp->waitcount); kfree(callback); - rdt_last_cmd_puts("task exited\n"); + rdt_last_cmd_puts("Task exited\n"); } else { /* * For ctrl_mon groups move both closid and rmid. @@ -692,7 +692,7 @@ static ssize_t rdtgroup_tasks_write(struct kernfs_open_file *of, if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED || rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { ret = -EINVAL; - rdt_last_cmd_puts("pseudo-locking in progress\n"); + rdt_last_cmd_puts("Pseudo-locking in progress\n"); goto unlock; } @@ -926,7 +926,7 @@ static int max_threshold_occ_show(struct kernfs_open_file *of, { struct rdt_resource *r = of->kn->parent->priv; - seq_printf(seq, "%u\n", intel_cqm_threshold * r->mon_scale); + seq_printf(seq, "%u\n", resctrl_cqm_threshold * r->mon_scale); return 0; } @@ -945,7 +945,7 @@ static ssize_t max_threshold_occ_write(struct kernfs_open_file *of, if (bytes > (boot_cpu_data.x86_cache_size * 1024)) return -EINVAL; - intel_cqm_threshold = bytes / r->mon_scale; + resctrl_cqm_threshold = bytes / r->mon_scale; return nbytes; } @@ -1029,7 +1029,7 @@ static int rdt_cdp_peer_get(struct rdt_resource *r, struct rdt_domain *d, * peer RDT CDP resource. Hence the WARN. */ _d_cdp = rdt_find_domain(_r_cdp, d->id, NULL); - if (WARN_ON(!_d_cdp)) { + if (WARN_ON(IS_ERR_OR_NULL(_d_cdp))) { _r_cdp = NULL; ret = -EINVAL; } @@ -1158,14 +1158,14 @@ static bool rdtgroup_mode_test_exclusive(struct rdtgroup *rdtgrp) list_for_each_entry(d, &r->domains, list) { if (rdtgroup_cbm_overlaps(r, d, d->ctrl_val[closid], rdtgrp->closid, false)) { - rdt_last_cmd_puts("schemata overlaps\n"); + rdt_last_cmd_puts("Schemata overlaps\n"); return false; } } } if (!has_cache) { - rdt_last_cmd_puts("cannot be exclusive without CAT/CDP\n"); + rdt_last_cmd_puts("Cannot be exclusive without CAT/CDP\n"); return false; } @@ -1206,7 +1206,7 @@ static ssize_t rdtgroup_mode_write(struct kernfs_open_file *of, goto out; if (mode == RDT_MODE_PSEUDO_LOCKED) { - rdt_last_cmd_printf("cannot change pseudo-locked group\n"); + rdt_last_cmd_puts("Cannot change pseudo-locked group\n"); ret = -EINVAL; goto out; } @@ -1235,7 +1235,7 @@ static ssize_t rdtgroup_mode_write(struct kernfs_open_file *of, goto out; rdtgrp->mode = RDT_MODE_PSEUDO_LOCKSETUP; } else { - rdt_last_cmd_printf("unknown/unsupported mode\n"); + rdt_last_cmd_puts("Unknown or unsupported mode\n"); ret = -EINVAL; } @@ -1722,14 +1722,14 @@ static void l3_qos_cfg_update(void *arg) { bool *enable = arg; - wrmsrl(IA32_L3_QOS_CFG, *enable ? L3_QOS_CDP_ENABLE : 0ULL); + wrmsrl(MSR_IA32_L3_QOS_CFG, *enable ? L3_QOS_CDP_ENABLE : 0ULL); } static void l2_qos_cfg_update(void *arg) { bool *enable = arg; - wrmsrl(IA32_L2_QOS_CFG, *enable ? L2_QOS_CDP_ENABLE : 0ULL); + wrmsrl(MSR_IA32_L2_QOS_CFG, *enable ? L2_QOS_CDP_ENABLE : 0ULL); } static inline bool is_mba_linear(void) @@ -1878,7 +1878,10 @@ static int parse_rdtgroupfs_options(char *data) if (ret) goto out; } else if (!strcmp(token, "mba_MBps")) { - ret = set_mba_sc(true); + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) + ret = set_mba_sc(true); + else + ret = -EINVAL; if (ret) goto out; } else { @@ -2540,7 +2543,7 @@ static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp) tmp_cbm = d->new_ctrl; if (bitmap_weight(&tmp_cbm, r->cache.cbm_len) < r->cache.min_cbm_bits) { - rdt_last_cmd_printf("no space on %s:%d\n", + rdt_last_cmd_printf("No space on %s:%d\n", r->name, d->id); return -ENOSPC; } @@ -2557,7 +2560,7 @@ static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp) continue; ret = update_domains(r, rdtgrp->closid); if (ret < 0) { - rdt_last_cmd_puts("failed to initialize allocations\n"); + rdt_last_cmd_puts("Failed to initialize allocations\n"); return ret; } rdtgrp->mode = RDT_MODE_SHAREABLE; @@ -2580,7 +2583,7 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn, rdt_last_cmd_clear(); if (!prdtgrp) { ret = -ENODEV; - rdt_last_cmd_puts("directory was removed\n"); + rdt_last_cmd_puts("Directory was removed\n"); goto out_unlock; } @@ -2588,7 +2591,7 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn, (prdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP || prdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)) { ret = -EINVAL; - rdt_last_cmd_puts("pseudo-locking in progress\n"); + rdt_last_cmd_puts("Pseudo-locking in progress\n"); goto out_unlock; } @@ -2596,7 +2599,7 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn, rdtgrp = kzalloc(sizeof(*rdtgrp), GFP_KERNEL); if (!rdtgrp) { ret = -ENOSPC; - rdt_last_cmd_puts("kernel out of memory\n"); + rdt_last_cmd_puts("Kernel out of memory\n"); goto out_unlock; } *r = rdtgrp; @@ -2637,7 +2640,7 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn, if (rdt_mon_capable) { ret = alloc_rmid(); if (ret < 0) { - rdt_last_cmd_puts("out of RMIDs\n"); + rdt_last_cmd_puts("Out of RMIDs\n"); goto out_destroy; } rdtgrp->mon.rmid = ret; @@ -2725,7 +2728,7 @@ static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn, kn = rdtgrp->kn; ret = closid_alloc(); if (ret < 0) { - rdt_last_cmd_puts("out of CLOSIDs\n"); + rdt_last_cmd_puts("Out of CLOSIDs\n"); goto out_common_fail; } closid = ret; diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index 772c219b6889..a4d74d616222 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -17,7 +17,11 @@ struct cpuid_bit { u32 sub_leaf; }; -/* Please keep the leaf sorted by cpuid_bit.level for faster search. */ +/* + * Please keep the leaf sorted by cpuid_bit.level for faster search. + * X86_FEATURE_MBA is supported by both Intel and AMD. But the CPUID + * levels are different and there is a separate entry for each. + */ static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_APERFMPERF, CPUID_ECX, 0, 0x00000006, 0 }, { X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 }, @@ -29,6 +33,7 @@ static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_HW_PSTATE, CPUID_EDX, 7, 0x80000007, 0 }, { X86_FEATURE_CPB, CPUID_EDX, 9, 0x80000007, 0 }, { X86_FEATURE_PROC_FEEDBACK, CPUID_EDX, 11, 0x80000007, 0 }, + { X86_FEATURE_MBA, CPUID_EBX, 6, 0x80000008, 0 }, { X86_FEATURE_SME, CPUID_EAX, 0, 0x8000001f, 0 }, { X86_FEATURE_SEV, CPUID_EAX, 1, 0x8000001f, 0 }, { 0, 0, 0, 0, 0 } |