diff options
Diffstat (limited to 'arch/arm64/kernel')
61 files changed, 3303 insertions, 1626 deletions
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index b87541360f43..4c8b13bede80 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -18,7 +18,8 @@ arm64-obj-y := debug-monitors.o entry.o irq.o fpsimd.o \ hyp-stub.o psci.o cpu_ops.o insn.o \ return_address.o cpuinfo.o cpu_errata.o \ cpufeature.o alternative.o cacheinfo.o \ - smp.o smp_spin_table.o topology.o smccc-call.o + smp.o smp_spin_table.o topology.o smccc-call.o \ + syscall.o extra-$(CONFIG_EFI) := efi-entry.o @@ -27,7 +28,7 @@ $(obj)/%.stub.o: $(obj)/%.o FORCE $(call if_changed,objcopy) arm64-obj-$(CONFIG_COMPAT) += sys32.o kuser32.o signal32.o \ - sys_compat.o entry32.o + sys_compat.o arm64-obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o entry-ftrace.o arm64-obj-$(CONFIG_MODULES) += arm64ksyms.o module.o arm64-obj-$(CONFIG_ARM64_MODULE_PLTS) += module-plts.o @@ -38,7 +39,8 @@ arm64-obj-$(CONFIG_CPU_PM) += sleep.o suspend.o arm64-obj-$(CONFIG_CPU_IDLE) += cpuidle.o arm64-obj-$(CONFIG_JUMP_LABEL) += jump_label.o arm64-obj-$(CONFIG_KGDB) += kgdb.o -arm64-obj-$(CONFIG_EFI) += efi.o efi-entry.stub.o +arm64-obj-$(CONFIG_EFI) += efi.o efi-entry.stub.o \ + efi-rt-wrapper.o arm64-obj-$(CONFIG_PCI) += pci.o arm64-obj-$(CONFIG_ARMV8_DEPRECATED) += armv8_deprecated.o arm64-obj-$(CONFIG_ACPI) += acpi.o @@ -52,11 +54,9 @@ arm64-obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o \ arm64-obj-$(CONFIG_ARM64_RELOC_TEST) += arm64-reloc-test.o arm64-reloc-test-y := reloc_test_core.o reloc_test_syms.o arm64-obj-$(CONFIG_CRASH_DUMP) += crash_dump.o +arm64-obj-$(CONFIG_CRASH_CORE) += crash_core.o arm64-obj-$(CONFIG_ARM_SDE_INTERFACE) += sdei.o - -ifeq ($(CONFIG_KVM),y) -arm64-obj-$(CONFIG_HARDEN_BRANCH_PREDICTOR) += bpi.o -endif +arm64-obj-$(CONFIG_ARM64_SSBD) += ssbd.o obj-y += $(arm64-obj-y) vdso/ probes/ obj-m += $(arm64-obj-m) diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c index 7b09487ff8fb..44e3c351e1ea 100644 --- a/arch/arm64/kernel/acpi.c +++ b/arch/arm64/kernel/acpi.c @@ -16,8 +16,8 @@ #define pr_fmt(fmt) "ACPI: " fmt #include <linux/acpi.h> -#include <linux/bootmem.h> #include <linux/cpumask.h> +#include <linux/efi.h> #include <linux/efi-bgrt.h> #include <linux/init.h> #include <linux/irq.h> @@ -29,13 +29,9 @@ #include <asm/cputype.h> #include <asm/cpu_ops.h> +#include <asm/pgtable.h> #include <asm/smp_plat.h> -#ifdef CONFIG_ACPI_APEI -# include <linux/efi.h> -# include <asm/pgtable.h> -#endif - int acpi_noirq = 1; /* skip ACPI IRQ initialization */ int acpi_disabled = 1; EXPORT_SYMBOL(acpi_disabled); @@ -239,8 +235,7 @@ done: } } -#ifdef CONFIG_ACPI_APEI -pgprot_t arch_apei_get_mem_attribute(phys_addr_t addr) +pgprot_t __acpi_get_mem_attribute(phys_addr_t addr) { /* * According to "Table 8 Map: EFI memory types to AArch64 memory @@ -261,4 +256,3 @@ pgprot_t arch_apei_get_mem_attribute(phys_addr_t addr) return __pgprot(PROT_NORMAL_NC); return __pgprot(PROT_DEVICE_nGnRnE); } -#endif diff --git a/arch/arm64/kernel/acpi_numa.c b/arch/arm64/kernel/acpi_numa.c index d190a7b231bf..eac1d0cc595c 100644 --- a/arch/arm64/kernel/acpi_numa.c +++ b/arch/arm64/kernel/acpi_numa.c @@ -18,7 +18,6 @@ #include <linux/acpi.h> #include <linux/bitmap.h> -#include <linux/bootmem.h> #include <linux/kernel.h> #include <linux/mm.h> #include <linux/memblock.h> @@ -26,36 +25,73 @@ #include <linux/module.h> #include <linux/topology.h> -#include <acpi/processor.h> #include <asm/numa.h> -static int cpus_in_srat; +static int acpi_early_node_map[NR_CPUS] __initdata = { NUMA_NO_NODE }; -struct __node_cpu_hwid { - u32 node_id; /* logical node containing this CPU */ - u64 cpu_hwid; /* MPIDR for this CPU */ -}; +int __init acpi_numa_get_nid(unsigned int cpu) +{ + return acpi_early_node_map[cpu]; +} + +static inline int get_cpu_for_acpi_id(u32 uid) +{ + int cpu; + + for (cpu = 0; cpu < nr_cpu_ids; cpu++) + if (uid == get_acpi_id_for_cpu(cpu)) + return cpu; -static struct __node_cpu_hwid early_node_cpu_hwid[NR_CPUS] = { -[0 ... NR_CPUS - 1] = {NUMA_NO_NODE, PHYS_CPUID_INVALID} }; + return -EINVAL; +} -int acpi_numa_get_nid(unsigned int cpu, u64 hwid) +static int __init acpi_parse_gicc_pxm(struct acpi_subtable_header *header, + const unsigned long end) { - int i; + struct acpi_srat_gicc_affinity *pa; + int cpu, pxm, node; - for (i = 0; i < cpus_in_srat; i++) { - if (hwid == early_node_cpu_hwid[i].cpu_hwid) - return early_node_cpu_hwid[i].node_id; - } + if (srat_disabled()) + return -EINVAL; + + pa = (struct acpi_srat_gicc_affinity *)header; + if (!pa) + return -EINVAL; + + if (!(pa->flags & ACPI_SRAT_GICC_ENABLED)) + return 0; - return NUMA_NO_NODE; + pxm = pa->proximity_domain; + node = pxm_to_node(pxm); + + /* + * If we can't map the UID to a logical cpu this + * means that the UID is not part of possible cpus + * so we do not need a NUMA mapping for it, skip + * the SRAT entry and keep parsing. + */ + cpu = get_cpu_for_acpi_id(pa->acpi_processor_uid); + if (cpu < 0) + return 0; + + acpi_early_node_map[cpu] = node; + pr_info("SRAT: PXM %d -> MPIDR 0x%llx -> Node %d\n", pxm, + cpu_logical_map(cpu), node); + + return 0; +} + +void __init acpi_map_cpus_to_nodes(void) +{ + acpi_table_parse_entries(ACPI_SIG_SRAT, sizeof(struct acpi_table_srat), + ACPI_SRAT_TYPE_GICC_AFFINITY, + acpi_parse_gicc_pxm, 0); } /* Callback for Proximity Domain -> ACPI processor UID mapping */ void __init acpi_numa_gicc_affinity_init(struct acpi_srat_gicc_affinity *pa) { int pxm, node; - phys_cpuid_t mpidr; if (srat_disabled()) return; @@ -70,12 +106,6 @@ void __init acpi_numa_gicc_affinity_init(struct acpi_srat_gicc_affinity *pa) if (!(pa->flags & ACPI_SRAT_GICC_ENABLED)) return; - if (cpus_in_srat >= NR_CPUS) { - pr_warn_once("SRAT: cpu_to_node_map[%d] is too small, may not be able to use all cpus\n", - NR_CPUS); - return; - } - pxm = pa->proximity_domain; node = acpi_map_pxm_to_node(pxm); @@ -85,20 +115,7 @@ void __init acpi_numa_gicc_affinity_init(struct acpi_srat_gicc_affinity *pa) return; } - mpidr = acpi_map_madt_entry(pa->acpi_processor_uid); - if (mpidr == PHYS_CPUID_INVALID) { - pr_err("SRAT: PXM %d with ACPI ID %d has no valid MPIDR in MADT\n", - pxm, pa->acpi_processor_uid); - bad_srat(); - return; - } - - early_node_cpu_hwid[cpus_in_srat].node_id = node; - early_node_cpu_hwid[cpus_in_srat].cpu_hwid = mpidr; node_set(node, numa_nodes_parsed); - cpus_in_srat++; - pr_info("SRAT: PXM %d -> MPIDR 0x%Lx -> Node %d\n", - pxm, mpidr, node); } int __init arm64_acpi_numa_init(void) diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c index 414288a558c8..b5d603992d40 100644 --- a/arch/arm64/kernel/alternative.c +++ b/arch/arm64/kernel/alternative.c @@ -47,11 +47,11 @@ static bool branch_insn_requires_update(struct alt_instr *alt, unsigned long pc) unsigned long replptr; if (kernel_text_address(pc)) - return 1; + return true; replptr = (unsigned long)ALT_REPL_PTR(alt); if (pc >= replptr && pc <= (replptr + alt->alt_len)) - return 0; + return false; /* * Branching into *another* alternate sequence is doomed, and @@ -107,35 +107,91 @@ static u32 get_alt_insn(struct alt_instr *alt, __le32 *insnptr, __le32 *altinsnp return insn; } -static void __apply_alternatives(void *alt_region, bool use_linear_alias) +static void patch_alternative(struct alt_instr *alt, + __le32 *origptr, __le32 *updptr, int nr_inst) +{ + __le32 *replptr; + int i; + + replptr = ALT_REPL_PTR(alt); + for (i = 0; i < nr_inst; i++) { + u32 insn; + + insn = get_alt_insn(alt, origptr + i, replptr + i); + updptr[i] = cpu_to_le32(insn); + } +} + +/* + * We provide our own, private D-cache cleaning function so that we don't + * accidentally call into the cache.S code, which is patched by us at + * runtime. + */ +static void clean_dcache_range_nopatch(u64 start, u64 end) +{ + u64 cur, d_size, ctr_el0; + + ctr_el0 = read_sanitised_ftr_reg(SYS_CTR_EL0); + d_size = 4 << cpuid_feature_extract_unsigned_field(ctr_el0, + CTR_DMINLINE_SHIFT); + cur = start & ~(d_size - 1); + do { + /* + * We must clean+invalidate to the PoC in order to avoid + * Cortex-A53 errata 826319, 827319, 824069 and 819472 + * (this corresponds to ARM64_WORKAROUND_CLEAN_CACHE) + */ + asm volatile("dc civac, %0" : : "r" (cur) : "memory"); + } while (cur += d_size, cur < end); +} + +static void __apply_alternatives(void *alt_region, bool is_module) { struct alt_instr *alt; struct alt_region *region = alt_region; - __le32 *origptr, *replptr, *updptr; + __le32 *origptr, *updptr; + alternative_cb_t alt_cb; for (alt = region->begin; alt < region->end; alt++) { - u32 insn; - int i, nr_inst; + int nr_inst; - if (!cpus_have_cap(alt->cpufeature)) + /* Use ARM64_CB_PATCH as an unconditional patch */ + if (alt->cpufeature < ARM64_CB_PATCH && + !cpus_have_cap(alt->cpufeature)) continue; - BUG_ON(alt->alt_len != alt->orig_len); + if (alt->cpufeature == ARM64_CB_PATCH) + BUG_ON(alt->alt_len != 0); + else + BUG_ON(alt->alt_len != alt->orig_len); pr_info_once("patching kernel code\n"); origptr = ALT_ORIG_PTR(alt); - replptr = ALT_REPL_PTR(alt); - updptr = use_linear_alias ? lm_alias(origptr) : origptr; - nr_inst = alt->alt_len / sizeof(insn); + updptr = is_module ? origptr : lm_alias(origptr); + nr_inst = alt->orig_len / AARCH64_INSN_SIZE; + + if (alt->cpufeature < ARM64_CB_PATCH) + alt_cb = patch_alternative; + else + alt_cb = ALT_REPL_PTR(alt); - for (i = 0; i < nr_inst; i++) { - insn = get_alt_insn(alt, origptr + i, replptr + i); - updptr[i] = cpu_to_le32(insn); + alt_cb(alt, origptr, updptr, nr_inst); + + if (!is_module) { + clean_dcache_range_nopatch((u64)origptr, + (u64)(origptr + nr_inst)); } + } - flush_icache_range((uintptr_t)origptr, - (uintptr_t)(origptr + nr_inst)); + /* + * The core module code takes care of cache maintenance in + * flush_module_icache(). + */ + if (!is_module) { + dsb(ish); + __flush_icache_all(); + isb(); } } @@ -157,7 +213,7 @@ static int __apply_alternatives_multi_stop(void *unused) isb(); } else { BUG_ON(alternatives_applied); - __apply_alternatives(®ion, true); + __apply_alternatives(®ion, false); /* Barriers provided by the cache flushing */ WRITE_ONCE(alternatives_applied, 1); } @@ -171,12 +227,14 @@ void __init apply_alternatives_all(void) stop_machine(__apply_alternatives_multi_stop, NULL, cpu_online_mask); } -void apply_alternatives(void *start, size_t length) +#ifdef CONFIG_MODULES +void apply_alternatives_module(void *start, size_t length) { struct alt_region region = { .begin = start, .end = start + length, }; - __apply_alternatives(®ion, false); + __apply_alternatives(®ion, true); } +#endif diff --git a/arch/arm64/kernel/arm64ksyms.c b/arch/arm64/kernel/arm64ksyms.c index 66be504edb6c..72f63a59b008 100644 --- a/arch/arm64/kernel/arm64ksyms.c +++ b/arch/arm64/kernel/arm64ksyms.c @@ -44,20 +44,23 @@ EXPORT_SYMBOL(__arch_copy_in_user); EXPORT_SYMBOL(memstart_addr); /* string / mem functions */ +#ifndef CONFIG_KASAN EXPORT_SYMBOL(strchr); EXPORT_SYMBOL(strrchr); EXPORT_SYMBOL(strcmp); EXPORT_SYMBOL(strncmp); EXPORT_SYMBOL(strlen); EXPORT_SYMBOL(strnlen); +EXPORT_SYMBOL(memcmp); +EXPORT_SYMBOL(memchr); +#endif + EXPORT_SYMBOL(memset); EXPORT_SYMBOL(memcpy); EXPORT_SYMBOL(memmove); EXPORT_SYMBOL(__memset); EXPORT_SYMBOL(__memcpy); EXPORT_SYMBOL(__memmove); -EXPORT_SYMBOL(memchr); -EXPORT_SYMBOL(memcmp); /* atomic bitops */ EXPORT_SYMBOL(set_bit); @@ -75,3 +78,11 @@ NOKPROBE_SYMBOL(_mcount); /* arm-smccc */ EXPORT_SYMBOL(__arm_smccc_smc); EXPORT_SYMBOL(__arm_smccc_hvc); + + /* tishift.S */ +extern long long __ashlti3(long long a, int b); +EXPORT_SYMBOL(__ashlti3); +extern long long __ashrti3(long long a, int b); +EXPORT_SYMBOL(__ashrti3); +extern long long __lshrti3(long long a, int b); +EXPORT_SYMBOL(__lshrti3); diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c index 68450e954d47..92be1d12d590 100644 --- a/arch/arm64/kernel/armv8_deprecated.c +++ b/arch/arm64/kernel/armv8_deprecated.c @@ -13,6 +13,7 @@ #include <linux/sched.h> #include <linux/slab.h> #include <linux/sysctl.h> +#include <linux/uaccess.h> #include <asm/cpufeature.h> #include <asm/insn.h> @@ -20,8 +21,6 @@ #include <asm/system_misc.h> #include <asm/traps.h> #include <asm/kprobes.h> -#include <linux/uaccess.h> -#include <asm/cpufeature.h> #define CREATE_TRACE_POINTS #include "trace-events-emulation.h" @@ -235,8 +234,8 @@ static void __init register_insn_emulation_sysctl(void) struct insn_emulation *insn; struct ctl_table *insns_sysctl, *sysctl; - insns_sysctl = kzalloc(sizeof(*sysctl) * (nr_insn_emulated + 1), - GFP_KERNEL); + insns_sysctl = kcalloc(nr_insn_emulated + 1, sizeof(*sysctl), + GFP_KERNEL); raw_spin_lock_irqsave(&insn_emulation_lock, flags); list_for_each_entry(insn, &insn_emulation, node) { @@ -429,7 +428,7 @@ ret: fault: pr_debug("SWP{B} emulation: access caused memory abort!\n"); - arm64_notify_segfault(regs, address); + arm64_notify_segfault(address); return 0; } @@ -442,8 +441,8 @@ static struct undef_hook swp_hooks[] = { { .instr_mask = 0x0fb00ff0, .instr_val = 0x01000090, - .pstate_mask = COMPAT_PSR_MODE_MASK, - .pstate_val = COMPAT_PSR_MODE_USR, + .pstate_mask = PSR_AA32_MODE_MASK, + .pstate_val = PSR_AA32_MODE_USR, .fn = swp_handler }, { } @@ -512,9 +511,9 @@ ret: static int cp15_barrier_set_hw_mode(bool enable) { if (enable) - config_sctlr_el1(0, SCTLR_EL1_CP15BEN); + sysreg_clear_set(sctlr_el1, 0, SCTLR_EL1_CP15BEN); else - config_sctlr_el1(SCTLR_EL1_CP15BEN, 0); + sysreg_clear_set(sctlr_el1, SCTLR_EL1_CP15BEN, 0); return 0; } @@ -522,15 +521,15 @@ static struct undef_hook cp15_barrier_hooks[] = { { .instr_mask = 0x0fff0fdf, .instr_val = 0x0e070f9a, - .pstate_mask = COMPAT_PSR_MODE_MASK, - .pstate_val = COMPAT_PSR_MODE_USR, + .pstate_mask = PSR_AA32_MODE_MASK, + .pstate_val = PSR_AA32_MODE_USR, .fn = cp15barrier_handler, }, { .instr_mask = 0x0fff0fff, .instr_val = 0x0e070f95, - .pstate_mask = COMPAT_PSR_MODE_MASK, - .pstate_val = COMPAT_PSR_MODE_USR, + .pstate_mask = PSR_AA32_MODE_MASK, + .pstate_val = PSR_AA32_MODE_USR, .fn = cp15barrier_handler, }, { } @@ -549,9 +548,9 @@ static int setend_set_hw_mode(bool enable) return -EINVAL; if (enable) - config_sctlr_el1(SCTLR_EL1_SED, 0); + sysreg_clear_set(sctlr_el1, SCTLR_EL1_SED, 0); else - config_sctlr_el1(0, SCTLR_EL1_SED); + sysreg_clear_set(sctlr_el1, 0, SCTLR_EL1_SED); return 0; } @@ -563,10 +562,10 @@ static int compat_setend_handler(struct pt_regs *regs, u32 big_endian) if (big_endian) { insn = "setend be"; - regs->pstate |= COMPAT_PSR_E_BIT; + regs->pstate |= PSR_AA32_E_BIT; } else { insn = "setend le"; - regs->pstate &= ~COMPAT_PSR_E_BIT; + regs->pstate &= ~PSR_AA32_E_BIT; } trace_instruction_emulation(insn, regs->pc); @@ -594,16 +593,16 @@ static struct undef_hook setend_hooks[] = { { .instr_mask = 0xfffffdff, .instr_val = 0xf1010000, - .pstate_mask = COMPAT_PSR_MODE_MASK, - .pstate_val = COMPAT_PSR_MODE_USR, + .pstate_mask = PSR_AA32_MODE_MASK, + .pstate_val = PSR_AA32_MODE_USR, .fn = a32_setend_handler, }, { /* Thumb mode */ .instr_mask = 0x0000fff7, .instr_val = 0x0000b650, - .pstate_mask = (COMPAT_PSR_T_BIT | COMPAT_PSR_MODE_MASK), - .pstate_val = (COMPAT_PSR_T_BIT | COMPAT_PSR_MODE_USR), + .pstate_mask = (PSR_AA32_T_BIT | PSR_AA32_MODE_MASK), + .pstate_val = (PSR_AA32_T_BIT | PSR_AA32_MODE_USR), .fn = t16_setend_handler, }, {} diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 1303e04110cd..323aeb5f2fe6 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -23,6 +23,7 @@ #include <linux/mm.h> #include <linux/dma-mapping.h> #include <linux/kvm_host.h> +#include <linux/preempt.h> #include <linux/suspend.h> #include <asm/cpufeature.h> #include <asm/fixmap.h> @@ -93,6 +94,8 @@ int main(void) DEFINE(DMA_TO_DEVICE, DMA_TO_DEVICE); DEFINE(DMA_FROM_DEVICE, DMA_FROM_DEVICE); BLANK(); + DEFINE(PREEMPT_DISABLE_OFFSET, PREEMPT_DISABLE_OFFSET); + BLANK(); DEFINE(CLOCK_REALTIME, CLOCK_REALTIME); DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC); DEFINE(CLOCK_MONOTONIC_RAW, CLOCK_MONOTONIC_RAW); @@ -133,11 +136,13 @@ int main(void) #ifdef CONFIG_KVM_ARM_HOST DEFINE(VCPU_CONTEXT, offsetof(struct kvm_vcpu, arch.ctxt)); DEFINE(VCPU_FAULT_DISR, offsetof(struct kvm_vcpu, arch.fault.disr_el1)); + DEFINE(VCPU_WORKAROUND_FLAGS, offsetof(struct kvm_vcpu, arch.workaround_flags)); DEFINE(CPU_GP_REGS, offsetof(struct kvm_cpu_context, gp_regs)); DEFINE(CPU_USER_PT_REGS, offsetof(struct kvm_regs, regs)); DEFINE(CPU_FP_REGS, offsetof(struct kvm_regs, fp_regs)); DEFINE(VCPU_FPEXC32_EL2, offsetof(struct kvm_vcpu, arch.ctxt.sys_regs[FPEXC32_EL2])); DEFINE(VCPU_HOST_CONTEXT, offsetof(struct kvm_vcpu, arch.host_cpu_context)); + DEFINE(HOST_CONTEXT_VCPU, offsetof(struct kvm_cpu_context, __hyp_running_vcpu)); #endif #ifdef CONFIG_CPU_PM DEFINE(CPU_SUSPEND_SZ, sizeof(struct cpu_suspend_ctx)); diff --git a/arch/arm64/kernel/bpi.S b/arch/arm64/kernel/bpi.S deleted file mode 100644 index e5de33513b5d..000000000000 --- a/arch/arm64/kernel/bpi.S +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Contains CPU specific branch predictor invalidation sequences - * - * Copyright (C) 2018 ARM Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -#include <linux/linkage.h> -#include <linux/arm-smccc.h> - -.macro ventry target - .rept 31 - nop - .endr - b \target -.endm - -.macro vectors target - ventry \target + 0x000 - ventry \target + 0x080 - ventry \target + 0x100 - ventry \target + 0x180 - - ventry \target + 0x200 - ventry \target + 0x280 - ventry \target + 0x300 - ventry \target + 0x380 - - ventry \target + 0x400 - ventry \target + 0x480 - ventry \target + 0x500 - ventry \target + 0x580 - - ventry \target + 0x600 - ventry \target + 0x680 - ventry \target + 0x700 - ventry \target + 0x780 -.endm - - .align 11 -ENTRY(__bp_harden_hyp_vecs_start) - .rept 4 - vectors __kvm_hyp_vector - .endr -ENTRY(__bp_harden_hyp_vecs_end) - -ENTRY(__qcom_hyp_sanitize_link_stack_start) - stp x29, x30, [sp, #-16]! - .rept 16 - bl . + 4 - .endr - ldp x29, x30, [sp], #16 -ENTRY(__qcom_hyp_sanitize_link_stack_end) - -.macro smccc_workaround_1 inst - sub sp, sp, #(8 * 4) - stp x2, x3, [sp, #(8 * 0)] - stp x0, x1, [sp, #(8 * 2)] - mov w0, #ARM_SMCCC_ARCH_WORKAROUND_1 - \inst #0 - ldp x2, x3, [sp, #(8 * 0)] - ldp x0, x1, [sp, #(8 * 2)] - add sp, sp, #(8 * 4) -.endm - -ENTRY(__smccc_workaround_1_smc_start) - smccc_workaround_1 smc -ENTRY(__smccc_workaround_1_smc_end) - -ENTRY(__smccc_workaround_1_hvc_start) - smccc_workaround_1 hvc -ENTRY(__smccc_workaround_1_hvc_end) diff --git a/arch/arm64/kernel/cacheinfo.c b/arch/arm64/kernel/cacheinfo.c index 380f2e2fbed5..0bf0a835122f 100644 --- a/arch/arm64/kernel/cacheinfo.c +++ b/arch/arm64/kernel/cacheinfo.c @@ -17,6 +17,7 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ +#include <linux/acpi.h> #include <linux/cacheinfo.h> #include <linux/of.h> @@ -46,7 +47,7 @@ static void ci_leaf_init(struct cacheinfo *this_leaf, static int __init_cache_level(unsigned int cpu) { - unsigned int ctype, level, leaves, of_level; + unsigned int ctype, level, leaves, fw_level; struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); for (level = 1, leaves = 0; level <= MAX_CACHE_LEVEL; level++) { @@ -59,15 +60,19 @@ static int __init_cache_level(unsigned int cpu) leaves += (ctype == CACHE_TYPE_SEPARATE) ? 2 : 1; } - of_level = of_find_last_cache_level(cpu); - if (level < of_level) { + if (acpi_disabled) + fw_level = of_find_last_cache_level(cpu); + else + fw_level = acpi_find_last_cache_level(cpu); + + if (level < fw_level) { /* * some external caches not specified in CLIDR_EL1 * the information may be available in the device tree * only unified external caches are considered here */ - leaves += (of_level - level); - level = of_level; + leaves += (fw_level - level); + level = fw_level; } this_cpu_ci->num_levels = level; diff --git a/arch/arm64/kernel/cpu-reset.h b/arch/arm64/kernel/cpu-reset.h index 6c2b1b4f57c9..fad90e4935fb 100644 --- a/arch/arm64/kernel/cpu-reset.h +++ b/arch/arm64/kernel/cpu-reset.h @@ -16,13 +16,14 @@ void __cpu_soft_restart(unsigned long el2_switch, unsigned long entry, unsigned long arg0, unsigned long arg1, unsigned long arg2); -static inline void __noreturn cpu_soft_restart(unsigned long el2_switch, - unsigned long entry, unsigned long arg0, unsigned long arg1, - unsigned long arg2) +static inline void __noreturn cpu_soft_restart(unsigned long entry, + unsigned long arg0, + unsigned long arg1, + unsigned long arg2) { typeof(__cpu_soft_restart) *restart; - el2_switch = el2_switch && !is_kernel_in_hyp_mode() && + unsigned long el2_switch = !is_kernel_in_hyp_mode() && is_hyp_mode_available(); restart = (void *)__pa_symbol(__cpu_soft_restart); diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index b5a28336c077..a509e35132d2 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -16,6 +16,8 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ +#include <linux/arm-smccc.h> +#include <linux/psci.h> #include <linux/types.h> #include <asm/cpu.h> #include <asm/cputype.h> @@ -24,10 +26,28 @@ static bool __maybe_unused is_affected_midr_range(const struct arm64_cpu_capabilities *entry, int scope) { + const struct arm64_midr_revidr *fix; + u32 midr = read_cpuid_id(), revidr; + + WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); + if (!is_midr_in_range(midr, &entry->midr_range)) + return false; + + midr &= MIDR_REVISION_MASK | MIDR_VARIANT_MASK; + revidr = read_cpuid(REVIDR_EL1); + for (fix = entry->fixed_revs; fix && fix->revidr_mask; fix++) + if (midr == fix->midr_rv && (revidr & fix->revidr_mask)) + return false; + + return true; +} + +static bool __maybe_unused +is_affected_midr_range_list(const struct arm64_cpu_capabilities *entry, + int scope) +{ WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); - return MIDR_IS_CPU_MODEL_RANGE(read_cpuid_id(), entry->midr_model, - entry->midr_range_min, - entry->midr_range_max); + return is_midr_in_range_list(read_cpuid_id(), entry->midr_range_list); } static bool __maybe_unused @@ -41,38 +61,63 @@ is_kryo_midr(const struct arm64_cpu_capabilities *entry, int scope) model &= MIDR_IMPLEMENTOR_MASK | (0xf00 << MIDR_PARTNUM_SHIFT) | MIDR_ARCHITECTURE_MASK; - return model == entry->midr_model; + return model == entry->midr_range.model; } static bool -has_mismatched_cache_line_size(const struct arm64_cpu_capabilities *entry, - int scope) +has_mismatched_cache_type(const struct arm64_cpu_capabilities *entry, + int scope) { + u64 mask = arm64_ftr_reg_ctrel0.strict_mask; + u64 sys = arm64_ftr_reg_ctrel0.sys_val & mask; + u64 ctr_raw, ctr_real; + WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); - return (read_cpuid_cachetype() & arm64_ftr_reg_ctrel0.strict_mask) != - (arm64_ftr_reg_ctrel0.sys_val & arm64_ftr_reg_ctrel0.strict_mask); + + /* + * We want to make sure that all the CPUs in the system expose + * a consistent CTR_EL0 to make sure that applications behaves + * correctly with migration. + * + * If a CPU has CTR_EL0.IDC but does not advertise it via CTR_EL0 : + * + * 1) It is safe if the system doesn't support IDC, as CPU anyway + * reports IDC = 0, consistent with the rest. + * + * 2) If the system has IDC, it is still safe as we trap CTR_EL0 + * access on this CPU via the ARM64_HAS_CACHE_IDC capability. + * + * So, we need to make sure either the raw CTR_EL0 or the effective + * CTR_EL0 matches the system's copy to allow a secondary CPU to boot. + */ + ctr_raw = read_cpuid_cachetype() & mask; + ctr_real = read_cpuid_effective_cachetype() & mask; + + return (ctr_real != sys) && (ctr_raw != sys); } -static int cpu_enable_trap_ctr_access(void *__unused) +static void +cpu_enable_trap_ctr_access(const struct arm64_cpu_capabilities *__unused) { - /* Clear SCTLR_EL1.UCT */ - config_sctlr_el1(SCTLR_EL1_UCT, 0); - return 0; + u64 mask = arm64_ftr_reg_ctrel0.strict_mask; + + /* Trap CTR_EL0 access on this CPU, only if it has a mismatch */ + if ((read_cpuid_cachetype() & mask) != + (arm64_ftr_reg_ctrel0.sys_val & mask)) + sysreg_clear_set(sctlr_el1, SCTLR_EL1_UCT, 0); } +atomic_t arm64_el2_vector_last_slot = ATOMIC_INIT(-1); + #ifdef CONFIG_HARDEN_BRANCH_PREDICTOR #include <asm/mmu_context.h> #include <asm/cacheflush.h> DEFINE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data); -#ifdef CONFIG_KVM -extern char __qcom_hyp_sanitize_link_stack_start[]; -extern char __qcom_hyp_sanitize_link_stack_end[]; +#ifdef CONFIG_KVM_INDIRECT_VECTORS extern char __smccc_workaround_1_smc_start[]; extern char __smccc_workaround_1_smc_end[]; -extern char __smccc_workaround_1_hvc_start[]; -extern char __smccc_workaround_1_hvc_end[]; static void __copy_hyp_vect_bpi(int slot, const char *hyp_vecs_start, const char *hyp_vecs_end) @@ -83,17 +128,25 @@ static void __copy_hyp_vect_bpi(int slot, const char *hyp_vecs_start, for (i = 0; i < SZ_2K; i += 0x80) memcpy(dst + i, hyp_vecs_start, hyp_vecs_end - hyp_vecs_start); - flush_icache_range((uintptr_t)dst, (uintptr_t)dst + SZ_2K); + __flush_icache_range((uintptr_t)dst, (uintptr_t)dst + SZ_2K); } static void __install_bp_hardening_cb(bp_hardening_cb_t fn, const char *hyp_vecs_start, const char *hyp_vecs_end) { - static int last_slot = -1; static DEFINE_SPINLOCK(bp_lock); int cpu, slot = -1; + /* + * enable_smccc_arch_workaround_1() passes NULL for the hyp_vecs + * start/end if we're a guest. Skip the hyp-vectors work. + */ + if (!hyp_vecs_start) { + __this_cpu_write(bp_hardening_data.fn, fn); + return; + } + spin_lock(&bp_lock); for_each_possible_cpu(cpu) { if (per_cpu(bp_hardening_data.fn, cpu) == fn) { @@ -103,10 +156,8 @@ static void __install_bp_hardening_cb(bp_hardening_cb_t fn, } if (slot == -1) { - last_slot++; - BUG_ON(((__bp_harden_hyp_vecs_end - __bp_harden_hyp_vecs_start) - / SZ_2K) <= last_slot); - slot = last_slot; + slot = atomic_inc_return(&arm64_el2_vector_last_slot); + BUG_ON(slot >= BP_HARDEN_EL2_SLOTS); __copy_hyp_vect_bpi(slot, hyp_vecs_start, hyp_vecs_end); } @@ -115,12 +166,8 @@ static void __install_bp_hardening_cb(bp_hardening_cb_t fn, spin_unlock(&bp_lock); } #else -#define __qcom_hyp_sanitize_link_stack_start NULL -#define __qcom_hyp_sanitize_link_stack_end NULL #define __smccc_workaround_1_smc_start NULL #define __smccc_workaround_1_smc_end NULL -#define __smccc_workaround_1_hvc_start NULL -#define __smccc_workaround_1_hvc_end NULL static void __install_bp_hardening_cb(bp_hardening_cb_t fn, const char *hyp_vecs_start, @@ -128,7 +175,7 @@ static void __install_bp_hardening_cb(bp_hardening_cb_t fn, { __this_cpu_write(bp_hardening_data.fn, fn); } -#endif /* CONFIG_KVM */ +#endif /* CONFIG_KVM_INDIRECT_VECTORS */ static void install_bp_hardening_cb(const struct arm64_cpu_capabilities *entry, bp_hardening_cb_t fn, @@ -161,86 +208,367 @@ static void call_hvc_arch_workaround_1(void) arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL); } -static int enable_smccc_arch_workaround_1(void *data) +static void qcom_link_stack_sanitization(void) +{ + u64 tmp; + + asm volatile("mov %0, x30 \n" + ".rept 16 \n" + "bl . + 4 \n" + ".endr \n" + "mov x30, %0 \n" + : "=&r" (tmp)); +} + +static void +enable_smccc_arch_workaround_1(const struct arm64_cpu_capabilities *entry) { - const struct arm64_cpu_capabilities *entry = data; bp_hardening_cb_t cb; void *smccc_start, *smccc_end; struct arm_smccc_res res; + u32 midr = read_cpuid_id(); if (!entry->matches(entry, SCOPE_LOCAL_CPU)) - return 0; + return; if (psci_ops.smccc_version == SMCCC_VERSION_1_0) - return 0; + return; switch (psci_ops.conduit) { case PSCI_CONDUIT_HVC: arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, ARM_SMCCC_ARCH_WORKAROUND_1, &res); if ((int)res.a0 < 0) - return 0; + return; cb = call_hvc_arch_workaround_1; - smccc_start = __smccc_workaround_1_hvc_start; - smccc_end = __smccc_workaround_1_hvc_end; + /* This is a guest, no need to patch KVM vectors */ + smccc_start = NULL; + smccc_end = NULL; break; case PSCI_CONDUIT_SMC: arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, ARM_SMCCC_ARCH_WORKAROUND_1, &res); if ((int)res.a0 < 0) - return 0; + return; cb = call_smc_arch_workaround_1; smccc_start = __smccc_workaround_1_smc_start; smccc_end = __smccc_workaround_1_smc_end; break; default: - return 0; + return; } + if (((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR) || + ((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR_V1)) + cb = qcom_link_stack_sanitization; + install_bp_hardening_cb(entry, cb, smccc_start, smccc_end); - return 0; + return; } +#endif /* CONFIG_HARDEN_BRANCH_PREDICTOR */ -static void qcom_link_stack_sanitization(void) +#ifdef CONFIG_ARM64_SSBD +DEFINE_PER_CPU_READ_MOSTLY(u64, arm64_ssbd_callback_required); + +int ssbd_state __read_mostly = ARM64_SSBD_KERNEL; + +static const struct ssbd_options { + const char *str; + int state; +} ssbd_options[] = { + { "force-on", ARM64_SSBD_FORCE_ENABLE, }, + { "force-off", ARM64_SSBD_FORCE_DISABLE, }, + { "kernel", ARM64_SSBD_KERNEL, }, +}; + +static int __init ssbd_cfg(char *buf) { - u64 tmp; + int i; - asm volatile("mov %0, x30 \n" - ".rept 16 \n" - "bl . + 4 \n" - ".endr \n" - "mov x30, %0 \n" - : "=&r" (tmp)); + if (!buf || !buf[0]) + return -EINVAL; + + for (i = 0; i < ARRAY_SIZE(ssbd_options); i++) { + int len = strlen(ssbd_options[i].str); + + if (strncmp(buf, ssbd_options[i].str, len)) + continue; + + ssbd_state = ssbd_options[i].state; + return 0; + } + + return -EINVAL; } +early_param("ssbd", ssbd_cfg); -static int qcom_enable_link_stack_sanitization(void *data) +void __init arm64_update_smccc_conduit(struct alt_instr *alt, + __le32 *origptr, __le32 *updptr, + int nr_inst) { - const struct arm64_cpu_capabilities *entry = data; + u32 insn; - install_bp_hardening_cb(entry, qcom_link_stack_sanitization, - __qcom_hyp_sanitize_link_stack_start, - __qcom_hyp_sanitize_link_stack_end); + BUG_ON(nr_inst != 1); - return 0; + switch (psci_ops.conduit) { + case PSCI_CONDUIT_HVC: + insn = aarch64_insn_get_hvc_value(); + break; + case PSCI_CONDUIT_SMC: + insn = aarch64_insn_get_smc_value(); + break; + default: + return; + } + + *updptr = cpu_to_le32(insn); } -#endif /* CONFIG_HARDEN_BRANCH_PREDICTOR */ -#define MIDR_RANGE(model, min, max) \ - .def_scope = SCOPE_LOCAL_CPU, \ - .matches = is_affected_midr_range, \ - .midr_model = model, \ - .midr_range_min = min, \ - .midr_range_max = max +void __init arm64_enable_wa2_handling(struct alt_instr *alt, + __le32 *origptr, __le32 *updptr, + int nr_inst) +{ + BUG_ON(nr_inst != 1); + /* + * Only allow mitigation on EL1 entry/exit and guest + * ARCH_WORKAROUND_2 handling if the SSBD state allows it to + * be flipped. + */ + if (arm64_get_ssbd_state() == ARM64_SSBD_KERNEL) + *updptr = cpu_to_le32(aarch64_insn_gen_nop()); +} + +void arm64_set_ssbd_mitigation(bool state) +{ + if (this_cpu_has_cap(ARM64_SSBS)) { + if (state) + asm volatile(SET_PSTATE_SSBS(0)); + else + asm volatile(SET_PSTATE_SSBS(1)); + return; + } + + switch (psci_ops.conduit) { + case PSCI_CONDUIT_HVC: + arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_2, state, NULL); + break; + + case PSCI_CONDUIT_SMC: + arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, state, NULL); + break; + + default: + WARN_ON_ONCE(1); + break; + } +} + +static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry, + int scope) +{ + struct arm_smccc_res res; + bool required = true; + s32 val; + + WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); + + if (this_cpu_has_cap(ARM64_SSBS)) { + required = false; + goto out_printmsg; + } + + if (psci_ops.smccc_version == SMCCC_VERSION_1_0) { + ssbd_state = ARM64_SSBD_UNKNOWN; + return false; + } + + switch (psci_ops.conduit) { + case PSCI_CONDUIT_HVC: + arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, + ARM_SMCCC_ARCH_WORKAROUND_2, &res); + break; + + case PSCI_CONDUIT_SMC: + arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, + ARM_SMCCC_ARCH_WORKAROUND_2, &res); + break; + + default: + ssbd_state = ARM64_SSBD_UNKNOWN; + return false; + } + + val = (s32)res.a0; + + switch (val) { + case SMCCC_RET_NOT_SUPPORTED: + ssbd_state = ARM64_SSBD_UNKNOWN; + return false; + + case SMCCC_RET_NOT_REQUIRED: + pr_info_once("%s mitigation not required\n", entry->desc); + ssbd_state = ARM64_SSBD_MITIGATED; + return false; + + case SMCCC_RET_SUCCESS: + required = true; + break; + + case 1: /* Mitigation not required on this CPU */ + required = false; + break; + + default: + WARN_ON(1); + return false; + } -#define MIDR_ALL_VERSIONS(model) \ - .def_scope = SCOPE_LOCAL_CPU, \ - .matches = is_affected_midr_range, \ - .midr_model = model, \ - .midr_range_min = 0, \ - .midr_range_max = (MIDR_VARIANT_MASK | MIDR_REVISION_MASK) + switch (ssbd_state) { + case ARM64_SSBD_FORCE_DISABLE: + arm64_set_ssbd_mitigation(false); + required = false; + break; + + case ARM64_SSBD_KERNEL: + if (required) { + __this_cpu_write(arm64_ssbd_callback_required, 1); + arm64_set_ssbd_mitigation(true); + } + break; + + case ARM64_SSBD_FORCE_ENABLE: + arm64_set_ssbd_mitigation(true); + required = true; + break; + + default: + WARN_ON(1); + break; + } + +out_printmsg: + switch (ssbd_state) { + case ARM64_SSBD_FORCE_DISABLE: + pr_info_once("%s disabled from command-line\n", entry->desc); + break; + + case ARM64_SSBD_FORCE_ENABLE: + pr_info_once("%s forced from command-line\n", entry->desc); + break; + } + + return required; +} +#endif /* CONFIG_ARM64_SSBD */ + +static void __maybe_unused +cpu_enable_cache_maint_trap(const struct arm64_cpu_capabilities *__unused) +{ + sysreg_clear_set(sctlr_el1, SCTLR_EL1_UCI, 0); +} + +#define CAP_MIDR_RANGE(model, v_min, r_min, v_max, r_max) \ + .matches = is_affected_midr_range, \ + .midr_range = MIDR_RANGE(model, v_min, r_min, v_max, r_max) + +#define CAP_MIDR_ALL_VERSIONS(model) \ + .matches = is_affected_midr_range, \ + .midr_range = MIDR_ALL_VERSIONS(model) + +#define MIDR_FIXED(rev, revidr_mask) \ + .fixed_revs = (struct arm64_midr_revidr[]){{ (rev), (revidr_mask) }, {}} + +#define ERRATA_MIDR_RANGE(model, v_min, r_min, v_max, r_max) \ + .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, \ + CAP_MIDR_RANGE(model, v_min, r_min, v_max, r_max) + +#define CAP_MIDR_RANGE_LIST(list) \ + .matches = is_affected_midr_range_list, \ + .midr_range_list = list + +/* Errata affecting a range of revisions of given model variant */ +#define ERRATA_MIDR_REV_RANGE(m, var, r_min, r_max) \ + ERRATA_MIDR_RANGE(m, var, r_min, var, r_max) + +/* Errata affecting a single variant/revision of a model */ +#define ERRATA_MIDR_REV(model, var, rev) \ + ERRATA_MIDR_RANGE(model, var, rev, var, rev) + +/* Errata affecting all variants/revisions of a given a model */ +#define ERRATA_MIDR_ALL_VERSIONS(model) \ + .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, \ + CAP_MIDR_ALL_VERSIONS(model) + +/* Errata affecting a list of midr ranges, with same work around */ +#define ERRATA_MIDR_RANGE_LIST(midr_list) \ + .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, \ + CAP_MIDR_RANGE_LIST(midr_list) + +/* + * Generic helper for handling capabilties with multiple (match,enable) pairs + * of call backs, sharing the same capability bit. + * Iterate over each entry to see if at least one matches. + */ +static bool __maybe_unused +multi_entry_cap_matches(const struct arm64_cpu_capabilities *entry, int scope) +{ + const struct arm64_cpu_capabilities *caps; + + for (caps = entry->match_list; caps->matches; caps++) + if (caps->matches(caps, scope)) + return true; + + return false; +} + +/* + * Take appropriate action for all matching entries in the shared capability + * entry. + */ +static void __maybe_unused +multi_entry_cap_cpu_enable(const struct arm64_cpu_capabilities *entry) +{ + const struct arm64_cpu_capabilities *caps; + + for (caps = entry->match_list; caps->matches; caps++) + if (caps->matches(caps, SCOPE_LOCAL_CPU) && + caps->cpu_enable) + caps->cpu_enable(caps); +} + +#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR + +/* + * List of CPUs where we need to issue a psci call to + * harden the branch predictor. + */ +static const struct midr_range arm64_bp_harden_smccc_cpus[] = { + MIDR_ALL_VERSIONS(MIDR_CORTEX_A57), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A72), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A73), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A75), + MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN), + MIDR_ALL_VERSIONS(MIDR_CAVIUM_THUNDERX2), + MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR_V1), + MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR), + MIDR_ALL_VERSIONS(MIDR_NVIDIA_DENVER), + {}, +}; + +#endif + +#ifdef CONFIG_HARDEN_EL2_VECTORS + +static const struct midr_range arm64_harden_el2_vectors[] = { + MIDR_ALL_VERSIONS(MIDR_CORTEX_A57), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A72), + {}, +}; + +#endif const struct arm64_cpu_capabilities arm64_errata[] = { #if defined(CONFIG_ARM64_ERRATUM_826319) || \ @@ -250,8 +578,8 @@ const struct arm64_cpu_capabilities arm64_errata[] = { /* Cortex-A53 r0p[012] */ .desc = "ARM errata 826319, 827319, 824069", .capability = ARM64_WORKAROUND_CLEAN_CACHE, - MIDR_RANGE(MIDR_CORTEX_A53, 0x00, 0x02), - .enable = cpu_enable_cache_maint_trap, + ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A53, 0, 0, 2), + .cpu_enable = cpu_enable_cache_maint_trap, }, #endif #ifdef CONFIG_ARM64_ERRATUM_819472 @@ -259,8 +587,8 @@ const struct arm64_cpu_capabilities arm64_errata[] = { /* Cortex-A53 r0p[01] */ .desc = "ARM errata 819472", .capability = ARM64_WORKAROUND_CLEAN_CACHE, - MIDR_RANGE(MIDR_CORTEX_A53, 0x00, 0x01), - .enable = cpu_enable_cache_maint_trap, + ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A53, 0, 0, 1), + .cpu_enable = cpu_enable_cache_maint_trap, }, #endif #ifdef CONFIG_ARM64_ERRATUM_832075 @@ -268,9 +596,9 @@ const struct arm64_cpu_capabilities arm64_errata[] = { /* Cortex-A57 r0p0 - r1p2 */ .desc = "ARM erratum 832075", .capability = ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE, - MIDR_RANGE(MIDR_CORTEX_A57, - MIDR_CPU_VAR_REV(0, 0), - MIDR_CPU_VAR_REV(1, 2)), + ERRATA_MIDR_RANGE(MIDR_CORTEX_A57, + 0, 0, + 1, 2), }, #endif #ifdef CONFIG_ARM64_ERRATUM_834220 @@ -278,9 +606,18 @@ const struct arm64_cpu_capabilities arm64_errata[] = { /* Cortex-A57 r0p0 - r1p2 */ .desc = "ARM erratum 834220", .capability = ARM64_WORKAROUND_834220, - MIDR_RANGE(MIDR_CORTEX_A57, - MIDR_CPU_VAR_REV(0, 0), - MIDR_CPU_VAR_REV(1, 2)), + ERRATA_MIDR_RANGE(MIDR_CORTEX_A57, + 0, 0, + 1, 2), + }, +#endif +#ifdef CONFIG_ARM64_ERRATUM_843419 + { + /* Cortex-A53 r0p[01234] */ + .desc = "ARM erratum 843419", + .capability = ARM64_WORKAROUND_843419, + ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A53, 0, 0, 4), + MIDR_FIXED(0x4, BIT(8)), }, #endif #ifdef CONFIG_ARM64_ERRATUM_845719 @@ -288,7 +625,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = { /* Cortex-A53 r0p[01234] */ .desc = "ARM erratum 845719", .capability = ARM64_WORKAROUND_845719, - MIDR_RANGE(MIDR_CORTEX_A53, 0x00, 0x04), + ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A53, 0, 0, 4), }, #endif #ifdef CONFIG_CAVIUM_ERRATUM_23154 @@ -296,7 +633,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = { /* Cavium ThunderX, pass 1.x */ .desc = "Cavium erratum 23154", .capability = ARM64_WORKAROUND_CAVIUM_23154, - MIDR_RANGE(MIDR_THUNDERX, 0x00, 0x01), + ERRATA_MIDR_REV_RANGE(MIDR_THUNDERX, 0, 0, 1), }, #endif #ifdef CONFIG_CAVIUM_ERRATUM_27456 @@ -304,15 +641,15 @@ const struct arm64_cpu_capabilities arm64_errata[] = { /* Cavium ThunderX, T88 pass 1.x - 2.1 */ .desc = "Cavium erratum 27456", .capability = ARM64_WORKAROUND_CAVIUM_27456, - MIDR_RANGE(MIDR_THUNDERX, - MIDR_CPU_VAR_REV(0, 0), - MIDR_CPU_VAR_REV(1, 1)), + ERRATA_MIDR_RANGE(MIDR_THUNDERX, + 0, 0, + 1, 1), }, { /* Cavium ThunderX, T81 pass 1.0 */ .desc = "Cavium erratum 27456", .capability = ARM64_WORKAROUND_CAVIUM_27456, - MIDR_RANGE(MIDR_THUNDERX_81XX, 0x00, 0x00), + ERRATA_MIDR_REV(MIDR_THUNDERX_81XX, 0, 0), }, #endif #ifdef CONFIG_CAVIUM_ERRATUM_30115 @@ -320,42 +657,41 @@ const struct arm64_cpu_capabilities arm64_errata[] = { /* Cavium ThunderX, T88 pass 1.x - 2.2 */ .desc = "Cavium erratum 30115", .capability = ARM64_WORKAROUND_CAVIUM_30115, - MIDR_RANGE(MIDR_THUNDERX, 0x00, - (1 << MIDR_VARIANT_SHIFT) | 2), + ERRATA_MIDR_RANGE(MIDR_THUNDERX, + 0, 0, + 1, 2), }, { /* Cavium ThunderX, T81 pass 1.0 - 1.2 */ .desc = "Cavium erratum 30115", .capability = ARM64_WORKAROUND_CAVIUM_30115, - MIDR_RANGE(MIDR_THUNDERX_81XX, 0x00, 0x02), + ERRATA_MIDR_REV_RANGE(MIDR_THUNDERX_81XX, 0, 0, 2), }, { /* Cavium ThunderX, T83 pass 1.0 */ .desc = "Cavium erratum 30115", .capability = ARM64_WORKAROUND_CAVIUM_30115, - MIDR_RANGE(MIDR_THUNDERX_83XX, 0x00, 0x00), + ERRATA_MIDR_REV(MIDR_THUNDERX_83XX, 0, 0), }, #endif { - .desc = "Mismatched cache line size", - .capability = ARM64_MISMATCHED_CACHE_LINE_SIZE, - .matches = has_mismatched_cache_line_size, - .def_scope = SCOPE_LOCAL_CPU, - .enable = cpu_enable_trap_ctr_access, + .desc = "Mismatched cache type (CTR_EL0)", + .capability = ARM64_MISMATCHED_CACHE_TYPE, + .matches = has_mismatched_cache_type, + .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, + .cpu_enable = cpu_enable_trap_ctr_access, }, #ifdef CONFIG_QCOM_FALKOR_ERRATUM_1003 { .desc = "Qualcomm Technologies Falkor erratum 1003", .capability = ARM64_WORKAROUND_QCOM_FALKOR_E1003, - MIDR_RANGE(MIDR_QCOM_FALKOR_V1, - MIDR_CPU_VAR_REV(0, 0), - MIDR_CPU_VAR_REV(0, 0)), + ERRATA_MIDR_REV(MIDR_QCOM_FALKOR_V1, 0, 0), }, { .desc = "Qualcomm Technologies Kryo erratum 1003", .capability = ARM64_WORKAROUND_QCOM_FALKOR_E1003, - .def_scope = SCOPE_LOCAL_CPU, - .midr_model = MIDR_QCOM_KRYO, + .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, + .midr_range.model = MIDR_QCOM_KRYO, .matches = is_kryo_midr, }, #endif @@ -363,9 +699,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = { { .desc = "Qualcomm Technologies Falkor erratum 1009", .capability = ARM64_WORKAROUND_REPEAT_TLBI, - MIDR_RANGE(MIDR_QCOM_FALKOR_V1, - MIDR_CPU_VAR_REV(0, 0), - MIDR_CPU_VAR_REV(0, 0)), + ERRATA_MIDR_REV(MIDR_QCOM_FALKOR_V1, 0, 0), }, #endif #ifdef CONFIG_ARM64_ERRATUM_858921 @@ -373,92 +707,39 @@ const struct arm64_cpu_capabilities arm64_errata[] = { /* Cortex-A73 all versions */ .desc = "ARM erratum 858921", .capability = ARM64_WORKAROUND_858921, - MIDR_ALL_VERSIONS(MIDR_CORTEX_A73), + ERRATA_MIDR_ALL_VERSIONS(MIDR_CORTEX_A73), }, #endif #ifdef CONFIG_HARDEN_BRANCH_PREDICTOR { .capability = ARM64_HARDEN_BRANCH_PREDICTOR, - MIDR_ALL_VERSIONS(MIDR_CORTEX_A57), - .enable = enable_smccc_arch_workaround_1, - }, - { - .capability = ARM64_HARDEN_BRANCH_PREDICTOR, - MIDR_ALL_VERSIONS(MIDR_CORTEX_A72), - .enable = enable_smccc_arch_workaround_1, - }, - { - .capability = ARM64_HARDEN_BRANCH_PREDICTOR, - MIDR_ALL_VERSIONS(MIDR_CORTEX_A73), - .enable = enable_smccc_arch_workaround_1, - }, - { - .capability = ARM64_HARDEN_BRANCH_PREDICTOR, - MIDR_ALL_VERSIONS(MIDR_CORTEX_A75), - .enable = enable_smccc_arch_workaround_1, - }, - { - .capability = ARM64_HARDEN_BRANCH_PREDICTOR, - MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR_V1), - .enable = qcom_enable_link_stack_sanitization, - }, - { - .capability = ARM64_HARDEN_BP_POST_GUEST_EXIT, - MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR_V1), - }, - { - .capability = ARM64_HARDEN_BRANCH_PREDICTOR, - MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR), - .enable = qcom_enable_link_stack_sanitization, + .cpu_enable = enable_smccc_arch_workaround_1, + ERRATA_MIDR_RANGE_LIST(arm64_bp_harden_smccc_cpus), }, +#endif +#ifdef CONFIG_HARDEN_EL2_VECTORS { - .capability = ARM64_HARDEN_BP_POST_GUEST_EXIT, - MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR), + .desc = "EL2 vector hardening", + .capability = ARM64_HARDEN_EL2_VECTORS, + ERRATA_MIDR_RANGE_LIST(arm64_harden_el2_vectors), }, +#endif +#ifdef CONFIG_ARM64_SSBD { - .capability = ARM64_HARDEN_BRANCH_PREDICTOR, - MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN), - .enable = enable_smccc_arch_workaround_1, + .desc = "Speculative Store Bypass Disable", + .capability = ARM64_SSBD, + .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, + .matches = has_ssbd_mitigation, }, +#endif +#ifdef CONFIG_ARM64_ERRATUM_1188873 { - .capability = ARM64_HARDEN_BRANCH_PREDICTOR, - MIDR_ALL_VERSIONS(MIDR_CAVIUM_THUNDERX2), - .enable = enable_smccc_arch_workaround_1, + /* Cortex-A76 r0p0 to r2p0 */ + .desc = "ARM erratum 1188873", + .capability = ARM64_WORKAROUND_1188873, + ERRATA_MIDR_RANGE(MIDR_CORTEX_A76, 0, 0, 2, 0), }, #endif { } }; - -/* - * The CPU Errata work arounds are detected and applied at boot time - * and the related information is freed soon after. If the new CPU requires - * an errata not detected at boot, fail this CPU. - */ -void verify_local_cpu_errata_workarounds(void) -{ - const struct arm64_cpu_capabilities *caps = arm64_errata; - - for (; caps->matches; caps++) { - if (cpus_have_cap(caps->capability)) { - if (caps->enable) - caps->enable((void *)caps); - } else if (caps->matches(caps, SCOPE_LOCAL_CPU)) { - pr_crit("CPU%d: Requires work around for %s, not detected" - " at boot time\n", - smp_processor_id(), - caps->desc ? : "an erratum"); - cpu_die_early(); - } - } -} - -void update_cpu_errata_workarounds(void) -{ - update_cpu_capabilities(arm64_errata, "enabling workaround for"); -} - -void __init enable_errata_workarounds(void) -{ - enable_cpu_capabilities(arm64_errata); -} diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 2985a067fc13..af50064dea51 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -20,6 +20,7 @@ #include <linux/bsearch.h> #include <linux/cpumask.h> +#include <linux/crash_dump.h> #include <linux/sort.h> #include <linux/stop_machine.h> #include <linux/types.h> @@ -117,12 +118,14 @@ EXPORT_SYMBOL(cpu_hwcap_keys); static bool __maybe_unused cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry, int __unused); +static void cpu_enable_cnp(struct arm64_cpu_capabilities const *cap); /* * NOTE: Any changes to the visibility of features should be kept in * sync with the documentation of the CPU feature register ABI. */ static const struct arm64_ftr_bits ftr_id_aa64isar0[] = { + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_TS_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_FHM_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_DP_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SM4_SHIFT, 4, 0), @@ -148,6 +151,7 @@ static const struct arm64_ftr_bits ftr_id_aa64isar1[] = { static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = { ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_CSV3_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_CSV2_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_DIT_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_SVE_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_RAS_SHIFT, 4, 0), @@ -162,6 +166,11 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = { ARM64_FTR_END, }; +static const struct arm64_ftr_bits ftr_id_aa64pfr1[] = { + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_SSBS_SHIFT, 4, ID_AA64PFR1_SSBS_PSTATE_NI), + ARM64_FTR_END, +}; + static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = { S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_TGRAN4_SHIFT, 4, ID_AA64MMFR0_TGRAN4_NI), S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_TGRAN64_SHIFT, 4, ID_AA64MMFR0_TGRAN64_NI), @@ -190,6 +199,8 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr1[] = { }; static const struct arm64_ftr_bits ftr_id_aa64mmfr2[] = { + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_FWB_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_AT_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_LVA_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_IESB_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_LSM_SHIFT, 4, 0), @@ -199,19 +210,19 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr2[] = { }; static const struct arm64_ftr_bits ftr_ctr[] = { - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, 31, 1, 1), /* RES1 */ - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 29, 1, 1), /* DIC */ - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 28, 1, 1), /* IDC */ - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_HIGHER_SAFE, 24, 4, 0), /* CWG */ - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_HIGHER_SAFE, 20, 4, 0), /* ERG */ - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 1), /* DminLine */ + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, 31, 1, 1), /* RES1 */ + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_DIC_SHIFT, 1, 1), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_IDC_SHIFT, 1, 1), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_HIGHER_SAFE, CTR_CWG_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_HIGHER_SAFE, CTR_ERG_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_DMINLINE_SHIFT, 4, 1), /* * Linux can handle differing I-cache policies. Userspace JITs will * make use of *minLine. * If we have differing I-cache policies, report it as the weakest - VIPT. */ ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_EXACT, 14, 2, ICACHE_POLICY_VIPT), /* L1Ip */ - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), /* IminLine */ + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_IMINLINE_SHIFT, 4, 0), ARM64_FTR_END, }; @@ -367,7 +378,7 @@ static const struct __ftr_reg_entry { /* Op1 = 0, CRn = 0, CRm = 4 */ ARM64_FTR_REG(SYS_ID_AA64PFR0_EL1, ftr_id_aa64pfr0), - ARM64_FTR_REG(SYS_ID_AA64PFR1_EL1, ftr_raz), + ARM64_FTR_REG(SYS_ID_AA64PFR1_EL1, ftr_id_aa64pfr1), ARM64_FTR_REG(SYS_ID_AA64ZFR0_EL1, ftr_raz), /* Op1 = 0, CRn = 0, CRm = 5 */ @@ -506,6 +517,9 @@ static void __init init_cpu_ftr_reg(u32 sys_reg, u64 new) reg->user_mask = user_mask; } +extern const struct arm64_cpu_capabilities arm64_errata[]; +static void __init setup_boot_cpu_capabilities(void); + void __init init_cpu_features(struct cpuinfo_arm64 *info) { /* Before we start using the tables, make sure it is sorted */ @@ -548,6 +562,12 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info) init_cpu_ftr_reg(SYS_ZCR_EL1, info->reg_zcr); sve_init_vq_map(); } + + /* + * Detect and enable early CPU capabilities based on the boot CPU, + * after we have initialised the CPU feature infrastructure. + */ + setup_boot_cpu_capabilities(); } static void update_cpu_ftr_reg(struct arm64_ftr_reg *reg, u64 new) @@ -644,7 +664,6 @@ void update_cpu_features(int cpu, /* * EL3 is not our concern. - * ID_AA64PFR1 is currently RES0. */ taint |= check_update_ftr_reg(SYS_ID_AA64PFR0_EL1, cpu, info->reg_id_aa64pfr0, boot->reg_id_aa64pfr0); @@ -826,40 +845,79 @@ static bool has_no_hw_prefetch(const struct arm64_cpu_capabilities *entry, int _ MIDR_CPU_VAR_REV(1, MIDR_REVISION_MASK)); } -static bool runs_at_el2(const struct arm64_cpu_capabilities *entry, int __unused) +static bool has_no_fpsimd(const struct arm64_cpu_capabilities *entry, int __unused) { - return is_kernel_in_hyp_mode(); + u64 pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1); + + return cpuid_feature_extract_signed_field(pfr0, + ID_AA64PFR0_FP_SHIFT) < 0; } -static bool hyp_offset_low(const struct arm64_cpu_capabilities *entry, - int __unused) +static bool has_cache_idc(const struct arm64_cpu_capabilities *entry, + int scope) { - phys_addr_t idmap_addr = __pa_symbol(__hyp_idmap_text_start); + u64 ctr; + if (scope == SCOPE_SYSTEM) + ctr = arm64_ftr_reg_ctrel0.sys_val; + else + ctr = read_cpuid_effective_cachetype(); + + return ctr & BIT(CTR_IDC_SHIFT); +} + +static void cpu_emulate_effective_ctr(const struct arm64_cpu_capabilities *__unused) +{ /* - * Activate the lower HYP offset only if: - * - the idmap doesn't clash with it, - * - the kernel is not running at EL2. + * If the CPU exposes raw CTR_EL0.IDC = 0, while effectively + * CTR_EL0.IDC = 1 (from CLIDR values), we need to trap accesses + * to the CTR_EL0 on this CPU and emulate it with the real/safe + * value. */ - return idmap_addr > GENMASK(VA_BITS - 2, 0) && !is_kernel_in_hyp_mode(); + if (!(read_cpuid_cachetype() & BIT(CTR_IDC_SHIFT))) + sysreg_clear_set(sctlr_el1, SCTLR_EL1_UCT, 0); } -static bool has_no_fpsimd(const struct arm64_cpu_capabilities *entry, int __unused) +static bool has_cache_dic(const struct arm64_cpu_capabilities *entry, + int scope) { - u64 pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1); + u64 ctr; - return cpuid_feature_extract_signed_field(pfr0, - ID_AA64PFR0_FP_SHIFT) < 0; + if (scope == SCOPE_SYSTEM) + ctr = arm64_ftr_reg_ctrel0.sys_val; + else + ctr = read_cpuid_cachetype(); + + return ctr & BIT(CTR_DIC_SHIFT); +} + +static bool __maybe_unused +has_useable_cnp(const struct arm64_cpu_capabilities *entry, int scope) +{ + /* + * Kdump isn't guaranteed to power-off all secondary CPUs, CNP + * may share TLB entries with a CPU stuck in the crashed + * kernel. + */ + if (is_kdump_kernel()) + return false; + + return has_cpuid_feature(entry, scope); } #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 static int __kpti_forced; /* 0: not forced, >0: forced on, <0: forced off */ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry, - int __unused) + int scope) { + /* List of CPUs that are not vulnerable and don't need KPTI */ + static const struct midr_range kpti_safe_list[] = { + MIDR_ALL_VERSIONS(MIDR_CAVIUM_THUNDERX2), + MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN), + { /* sentinel */ } + }; char const *str = "command line option"; - u64 pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1); /* * For reasons that aren't entirely clear, enabling KPTI on Cavium @@ -883,18 +941,15 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry, return true; /* Don't force KPTI for CPUs that are not vulnerable */ - switch (read_cpuid_id() & MIDR_CPU_MODEL_MASK) { - case MIDR_CAVIUM_THUNDERX2: - case MIDR_BRCM_VULCAN: + if (is_midr_in_range_list(read_cpuid_id(), kpti_safe_list)) return false; - } /* Defer to CPU feature registers */ - return !cpuid_feature_extract_unsigned_field(pfr0, - ID_AA64PFR0_CSV3_SHIFT); + return !has_cpuid_feature(entry, scope); } -static int kpti_install_ng_mappings(void *__unused) +static void +kpti_install_ng_mappings(const struct arm64_cpu_capabilities *__unused) { typedef void (kpti_remap_fn)(int, int, phys_addr_t); extern kpti_remap_fn idmap_kpti_install_ng_mappings; @@ -904,7 +959,7 @@ static int kpti_install_ng_mappings(void *__unused) int cpu = smp_processor_id(); if (kpti_applied) - return 0; + return; remap_fn = (void *)__pa_symbol(idmap_kpti_install_ng_mappings); @@ -915,7 +970,7 @@ static int kpti_install_ng_mappings(void *__unused) if (!cpu) kpti_applied = true; - return 0; + return; } static int __init parse_kpti(char *str) @@ -929,10 +984,81 @@ static int __init parse_kpti(char *str) __kpti_forced = enabled ? 1 : -1; return 0; } -__setup("kpti=", parse_kpti); +early_param("kpti", parse_kpti); #endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ -static int cpu_copy_el2regs(void *__unused) +#ifdef CONFIG_ARM64_HW_AFDBM +static inline void __cpu_enable_hw_dbm(void) +{ + u64 tcr = read_sysreg(tcr_el1) | TCR_HD; + + write_sysreg(tcr, tcr_el1); + isb(); +} + +static bool cpu_has_broken_dbm(void) +{ + /* List of CPUs which have broken DBM support. */ + static const struct midr_range cpus[] = { +#ifdef CONFIG_ARM64_ERRATUM_1024718 + MIDR_RANGE(MIDR_CORTEX_A55, 0, 0, 1, 0), // A55 r0p0 -r1p0 +#endif + {}, + }; + + return is_midr_in_range_list(read_cpuid_id(), cpus); +} + +static bool cpu_can_use_dbm(const struct arm64_cpu_capabilities *cap) +{ + return has_cpuid_feature(cap, SCOPE_LOCAL_CPU) && + !cpu_has_broken_dbm(); +} + +static void cpu_enable_hw_dbm(struct arm64_cpu_capabilities const *cap) +{ + if (cpu_can_use_dbm(cap)) + __cpu_enable_hw_dbm(); +} + +static bool has_hw_dbm(const struct arm64_cpu_capabilities *cap, + int __unused) +{ + static bool detected = false; + /* + * DBM is a non-conflicting feature. i.e, the kernel can safely + * run a mix of CPUs with and without the feature. So, we + * unconditionally enable the capability to allow any late CPU + * to use the feature. We only enable the control bits on the + * CPU, if it actually supports. + * + * We have to make sure we print the "feature" detection only + * when at least one CPU actually uses it. So check if this CPU + * can actually use it and print the message exactly once. + * + * This is safe as all CPUs (including secondary CPUs - due to the + * LOCAL_CPU scope - and the hotplugged CPUs - via verification) + * goes through the "matches" check exactly once. Also if a CPU + * matches the criteria, it is guaranteed that the CPU will turn + * the DBM on, as the capability is unconditionally enabled. + */ + if (!detected && cpu_can_use_dbm(cap)) { + detected = true; + pr_info("detected: Hardware dirty bit management\n"); + } + + return true; +} + +#endif + +#ifdef CONFIG_ARM64_VHE +static bool runs_at_el2(const struct arm64_cpu_capabilities *entry, int __unused) +{ + return is_kernel_in_hyp_mode(); +} + +static void cpu_copy_el2regs(const struct arm64_cpu_capabilities *__unused) { /* * Copy register values that aren't redirected by hardware. @@ -944,15 +1070,86 @@ static int cpu_copy_el2regs(void *__unused) */ if (!alternatives_applied) write_sysreg(read_sysreg(tpidr_el1), tpidr_el2); +} +#endif + +static void cpu_has_fwb(const struct arm64_cpu_capabilities *__unused) +{ + u64 val = read_sysreg_s(SYS_CLIDR_EL1); + /* Check that CLIDR_EL1.LOU{U,IS} are both 0 */ + WARN_ON(val & (7 << 27 | 7 << 21)); +} + +#ifdef CONFIG_ARM64_SSBD +static int ssbs_emulation_handler(struct pt_regs *regs, u32 instr) +{ + if (user_mode(regs)) + return 1; + + if (instr & BIT(PSTATE_Imm_shift)) + regs->pstate |= PSR_SSBS_BIT; + else + regs->pstate &= ~PSR_SSBS_BIT; + + arm64_skip_faulting_instruction(regs, 4); return 0; } +static struct undef_hook ssbs_emulation_hook = { + .instr_mask = ~(1U << PSTATE_Imm_shift), + .instr_val = 0xd500401f | PSTATE_SSBS, + .fn = ssbs_emulation_handler, +}; + +static void cpu_enable_ssbs(const struct arm64_cpu_capabilities *__unused) +{ + static bool undef_hook_registered = false; + static DEFINE_SPINLOCK(hook_lock); + + spin_lock(&hook_lock); + if (!undef_hook_registered) { + register_undef_hook(&ssbs_emulation_hook); + undef_hook_registered = true; + } + spin_unlock(&hook_lock); + + if (arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE) { + sysreg_clear_set(sctlr_el1, 0, SCTLR_ELx_DSSBS); + arm64_set_ssbd_mitigation(false); + } else { + arm64_set_ssbd_mitigation(true); + } +} +#endif /* CONFIG_ARM64_SSBD */ + +#ifdef CONFIG_ARM64_PAN +static void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused) +{ + /* + * We modify PSTATE. This won't work from irq context as the PSTATE + * is discarded once we return from the exception. + */ + WARN_ON_ONCE(in_interrupt()); + + sysreg_clear_set(sctlr_el1, SCTLR_EL1_SPAN, 0); + asm(SET_PSTATE_PAN(1)); +} +#endif /* CONFIG_ARM64_PAN */ + +#ifdef CONFIG_ARM64_RAS_EXTN +static void cpu_clear_disr(const struct arm64_cpu_capabilities *__unused) +{ + /* Firmware may have left a deferred SError in this register. */ + write_sysreg_s(0, SYS_DISR_EL1); +} +#endif /* CONFIG_ARM64_RAS_EXTN */ + static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "GIC system register CPU interface", .capability = ARM64_HAS_SYSREG_GIC_CPUIF, - .def_scope = SCOPE_SYSTEM, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_useable_gicv3_cpuif, .sys_reg = SYS_ID_AA64PFR0_EL1, .field_pos = ID_AA64PFR0_GIC_SHIFT, @@ -963,20 +1160,20 @@ static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "Privileged Access Never", .capability = ARM64_HAS_PAN, - .def_scope = SCOPE_SYSTEM, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64MMFR1_EL1, .field_pos = ID_AA64MMFR1_PAN_SHIFT, .sign = FTR_UNSIGNED, .min_field_value = 1, - .enable = cpu_enable_pan, + .cpu_enable = cpu_enable_pan, }, #endif /* CONFIG_ARM64_PAN */ #if defined(CONFIG_AS_LSE) && defined(CONFIG_ARM64_LSE_ATOMICS) { .desc = "LSE atomic instructions", .capability = ARM64_HAS_LSE_ATOMICS, - .def_scope = SCOPE_SYSTEM, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64ISAR0_EL1, .field_pos = ID_AA64ISAR0_ATOMICS_SHIFT, @@ -987,14 +1184,14 @@ static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "Software prefetching using PRFM", .capability = ARM64_HAS_NO_HW_PREFETCH, - .def_scope = SCOPE_SYSTEM, + .type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE, .matches = has_no_hw_prefetch, }, #ifdef CONFIG_ARM64_UAO { .desc = "User Access Override", .capability = ARM64_HAS_UAO, - .def_scope = SCOPE_SYSTEM, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64MMFR2_EL1, .field_pos = ID_AA64MMFR2_UAO_SHIFT, @@ -1008,46 +1205,50 @@ static const struct arm64_cpu_capabilities arm64_features[] = { #ifdef CONFIG_ARM64_PAN { .capability = ARM64_ALT_PAN_NOT_UAO, - .def_scope = SCOPE_SYSTEM, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = cpufeature_pan_not_uao, }, #endif /* CONFIG_ARM64_PAN */ +#ifdef CONFIG_ARM64_VHE { .desc = "Virtualization Host Extensions", .capability = ARM64_HAS_VIRT_HOST_EXTN, - .def_scope = SCOPE_SYSTEM, + .type = ARM64_CPUCAP_STRICT_BOOT_CPU_FEATURE, .matches = runs_at_el2, - .enable = cpu_copy_el2regs, + .cpu_enable = cpu_copy_el2regs, }, +#endif /* CONFIG_ARM64_VHE */ { .desc = "32-bit EL0 Support", .capability = ARM64_HAS_32BIT_EL0, - .def_scope = SCOPE_SYSTEM, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64PFR0_EL1, .sign = FTR_UNSIGNED, .field_pos = ID_AA64PFR0_EL0_SHIFT, .min_field_value = ID_AA64PFR0_EL0_32BIT_64BIT, }, - { - .desc = "Reduced HYP mapping offset", - .capability = ARM64_HYP_OFFSET_LOW, - .def_scope = SCOPE_SYSTEM, - .matches = hyp_offset_low, - }, #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 { .desc = "Kernel page table isolation (KPTI)", .capability = ARM64_UNMAP_KERNEL_AT_EL0, - .def_scope = SCOPE_SYSTEM, + .type = ARM64_CPUCAP_BOOT_RESTRICTED_CPU_LOCAL_FEATURE, + /* + * The ID feature fields below are used to indicate that + * the CPU doesn't need KPTI. See unmap_kernel_at_el0 for + * more details. + */ + .sys_reg = SYS_ID_AA64PFR0_EL1, + .field_pos = ID_AA64PFR0_CSV3_SHIFT, + .min_field_value = 1, .matches = unmap_kernel_at_el0, - .enable = kpti_install_ng_mappings, + .cpu_enable = kpti_install_ng_mappings, }, #endif { /* FP/SIMD is not implemented */ .capability = ARM64_HAS_NO_FPSIMD, - .def_scope = SCOPE_SYSTEM, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, .min_field_value = 0, .matches = has_no_fpsimd, }, @@ -1055,7 +1256,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "Data cache clean to Point of Persistence", .capability = ARM64_HAS_DCPOP, - .def_scope = SCOPE_SYSTEM, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64ISAR1_EL1, .field_pos = ID_AA64ISAR1_DPB_SHIFT, @@ -1065,42 +1266,121 @@ static const struct arm64_cpu_capabilities arm64_features[] = { #ifdef CONFIG_ARM64_SVE { .desc = "Scalable Vector Extension", + .type = ARM64_CPUCAP_SYSTEM_FEATURE, .capability = ARM64_SVE, - .def_scope = SCOPE_SYSTEM, .sys_reg = SYS_ID_AA64PFR0_EL1, .sign = FTR_UNSIGNED, .field_pos = ID_AA64PFR0_SVE_SHIFT, .min_field_value = ID_AA64PFR0_SVE, .matches = has_cpuid_feature, - .enable = sve_kernel_enable, + .cpu_enable = sve_kernel_enable, }, #endif /* CONFIG_ARM64_SVE */ #ifdef CONFIG_ARM64_RAS_EXTN { .desc = "RAS Extension Support", .capability = ARM64_HAS_RAS_EXTN, - .def_scope = SCOPE_SYSTEM, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64PFR0_EL1, .sign = FTR_UNSIGNED, .field_pos = ID_AA64PFR0_RAS_SHIFT, .min_field_value = ID_AA64PFR0_RAS_V1, - .enable = cpu_clear_disr, + .cpu_enable = cpu_clear_disr, }, #endif /* CONFIG_ARM64_RAS_EXTN */ + { + .desc = "Data cache clean to the PoU not required for I/D coherence", + .capability = ARM64_HAS_CACHE_IDC, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .matches = has_cache_idc, + .cpu_enable = cpu_emulate_effective_ctr, + }, + { + .desc = "Instruction cache invalidation not required for I/D coherence", + .capability = ARM64_HAS_CACHE_DIC, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .matches = has_cache_dic, + }, + { + .desc = "Stage-2 Force Write-Back", + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .capability = ARM64_HAS_STAGE2_FWB, + .sys_reg = SYS_ID_AA64MMFR2_EL1, + .sign = FTR_UNSIGNED, + .field_pos = ID_AA64MMFR2_FWB_SHIFT, + .min_field_value = 1, + .matches = has_cpuid_feature, + .cpu_enable = cpu_has_fwb, + }, +#ifdef CONFIG_ARM64_HW_AFDBM + { + /* + * Since we turn this on always, we don't want the user to + * think that the feature is available when it may not be. + * So hide the description. + * + * .desc = "Hardware pagetable Dirty Bit Management", + * + */ + .type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE, + .capability = ARM64_HW_DBM, + .sys_reg = SYS_ID_AA64MMFR1_EL1, + .sign = FTR_UNSIGNED, + .field_pos = ID_AA64MMFR1_HADBS_SHIFT, + .min_field_value = 2, + .matches = has_hw_dbm, + .cpu_enable = cpu_enable_hw_dbm, + }, +#endif +#ifdef CONFIG_ARM64_SSBD + { + .desc = "CRC32 instructions", + .capability = ARM64_HAS_CRC32, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .matches = has_cpuid_feature, + .sys_reg = SYS_ID_AA64ISAR0_EL1, + .field_pos = ID_AA64ISAR0_CRC32_SHIFT, + .min_field_value = 1, + }, + { + .desc = "Speculative Store Bypassing Safe (SSBS)", + .capability = ARM64_SSBS, + .type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE, + .matches = has_cpuid_feature, + .sys_reg = SYS_ID_AA64PFR1_EL1, + .field_pos = ID_AA64PFR1_SSBS_SHIFT, + .sign = FTR_UNSIGNED, + .min_field_value = ID_AA64PFR1_SSBS_PSTATE_ONLY, + .cpu_enable = cpu_enable_ssbs, + }, +#endif +#ifdef CONFIG_ARM64_CNP + { + .desc = "Common not Private translations", + .capability = ARM64_HAS_CNP, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .matches = has_useable_cnp, + .sys_reg = SYS_ID_AA64MMFR2_EL1, + .sign = FTR_UNSIGNED, + .field_pos = ID_AA64MMFR2_CNP_SHIFT, + .min_field_value = 1, + .cpu_enable = cpu_enable_cnp, + }, +#endif {}, }; -#define HWCAP_CAP(reg, field, s, min_value, type, cap) \ +#define HWCAP_CAP(reg, field, s, min_value, cap_type, cap) \ { \ .desc = #cap, \ - .def_scope = SCOPE_SYSTEM, \ + .type = ARM64_CPUCAP_SYSTEM_FEATURE, \ .matches = has_cpuid_feature, \ .sys_reg = reg, \ .field_pos = field, \ .sign = s, \ .min_field_value = min_value, \ - .hwcap_type = type, \ + .hwcap_type = cap_type, \ .hwcap = cap, \ } @@ -1118,17 +1398,22 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM4_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SM4), HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_DP_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_ASIMDDP), HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_FHM_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_ASIMDFHM), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_TS_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_FLAGM), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_FP), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_FPHP), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_ASIMD), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_ASIMDHP), + HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_DIT_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_DIT), HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DPB_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_DCPOP), HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_JSCVT_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_JSCVT), HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FCMA_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_FCMA), HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_LRCPC), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, HWCAP_ILRCPC), + HWCAP_CAP(SYS_ID_AA64MMFR2_EL1, ID_AA64MMFR2_AT_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_USCAT), #ifdef CONFIG_ARM64_SVE HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_SVE_SHIFT, FTR_UNSIGNED, ID_AA64PFR0_SVE, CAP_HWCAP, HWCAP_SVE), #endif + HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_SSBS_SHIFT, FTR_UNSIGNED, ID_AA64PFR1_SSBS_PSTATE_INSNS, CAP_HWCAP, HWCAP_SSBS), {}, }; @@ -1193,7 +1478,7 @@ static void __init setup_elf_hwcaps(const struct arm64_cpu_capabilities *hwcaps) /* We support emulation of accesses to CPU ID feature registers */ elf_hwcap |= HWCAP_CPUID; for (; hwcaps->matches; hwcaps++) - if (hwcaps->matches(hwcaps, hwcaps->def_scope)) + if (hwcaps->matches(hwcaps, cpucap_default_scope(hwcaps))) cap_set_elf_hwcap(hwcaps); } @@ -1210,17 +1495,19 @@ static bool __this_cpu_has_cap(const struct arm64_cpu_capabilities *cap_array, return false; for (caps = cap_array; caps->matches; caps++) - if (caps->capability == cap && - caps->matches(caps, SCOPE_LOCAL_CPU)) - return true; + if (caps->capability == cap) + return caps->matches(caps, SCOPE_LOCAL_CPU); + return false; } -void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps, - const char *info) +static void __update_cpu_capabilities(const struct arm64_cpu_capabilities *caps, + u16 scope_mask, const char *info) { + scope_mask &= ARM64_CPUCAP_SCOPE_MASK; for (; caps->matches; caps++) { - if (!caps->matches(caps, caps->def_scope)) + if (!(caps->type & scope_mask) || + !caps->matches(caps, cpucap_default_scope(caps))) continue; if (!cpus_have_cap(caps->capability) && caps->desc) @@ -1229,31 +1516,130 @@ void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps, } } +static void update_cpu_capabilities(u16 scope_mask) +{ + __update_cpu_capabilities(arm64_errata, scope_mask, + "enabling workaround for"); + __update_cpu_capabilities(arm64_features, scope_mask, "detected:"); +} + +static int __enable_cpu_capability(void *arg) +{ + const struct arm64_cpu_capabilities *cap = arg; + + cap->cpu_enable(cap); + return 0; +} + /* * Run through the enabled capabilities and enable() it on all active * CPUs */ -void __init enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps) +static void __init +__enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps, + u16 scope_mask) { + scope_mask &= ARM64_CPUCAP_SCOPE_MASK; for (; caps->matches; caps++) { unsigned int num = caps->capability; - if (!cpus_have_cap(num)) + if (!(caps->type & scope_mask) || !cpus_have_cap(num)) continue; /* Ensure cpus_have_const_cap(num) works */ static_branch_enable(&cpu_hwcap_keys[num]); - if (caps->enable) { + if (caps->cpu_enable) { + /* + * Capabilities with SCOPE_BOOT_CPU scope are finalised + * before any secondary CPU boots. Thus, each secondary + * will enable the capability as appropriate via + * check_local_cpu_capabilities(). The only exception is + * the boot CPU, for which the capability must be + * enabled here. This approach avoids costly + * stop_machine() calls for this case. + * + * Otherwise, use stop_machine() as it schedules the + * work allowing us to modify PSTATE, instead of + * on_each_cpu() which uses an IPI, giving us a PSTATE + * that disappears when we return. + */ + if (scope_mask & SCOPE_BOOT_CPU) + caps->cpu_enable(caps); + else + stop_machine(__enable_cpu_capability, + (void *)caps, cpu_online_mask); + } + } +} + +static void __init enable_cpu_capabilities(u16 scope_mask) +{ + __enable_cpu_capabilities(arm64_errata, scope_mask); + __enable_cpu_capabilities(arm64_features, scope_mask); +} + +/* + * Run through the list of capabilities to check for conflicts. + * If the system has already detected a capability, take necessary + * action on this CPU. + * + * Returns "false" on conflicts. + */ +static bool +__verify_local_cpu_caps(const struct arm64_cpu_capabilities *caps, + u16 scope_mask) +{ + bool cpu_has_cap, system_has_cap; + + scope_mask &= ARM64_CPUCAP_SCOPE_MASK; + + for (; caps->matches; caps++) { + if (!(caps->type & scope_mask)) + continue; + + cpu_has_cap = caps->matches(caps, SCOPE_LOCAL_CPU); + system_has_cap = cpus_have_cap(caps->capability); + + if (system_has_cap) { + /* + * Check if the new CPU misses an advertised feature, + * which is not safe to miss. + */ + if (!cpu_has_cap && !cpucap_late_cpu_optional(caps)) + break; /* - * Use stop_machine() as it schedules the work allowing - * us to modify PSTATE, instead of on_each_cpu() which - * uses an IPI, giving us a PSTATE that disappears when - * we return. + * We have to issue cpu_enable() irrespective of + * whether the CPU has it or not, as it is enabeld + * system wide. It is upto the call back to take + * appropriate action on this CPU. */ - stop_machine(caps->enable, (void *)caps, cpu_online_mask); + if (caps->cpu_enable) + caps->cpu_enable(caps); + } else { + /* + * Check if the CPU has this capability if it isn't + * safe to have when the system doesn't. + */ + if (cpu_has_cap && !cpucap_late_cpu_permitted(caps)) + break; } } + + if (caps->matches) { + pr_crit("CPU%d: Detected conflict for capability %d (%s), System: %d, CPU: %d\n", + smp_processor_id(), caps->capability, + caps->desc, system_has_cap, cpu_has_cap); + return false; + } + + return true; +} + +static bool verify_local_cpu_caps(u16 scope_mask) +{ + return __verify_local_cpu_caps(arm64_errata, scope_mask) && + __verify_local_cpu_caps(arm64_features, scope_mask); } /* @@ -1262,8 +1648,13 @@ void __init enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps) */ static void check_early_cpu_features(void) { - verify_cpu_run_el(); verify_cpu_asid_bits(); + /* + * Early features are used by the kernel already. If there + * is a conflict, we cannot proceed further. + */ + if (!verify_local_cpu_caps(SCOPE_BOOT_CPU)) + cpu_panic_kernel(); } static void @@ -1278,27 +1669,6 @@ verify_local_elf_hwcaps(const struct arm64_cpu_capabilities *caps) } } -static void -verify_local_cpu_features(const struct arm64_cpu_capabilities *caps_list) -{ - const struct arm64_cpu_capabilities *caps = caps_list; - for (; caps->matches; caps++) { - if (!cpus_have_cap(caps->capability)) - continue; - /* - * If the new CPU misses an advertised feature, we cannot proceed - * further, park the cpu. - */ - if (!__this_cpu_has_cap(caps_list, caps->capability)) { - pr_crit("CPU%d: missing feature: %s\n", - smp_processor_id(), caps->desc); - cpu_die_early(); - } - if (caps->enable) - caps->enable((void *)caps); - } -} - static void verify_sve_features(void) { u64 safe_zcr = read_sanitised_ftr_reg(SYS_ZCR_EL1); @@ -1316,6 +1686,7 @@ static void verify_sve_features(void) /* Add checks on other ZCR bits here if necessary */ } + /* * Run through the enabled system capabilities and enable() it on this CPU. * The capabilities were decided based on the available CPUs at the boot time. @@ -1326,8 +1697,14 @@ static void verify_sve_features(void) */ static void verify_local_cpu_capabilities(void) { - verify_local_cpu_errata_workarounds(); - verify_local_cpu_features(arm64_features); + /* + * The capabilities with SCOPE_BOOT_CPU are checked from + * check_early_cpu_features(), as they need to be verified + * on all secondary CPUs. + */ + if (!verify_local_cpu_caps(SCOPE_ALL & ~SCOPE_BOOT_CPU)) + cpu_die_early(); + verify_local_elf_hwcaps(arm64_elf_hwcaps); if (system_supports_32bit_el0()) @@ -1335,9 +1712,6 @@ static void verify_local_cpu_capabilities(void) if (system_supports_sve()) verify_sve_features(); - - if (system_uses_ttbr0_pan()) - pr_info("Emulating Privileged Access Never (PAN) using TTBR0_EL1 switching\n"); } void check_local_cpu_capabilities(void) @@ -1350,20 +1724,22 @@ void check_local_cpu_capabilities(void) /* * If we haven't finalised the system capabilities, this CPU gets - * a chance to update the errata work arounds. + * a chance to update the errata work arounds and local features. * Otherwise, this CPU should verify that it has all the system * advertised capabilities. */ if (!sys_caps_initialised) - update_cpu_errata_workarounds(); + update_cpu_capabilities(SCOPE_LOCAL_CPU); else verify_local_cpu_capabilities(); } -static void __init setup_feature_capabilities(void) +static void __init setup_boot_cpu_capabilities(void) { - update_cpu_capabilities(arm64_features, "detected feature:"); - enable_cpu_capabilities(arm64_features); + /* Detect capabilities with either SCOPE_BOOT_CPU or SCOPE_LOCAL_CPU */ + update_cpu_capabilities(SCOPE_BOOT_CPU | SCOPE_LOCAL_CPU); + /* Enable the SCOPE_BOOT_CPU capabilities alone right away */ + enable_cpu_capabilities(SCOPE_BOOT_CPU); } DEFINE_STATIC_KEY_FALSE(arm64_const_caps_ready); @@ -1382,21 +1758,34 @@ bool this_cpu_has_cap(unsigned int cap) __this_cpu_has_cap(arm64_errata, cap)); } +static void __init setup_system_capabilities(void) +{ + /* + * We have finalised the system-wide safe feature + * registers, finalise the capabilities that depend + * on it. Also enable all the available capabilities, + * that are not enabled already. + */ + update_cpu_capabilities(SCOPE_SYSTEM); + enable_cpu_capabilities(SCOPE_ALL & ~SCOPE_BOOT_CPU); +} + void __init setup_cpu_features(void) { u32 cwg; - int cls; - /* Set the CPU feature capabilies */ - setup_feature_capabilities(); - enable_errata_workarounds(); + setup_system_capabilities(); mark_const_caps_ready(); setup_elf_hwcaps(arm64_elf_hwcaps); if (system_supports_32bit_el0()) setup_elf_hwcaps(compat_elf_hwcaps); + if (system_uses_ttbr0_pan()) + pr_info("emulated: Privileged Access Never (PAN) using TTBR0_EL1 switching\n"); + sve_setup(); + minsigstksz_setup(); /* Advertise that we have computed the system capabilities */ set_sys_caps_initialised(); @@ -1405,13 +1794,9 @@ void __init setup_cpu_features(void) * Check for sane CTR_EL0.CWG value. */ cwg = cache_type_cwg(); - cls = cache_line_size(); if (!cwg) - pr_warn("No Cache Writeback Granule information, assuming cache line size %d\n", - cls); - if (L1_CACHE_BYTES < cls) - pr_warn("L1_CACHE_BYTES smaller than the Cache Writeback Granule (%d < %d)\n", - L1_CACHE_BYTES, cls); + pr_warn("No Cache Writeback Granule information, assuming %d\n", + ARCH_DMA_MINALIGN); } static bool __maybe_unused @@ -1420,6 +1805,11 @@ cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry, int __unused) return (cpus_have_const_cap(ARM64_HAS_PAN) && !cpus_have_const_cap(ARM64_HAS_UAO)); } +static void __maybe_unused cpu_enable_cnp(struct arm64_cpu_capabilities const *cap) +{ + cpu_replace_ttbr1(lm_alias(swapper_pg_dir)); +} + /* * We emulate only the following system register space. * Op0 = 0x3, CRn = 0x0, Op1 = 0x0, CRm = [0, 4 - 7] @@ -1481,31 +1871,36 @@ static int emulate_sys_reg(u32 id, u64 *valp) return 0; } -static int emulate_mrs(struct pt_regs *regs, u32 insn) +int do_emulate_mrs(struct pt_regs *regs, u32 sys_reg, u32 rt) { int rc; - u32 sys_reg, dst; u64 val; - /* - * sys_reg values are defined as used in mrs/msr instruction. - * shift the imm value to get the encoding. - */ - sys_reg = (u32)aarch64_insn_decode_immediate(AARCH64_INSN_IMM_16, insn) << 5; rc = emulate_sys_reg(sys_reg, &val); if (!rc) { - dst = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RT, insn); - pt_regs_write_reg(regs, dst, val); + pt_regs_write_reg(regs, rt, val); arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE); } - return rc; } +static int emulate_mrs(struct pt_regs *regs, u32 insn) +{ + u32 sys_reg, rt; + + /* + * sys_reg values are defined as used in mrs/msr instruction. + * shift the imm value to get the encoding. + */ + sys_reg = (u32)aarch64_insn_decode_immediate(AARCH64_INSN_IMM_16, insn) << 5; + rt = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RT, insn); + return do_emulate_mrs(regs, sys_reg, rt); +} + static struct undef_hook mrs_hook = { .instr_mask = 0xfff00000, .instr_val = 0xd5300000, - .pstate_mask = COMPAT_PSR_MODE_MASK, + .pstate_mask = PSR_AA32_MODE_MASK, .pstate_val = PSR_MODE_EL0t, .fn = emulate_mrs, }; @@ -1517,11 +1912,3 @@ static int __init enable_mrs_emulation(void) } core_initcall(enable_mrs_emulation); - -int cpu_clear_disr(void *__unused) -{ - /* Firmware may have left a deferred SError in this register. */ - write_sysreg_s(0, SYS_DISR_EL1); - - return 0; -} diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 7f94623df8a5..bcc2831399cb 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -77,6 +77,11 @@ static const char *const hwcap_str[] = { "sha512", "sve", "asimdfhm", + "dit", + "uscat", + "ilrcpc", + "flagm", + "ssbs", NULL }; @@ -320,7 +325,15 @@ static void cpuinfo_detect_icache_policy(struct cpuinfo_arm64 *info) static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) { info->reg_cntfrq = arch_timer_get_cntfrq(); - info->reg_ctr = read_cpuid_cachetype(); + /* + * Use the effective value of the CTR_EL0 than the raw value + * exposed by the CPU. CTR_E0.IDC field value must be interpreted + * with the CLIDR_EL1 fields to avoid triggering false warnings + * when there is a mismatch across the CPUs. Keep track of the + * effective value of the CTR_EL0 in our internal records for + * acurate sanity check and feature enablement. + */ + info->reg_ctr = read_cpuid_effective_cachetype(); info->reg_dczid = read_cpuid(DCZID_EL0); info->reg_midr = read_cpuid_id(); info->reg_revidr = read_cpuid(REVIDR_EL1); diff --git a/arch/arm64/kernel/crash_core.c b/arch/arm64/kernel/crash_core.c new file mode 100644 index 000000000000..ca4c3e12d8c5 --- /dev/null +++ b/arch/arm64/kernel/crash_core.c @@ -0,0 +1,19 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) Linaro. + * Copyright (C) Huawei Futurewei Technologies. + */ + +#include <linux/crash_core.h> +#include <asm/memory.h> + +void arch_crash_save_vmcoreinfo(void) +{ + VMCOREINFO_NUMBER(VA_BITS); + /* Please note VMCOREINFO_NUMBER() uses "%d", not "%x" */ + vmcoreinfo_append_str("NUMBER(kimage_voffset)=0x%llx\n", + kimage_voffset); + vmcoreinfo_append_str("NUMBER(PHYS_OFFSET)=0x%llx\n", + PHYS_OFFSET); + vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset()); +} diff --git a/arch/arm64/kernel/crash_dump.c b/arch/arm64/kernel/crash_dump.c index f46d57c31443..6b5037ed15b2 100644 --- a/arch/arm64/kernel/crash_dump.c +++ b/arch/arm64/kernel/crash_dump.c @@ -58,7 +58,7 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf, /** * elfcorehdr_read - read from ELF core header * @buf: buffer where the data is placed - * @csize: number of bytes to read + * @count: number of bytes to read * @ppos: address in the memory * * This function reads @count bytes from elf core header which exists diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index 53781f5687c5..d7bb6aefae0a 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -33,6 +33,7 @@ #include <asm/daifflags.h> #include <asm/debug-monitors.h> #include <asm/system_misc.h> +#include <asm/traps.h> /* Determine debug architecture. */ u8 debug_monitors_arch(void) @@ -209,13 +210,6 @@ NOKPROBE_SYMBOL(call_step_hook); static void send_user_sigtrap(int si_code) { struct pt_regs *regs = current_pt_regs(); - siginfo_t info; - - clear_siginfo(&info); - info.si_signo = SIGTRAP; - info.si_errno = 0; - info.si_code = si_code; - info.si_addr = (void __user *)instruction_pointer(regs); if (WARN_ON(!user_mode(regs))) return; @@ -223,7 +217,9 @@ static void send_user_sigtrap(int si_code) if (interrupts_enabled(regs)) local_irq_enable(); - force_sig_info(SIGTRAP, &info, current); + arm64_force_sig_fault(SIGTRAP, si_code, + (void __user *)instruction_pointer(regs), + "User debug trap"); } static int single_step_handler(unsigned long addr, unsigned int esr, diff --git a/arch/arm64/kernel/efi-rt-wrapper.S b/arch/arm64/kernel/efi-rt-wrapper.S new file mode 100644 index 000000000000..05235ebb336d --- /dev/null +++ b/arch/arm64/kernel/efi-rt-wrapper.S @@ -0,0 +1,41 @@ +/* + * Copyright (C) 2018 Linaro Ltd <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/linkage.h> + +ENTRY(__efi_rt_asm_wrapper) + stp x29, x30, [sp, #-32]! + mov x29, sp + + /* + * Register x18 is designated as the 'platform' register by the AAPCS, + * which means firmware running at the same exception level as the OS + * (such as UEFI) should never touch it. + */ + stp x1, x18, [sp, #16] + + /* + * We are lucky enough that no EFI runtime services take more than + * 5 arguments, so all are passed in registers rather than via the + * stack. + */ + mov x8, x0 + mov x0, x2 + mov x1, x3 + mov x2, x4 + mov x3, x5 + mov x4, x6 + blr x8 + + ldp x1, x2, [sp, #16] + cmp x2, x18 + ldp x29, x30, [sp], #32 + b.ne 0f + ret +0: b efi_handle_corrupted_x18 // tail call +ENDPROC(__efi_rt_asm_wrapper) diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index a8bf1c892b90..4f9acb5fbe97 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -126,3 +126,9 @@ bool efi_poweroff_required(void) { return efi_enabled(EFI_RUNTIME_SERVICES); } + +asmlinkage efi_status_t efi_handle_corrupted_x18(efi_status_t s, const char *f) +{ + pr_err_ratelimited(FW_BUG "register x18 corrupted by EFI %s\n", f); + return s; +} diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S index 73f17bffcd23..12d4958e6429 100644 --- a/arch/arm64/kernel/entry-fpsimd.S +++ b/arch/arm64/kernel/entry-fpsimd.S @@ -49,7 +49,7 @@ ENTRY(sve_save_state) ENDPROC(sve_save_state) ENTRY(sve_load_state) - sve_load 0, x1, x2, 3 + sve_load 0, x1, x2, 3, x4 ret ENDPROC(sve_load_state) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index ec2ee720e33e..039144ecbcb2 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -18,6 +18,7 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ +#include <linux/arm-smccc.h> #include <linux/init.h> #include <linux/linkage.h> @@ -40,19 +41,9 @@ * Context tracking subsystem. Used to instrument transitions * between user and kernel mode. */ - .macro ct_user_exit, syscall = 0 + .macro ct_user_exit #ifdef CONFIG_CONTEXT_TRACKING bl context_tracking_user_exit - .if \syscall == 1 - /* - * Save/restore needed during syscalls. Restore syscall arguments from - * the values already saved on stack during kernel_entry. - */ - ldp x0, x1, [sp] - ldp x2, x3, [sp, #S_X2] - ldp x4, x5, [sp, #S_X4] - ldp x6, x7, [sp, #S_X6] - .endif #endif .endm @@ -62,6 +53,12 @@ #endif .endm + .macro clear_gp_regs + .irp n,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29 + mov x\n, xzr + .endr + .endm + /* * Bad Abort numbers *----------------- @@ -137,6 +134,26 @@ alternative_else_nop_endif add \dst, \dst, #(\sym - .entry.tramp.text) .endm + // This macro corrupts x0-x3. It is the caller's duty + // to save/restore them if required. + .macro apply_ssbd, state, tmp1, tmp2 +#ifdef CONFIG_ARM64_SSBD +alternative_cb arm64_enable_wa2_handling + b .L__asm_ssbd_skip\@ +alternative_cb_end + ldr_this_cpu \tmp2, arm64_ssbd_callback_required, \tmp1 + cbz \tmp2, .L__asm_ssbd_skip\@ + ldr \tmp2, [tsk, #TSK_TI_FLAGS] + tbnz \tmp2, #TIF_SSBD, .L__asm_ssbd_skip\@ + mov w0, #ARM_SMCCC_ARCH_WORKAROUND_2 + mov w1, #\state +alternative_cb arm64_update_smccc_conduit + nop // Patched to SMC/HVC #0 +alternative_cb_end +.L__asm_ssbd_skip\@: +#endif + .endm + .macro kernel_entry, el, regsize = 64 .if \regsize == 32 mov w0, w0 // zero upper 32 bits of x0 @@ -158,12 +175,14 @@ alternative_else_nop_endif stp x28, x29, [sp, #16 * 14] .if \el == 0 + clear_gp_regs mrs x21, sp_el0 ldr_this_cpu tsk, __entry_task, x20 // Ensure MDSCR_EL1.SS is clear, ldr x19, [tsk, #TSK_TI_FLAGS] // since we can unmask debug disable_step_tsk x19, x20 // exceptions when scheduling. - mov x29, xzr // fp pointed to user-space + apply_ssbd 1, x22, x23 + .else add x21, sp, #S_FRAME_SIZE get_thread_info tsk @@ -303,6 +322,7 @@ alternative_if ARM64_WORKAROUND_845719 alternative_else_nop_endif #endif 3: + apply_ssbd 0, x0, x1 .endif msr elr_el1, x21 // set up the return data @@ -569,7 +589,7 @@ el1_undef: inherit_daif pstate=x23, tmp=x2 mov x0, sp bl do_undefinstr - ASM_BUG() + kernel_exit 1 el1_dbg: /* * Debug exception handling @@ -645,6 +665,7 @@ el0_sync: cmp x24, #ESR_ELx_EC_FP_EXC64 // FP/ASIMD exception b.eq el0_fpsimd_exc cmp x24, #ESR_ELx_EC_SYS64 // configurable trap + ccmp x24, #ESR_ELx_EC_WFx, #4, ne b.eq el0_sys cmp x24, #ESR_ELx_EC_SP_ALIGN // stack alignment exception b.eq el0_sp_pc @@ -677,9 +698,9 @@ el0_sync_compat: cmp x24, #ESR_ELx_EC_UNKNOWN // unknown exception in EL0 b.eq el0_undef cmp x24, #ESR_ELx_EC_CP15_32 // CP15 MRC/MCR trap - b.eq el0_undef + b.eq el0_cp15 cmp x24, #ESR_ELx_EC_CP15_64 // CP15 MRRC/MCRR trap - b.eq el0_undef + b.eq el0_cp15 cmp x24, #ESR_ELx_EC_CP14_MR // CP14 MRC/MCR trap b.eq el0_undef cmp x24, #ESR_ELx_EC_CP14_LS // CP14 LDC/STC trap @@ -690,14 +711,9 @@ el0_sync_compat: b.ge el0_dbg b el0_inv el0_svc_compat: - /* - * AArch32 syscall handling - */ - ldr x16, [tsk, #TSK_TI_FLAGS] // load thread flags - adrp stbl, compat_sys_call_table // load compat syscall table pointer - mov wscno, w7 // syscall number in w7 (r7) - mov wsc_nr, #__NR_compat_syscalls - b el0_svc_naked + mov x0, sp + bl el0_svc_compat_handler + b ret_to_user .align 6 el0_irq_compat: @@ -707,6 +723,17 @@ el0_irq_compat: el0_error_compat: kernel_entry 0, 32 b el0_error_naked + +el0_cp15: + /* + * Trapped CP15 (MRC, MCR, MRRC, MCRR) instructions + */ + enable_daif + ct_user_exit + mov x0, x25 + mov x1, sp + bl do_cp15instr + b ret_to_user #endif el0_da: @@ -866,25 +893,6 @@ el0_error_naked: b ret_to_user ENDPROC(el0_error) - -/* - * This is the fast syscall return path. We do as little as possible here, - * and this includes saving x0 back into the kernel stack. - */ -ret_fast_syscall: - disable_daif - str x0, [sp, #S_X0] // returned x0 - ldr x1, [tsk, #TSK_TI_FLAGS] // re-check for syscall tracing - and x2, x1, #_TIF_SYSCALL_WORK - cbnz x2, ret_fast_syscall_trace - and x2, x1, #_TIF_WORK_MASK - cbnz x2, work_pending - enable_step_tsk x1, x2 - kernel_exit 0 -ret_fast_syscall_trace: - enable_daif - b __sys_trace_return_skipped // we already saved x0 - /* * Ok, we need to do extra processing, enter the slow path. */ @@ -906,6 +914,9 @@ ret_to_user: cbnz x2, work_pending finish_ret_to_user: enable_step_tsk x1, x2 +#ifdef CONFIG_GCC_PLUGIN_STACKLEAK + bl stackleak_erase +#endif kernel_exit 0 ENDPROC(ret_to_user) @@ -914,85 +925,10 @@ ENDPROC(ret_to_user) */ .align 6 el0_svc: - ldr x16, [tsk, #TSK_TI_FLAGS] // load thread flags - adrp stbl, sys_call_table // load syscall table pointer - mov wscno, w8 // syscall number in w8 - mov wsc_nr, #__NR_syscalls - -#ifdef CONFIG_ARM64_SVE -alternative_if_not ARM64_SVE - b el0_svc_naked -alternative_else_nop_endif - tbz x16, #TIF_SVE, el0_svc_naked // Skip unless TIF_SVE set: - bic x16, x16, #_TIF_SVE // discard SVE state - str x16, [tsk, #TSK_TI_FLAGS] - - /* - * task_fpsimd_load() won't be called to update CPACR_EL1 in - * ret_to_user unless TIF_FOREIGN_FPSTATE is still set, which only - * happens if a context switch or kernel_neon_begin() or context - * modification (sigreturn, ptrace) intervenes. - * So, ensure that CPACR_EL1 is already correct for the fast-path case: - */ - mrs x9, cpacr_el1 - bic x9, x9, #CPACR_EL1_ZEN_EL0EN // disable SVE for el0 - msr cpacr_el1, x9 // synchronised by eret to el0 -#endif - -el0_svc_naked: // compat entry point - stp x0, xscno, [sp, #S_ORIG_X0] // save the original x0 and syscall number - enable_daif - ct_user_exit 1 - - tst x16, #_TIF_SYSCALL_WORK // check for syscall hooks - b.ne __sys_trace - cmp wscno, wsc_nr // check upper syscall limit - b.hs ni_sys - mask_nospec64 xscno, xsc_nr, x19 // enforce bounds for syscall number - ldr x16, [stbl, xscno, lsl #3] // address in the syscall table - blr x16 // call sys_* routine - b ret_fast_syscall -ni_sys: mov x0, sp - bl do_ni_syscall - b ret_fast_syscall -ENDPROC(el0_svc) - - /* - * This is the really slow path. We're going to be doing context - * switches, and waiting for our parent to respond. - */ -__sys_trace: - cmp wscno, #NO_SYSCALL // user-issued syscall(-1)? - b.ne 1f - mov x0, #-ENOSYS // set default errno if so - str x0, [sp, #S_X0] -1: mov x0, sp - bl syscall_trace_enter - cmp w0, #NO_SYSCALL // skip the syscall? - b.eq __sys_trace_return_skipped - mov wscno, w0 // syscall number (possibly new) - mov x1, sp // pointer to regs - cmp wscno, wsc_nr // check upper syscall limit - b.hs __ni_sys_trace - ldp x0, x1, [sp] // restore the syscall args - ldp x2, x3, [sp, #S_X2] - ldp x4, x5, [sp, #S_X4] - ldp x6, x7, [sp, #S_X6] - ldr x16, [stbl, xscno, lsl #3] // address in the syscall table - blr x16 // call sys_* routine - -__sys_trace_return: - str x0, [sp, #S_X0] // save returned x0 -__sys_trace_return_skipped: - mov x0, sp - bl syscall_trace_exit + bl el0_svc_handler b ret_to_user - -__ni_sys_trace: - mov x0, sp - bl do_ni_syscall - b __sys_trace_return +ENDPROC(el0_svc) .popsection // .entry.text @@ -1108,14 +1044,6 @@ __entry_tramp_data_start: #endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ /* - * Special system call wrappers. - */ -ENTRY(sys_rt_sigreturn_wrapper) - mov x0, sp - b sys_rt_sigreturn -ENDPROC(sys_rt_sigreturn_wrapper) - -/* * Register switch for AArch64. The callee-saved registers need to be saved * and restored. On entry: * x0 = previous task_struct (must be preserved across the switch) diff --git a/arch/arm64/kernel/entry32.S b/arch/arm64/kernel/entry32.S deleted file mode 100644 index f332d5d1f6b4..000000000000 --- a/arch/arm64/kernel/entry32.S +++ /dev/null @@ -1,121 +0,0 @@ -/* - * Compat system call wrappers - * - * Copyright (C) 2012 ARM Ltd. - * Authors: Will Deacon <will.deacon@arm.com> - * Catalin Marinas <catalin.marinas@arm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -#include <linux/linkage.h> -#include <linux/const.h> - -#include <asm/assembler.h> -#include <asm/asm-offsets.h> -#include <asm/errno.h> -#include <asm/page.h> - -/* - * System call wrappers for the AArch32 compatibility layer. - */ - -ENTRY(compat_sys_sigreturn_wrapper) - mov x0, sp - b compat_sys_sigreturn -ENDPROC(compat_sys_sigreturn_wrapper) - -ENTRY(compat_sys_rt_sigreturn_wrapper) - mov x0, sp - b compat_sys_rt_sigreturn -ENDPROC(compat_sys_rt_sigreturn_wrapper) - -ENTRY(compat_sys_statfs64_wrapper) - mov w3, #84 - cmp w1, #88 - csel w1, w3, w1, eq - b compat_sys_statfs64 -ENDPROC(compat_sys_statfs64_wrapper) - -ENTRY(compat_sys_fstatfs64_wrapper) - mov w3, #84 - cmp w1, #88 - csel w1, w3, w1, eq - b compat_sys_fstatfs64 -ENDPROC(compat_sys_fstatfs64_wrapper) - -/* - * Note: off_4k (w5) is always in units of 4K. If we can't do the - * requested offset because it is not page-aligned, we return -EINVAL. - */ -ENTRY(compat_sys_mmap2_wrapper) -#if PAGE_SHIFT > 12 - tst w5, #~PAGE_MASK >> 12 - b.ne 1f - lsr w5, w5, #PAGE_SHIFT - 12 -#endif - b sys_mmap_pgoff -1: mov x0, #-EINVAL - ret -ENDPROC(compat_sys_mmap2_wrapper) - -/* - * Wrappers for AArch32 syscalls that either take 64-bit parameters - * in registers or that take 32-bit parameters which require sign - * extension. - */ -ENTRY(compat_sys_pread64_wrapper) - regs_to_64 x3, x4, x5 - b sys_pread64 -ENDPROC(compat_sys_pread64_wrapper) - -ENTRY(compat_sys_pwrite64_wrapper) - regs_to_64 x3, x4, x5 - b sys_pwrite64 -ENDPROC(compat_sys_pwrite64_wrapper) - -ENTRY(compat_sys_truncate64_wrapper) - regs_to_64 x1, x2, x3 - b sys_truncate -ENDPROC(compat_sys_truncate64_wrapper) - -ENTRY(compat_sys_ftruncate64_wrapper) - regs_to_64 x1, x2, x3 - b sys_ftruncate -ENDPROC(compat_sys_ftruncate64_wrapper) - -ENTRY(compat_sys_readahead_wrapper) - regs_to_64 x1, x2, x3 - mov w2, w4 - b sys_readahead -ENDPROC(compat_sys_readahead_wrapper) - -ENTRY(compat_sys_fadvise64_64_wrapper) - mov w6, w1 - regs_to_64 x1, x2, x3 - regs_to_64 x2, x4, x5 - mov w3, w6 - b sys_fadvise64_64 -ENDPROC(compat_sys_fadvise64_64_wrapper) - -ENTRY(compat_sys_sync_file_range2_wrapper) - regs_to_64 x2, x2, x3 - regs_to_64 x3, x4, x5 - b sys_sync_file_range2 -ENDPROC(compat_sys_sync_file_range2_wrapper) - -ENTRY(compat_sys_fallocate_wrapper) - regs_to_64 x2, x2, x3 - regs_to_64 x3, x4, x5 - b sys_fallocate -ENDPROC(compat_sys_fallocate_wrapper) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index e7226c4c7493..5ebe73b69961 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -31,16 +31,19 @@ #include <linux/percpu.h> #include <linux/prctl.h> #include <linux/preempt.h> -#include <linux/prctl.h> #include <linux/ptrace.h> #include <linux/sched/signal.h> #include <linux/sched/task_stack.h> #include <linux/signal.h> #include <linux/slab.h> +#include <linux/stddef.h> #include <linux/sysctl.h> +#include <asm/esr.h> #include <asm/fpsimd.h> +#include <asm/cpufeature.h> #include <asm/cputype.h> +#include <asm/processor.h> #include <asm/simd.h> #include <asm/sigcontext.h> #include <asm/sysreg.h> @@ -64,7 +67,7 @@ * been loaded into its FPSIMD registers most recently, or whether it has * been used to perform kernel mode NEON in the meantime. * - * For (a), we add a 'cpu' field to struct fpsimd_state, which gets updated to + * For (a), we add a fpsimd_cpu field to thread_struct, which gets updated to * the id of the current CPU every time the state is loaded onto a CPU. For (b), * we add the per-cpu variable 'fpsimd_last_state' (below), which contains the * address of the userland FPSIMD state of the task that was loaded onto the CPU @@ -73,7 +76,7 @@ * With this in place, we no longer have to restore the next FPSIMD state right * when switching between tasks. Instead, we can defer this check to userland * resume, at which time we verify whether the CPU's fpsimd_last_state and the - * task's fpsimd_state.cpu are still mutually in sync. If this is the case, we + * task's fpsimd_cpu are still mutually in sync. If this is the case, we * can omit the FPSIMD restore. * * As an optimization, we use the thread_info flag TIF_FOREIGN_FPSTATE to @@ -90,14 +93,14 @@ * flag with local_bh_disable() unless softirqs are already masked. * * For a certain task, the sequence may look something like this: - * - the task gets scheduled in; if both the task's fpsimd_state.cpu field + * - the task gets scheduled in; if both the task's fpsimd_cpu field * contains the id of the current CPU, and the CPU's fpsimd_last_state per-cpu * variable points to the task's fpsimd_state, the TIF_FOREIGN_FPSTATE flag is * cleared, otherwise it is set; * * - the task returns to userland; if TIF_FOREIGN_FPSTATE is set, the task's * userland FPSIMD state is copied from memory to the registers, the task's - * fpsimd_state.cpu field is set to the id of the current CPU, the current + * fpsimd_cpu field is set to the id of the current CPU, the current * CPU's fpsimd_last_state pointer is set to this task's fpsimd_state and the * TIF_FOREIGN_FPSTATE flag is cleared; * @@ -115,8 +118,7 @@ * whatever is in the FPSIMD registers is not saved to memory, but discarded. */ struct fpsimd_last_state_struct { - struct fpsimd_state *st; - bool sve_in_use; + struct user_fpsimd_state *st; }; static DEFINE_PER_CPU(struct fpsimd_last_state_struct, fpsimd_last_state); @@ -127,7 +129,7 @@ static int sve_default_vl = -1; #ifdef CONFIG_ARM64_SVE /* Maximum supported vector length across all CPUs (initially poisoned) */ -int __ro_after_init sve_max_vl = -1; +int __ro_after_init sve_max_vl = SVE_VL_MIN; /* Set of available vector lengths, as vq_to_bit(vq): */ static __ro_after_init DECLARE_BITMAP(sve_vq_map, SVE_VQ_MAX); static void __percpu *efi_sve_state; @@ -157,38 +159,6 @@ static void sve_free(struct task_struct *task) __sve_free(task); } - -/* Offset of FFR in the SVE register dump */ -static size_t sve_ffr_offset(int vl) -{ - return SVE_SIG_FFR_OFFSET(sve_vq_from_vl(vl)) - SVE_SIG_REGS_OFFSET; -} - -static void *sve_pffr(struct task_struct *task) -{ - return (char *)task->thread.sve_state + - sve_ffr_offset(task->thread.sve_vl); -} - -static void change_cpacr(u64 val, u64 mask) -{ - u64 cpacr = read_sysreg(CPACR_EL1); - u64 new = (cpacr & ~mask) | val; - - if (new != cpacr) - write_sysreg(new, CPACR_EL1); -} - -static void sve_user_disable(void) -{ - change_cpacr(0, CPACR_EL1_ZEN_EL0EN); -} - -static void sve_user_enable(void) -{ - change_cpacr(CPACR_EL1_ZEN_EL0EN, CPACR_EL1_ZEN_EL0EN); -} - /* * TIF_SVE controls whether a task can use SVE without trapping while * in userspace, and also the way a task's FPSIMD/SVE state is stored @@ -222,7 +192,7 @@ static void sve_user_enable(void) * sets TIF_SVE. * * When stored, FPSIMD registers V0-V31 are encoded in - * task->fpsimd_state; bits [max : 128] for each of Z0-Z31 are + * task->thread.uw.fpsimd_state; bits [max : 128] for each of Z0-Z31 are * logically zero but not stored anywhere; P0-P15 and FFR are not * stored and have unspecified values from userspace's point of * view. For hygiene purposes, the kernel zeroes them on next use, @@ -231,9 +201,9 @@ static void sve_user_enable(void) * task->thread.sve_state does not need to be non-NULL, valid or any * particular size: it must not be dereferenced. * - * * FPSR and FPCR are always stored in task->fpsimd_state irrespctive of - * whether TIF_SVE is clear or set, since these are not vector length - * dependent. + * * FPSR and FPCR are always stored in task->thread.uw.fpsimd_state + * irrespective of whether TIF_SVE is clear or set, since these are + * not vector length dependent. */ /* @@ -250,31 +220,24 @@ static void task_fpsimd_load(void) WARN_ON(!in_softirq() && !irqs_disabled()); if (system_supports_sve() && test_thread_flag(TIF_SVE)) - sve_load_state(sve_pffr(current), - ¤t->thread.fpsimd_state.fpsr, + sve_load_state(sve_pffr(¤t->thread), + ¤t->thread.uw.fpsimd_state.fpsr, sve_vq_from_vl(current->thread.sve_vl) - 1); else - fpsimd_load_state(¤t->thread.fpsimd_state); - - if (system_supports_sve()) { - /* Toggle SVE trapping for userspace if needed */ - if (test_thread_flag(TIF_SVE)) - sve_user_enable(); - else - sve_user_disable(); - - /* Serialised by exception return to user */ - } + fpsimd_load_state(¤t->thread.uw.fpsimd_state); } /* - * Ensure current's FPSIMD/SVE storage in thread_struct is up to date - * with respect to the CPU registers. + * Ensure FPSIMD/SVE storage in memory for the loaded context is up to + * date with respect to the CPU registers. * * Softirqs (and preemption) must be disabled. */ -static void task_fpsimd_save(void) +void fpsimd_save(void) { + struct user_fpsimd_state *st = __this_cpu_read(fpsimd_last_state.st); + /* set by fpsimd_bind_task_to_cpu() or fpsimd_bind_state_to_cpu() */ + WARN_ON(!in_softirq() && !irqs_disabled()); if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) { @@ -285,15 +248,13 @@ static void task_fpsimd_save(void) * re-enter user with corrupt state. * There's no way to recover, so kill it: */ - force_signal_inject( - SIGKILL, 0, current_pt_regs(), 0); + force_signal_inject(SIGKILL, SI_KERNEL, 0); return; } - sve_save_state(sve_pffr(current), - ¤t->thread.fpsimd_state.fpsr); + sve_save_state(sve_pffr(¤t->thread), &st->fpsr); } else - fpsimd_save_state(¤t->thread.fpsimd_state); + fpsimd_save_state(st); } } @@ -359,22 +320,13 @@ static int sve_proc_do_default_vl(struct ctl_table *table, int write, return ret; /* Writing -1 has the special meaning "set to max": */ - if (vl == -1) { - /* Fail safe if sve_max_vl wasn't initialised */ - if (WARN_ON(!sve_vl_valid(sve_max_vl))) - vl = SVE_VL_MIN; - else - vl = sve_max_vl; - - goto chosen; - } + if (vl == -1) + vl = sve_max_vl; if (!sve_vl_valid(vl)) return -EINVAL; - vl = find_supported_vector_length(vl); -chosen: - sve_default_vl = vl; + sve_default_vl = find_supported_vector_length(vl); return 0; } @@ -404,20 +356,21 @@ static int __init sve_sysctl_init(void) { return 0; } (SVE_SIG_ZREG_OFFSET(vq, n) - SVE_SIG_REGS_OFFSET)) /* - * Transfer the FPSIMD state in task->thread.fpsimd_state to + * Transfer the FPSIMD state in task->thread.uw.fpsimd_state to * task->thread.sve_state. * * Task can be a non-runnable task, or current. In the latter case, * softirqs (and preemption) must be disabled. * task->thread.sve_state must point to at least sve_state_size(task) * bytes of allocated kernel memory. - * task->thread.fpsimd_state must be up to date before calling this function. + * task->thread.uw.fpsimd_state must be up to date before calling this + * function. */ static void fpsimd_to_sve(struct task_struct *task) { unsigned int vq; void *sst = task->thread.sve_state; - struct fpsimd_state const *fst = &task->thread.fpsimd_state; + struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state; unsigned int i; if (!system_supports_sve()) @@ -431,7 +384,7 @@ static void fpsimd_to_sve(struct task_struct *task) /* * Transfer the SVE state in task->thread.sve_state to - * task->thread.fpsimd_state. + * task->thread.uw.fpsimd_state. * * Task can be a non-runnable task, or current. In the latter case, * softirqs (and preemption) must be disabled. @@ -443,7 +396,7 @@ static void sve_to_fpsimd(struct task_struct *task) { unsigned int vq; void const *sst = task->thread.sve_state; - struct fpsimd_state *fst = &task->thread.fpsimd_state; + struct user_fpsimd_state *fst = &task->thread.uw.fpsimd_state; unsigned int i; if (!system_supports_sve()) @@ -510,7 +463,7 @@ void fpsimd_sync_to_sve(struct task_struct *task) } /* - * Ensure that task->thread.fpsimd_state is up to date with respect to + * Ensure that task->thread.uw.fpsimd_state is up to date with respect to * the user task, irrespective of whether SVE is in use or not. * * This should only be called by ptrace. task must be non-runnable. @@ -525,21 +478,21 @@ void sve_sync_to_fpsimd(struct task_struct *task) /* * Ensure that task->thread.sve_state is up to date with respect to - * the task->thread.fpsimd_state. + * the task->thread.uw.fpsimd_state. * * This should only be called by ptrace to merge new FPSIMD register * values into a task for which SVE is currently active. * task must be non-runnable. * task->thread.sve_state must point to at least sve_state_size(task) * bytes of allocated kernel memory. - * task->thread.fpsimd_state must already have been initialised with + * task->thread.uw.fpsimd_state must already have been initialised with * the new FPSIMD register values to be merged in. */ void sve_sync_from_fpsimd_zeropad(struct task_struct *task) { unsigned int vq; void *sst = task->thread.sve_state; - struct fpsimd_state const *fst = &task->thread.fpsimd_state; + struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state; unsigned int i; if (!test_tsk_thread_flag(task, TIF_SVE)) @@ -596,7 +549,7 @@ int sve_set_vector_length(struct task_struct *task, if (task == current) { local_bh_disable(); - task_fpsimd_save(); + fpsimd_save(); set_thread_flag(TIF_FOREIGN_FPSTATE); } @@ -616,10 +569,8 @@ int sve_set_vector_length(struct task_struct *task, task->thread.sve_vl = vl; out: - if (flags & PR_SVE_VL_INHERIT) - set_tsk_thread_flag(task, TIF_SVE_VL_INHERIT); - else - clear_tsk_thread_flag(task, TIF_SVE_VL_INHERIT); + update_tsk_thread_flag(task, TIF_SVE_VL_INHERIT, + flags & PR_SVE_VL_INHERIT); return 0; } @@ -757,12 +708,37 @@ fail: * Enable SVE for EL1. * Intended for use by the cpufeatures code during CPU boot. */ -int sve_kernel_enable(void *__always_unused p) +void sve_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p) { write_sysreg(read_sysreg(CPACR_EL1) | CPACR_EL1_ZEN_EL1EN, CPACR_EL1); isb(); +} - return 0; +/* + * Read the pseudo-ZCR used by cpufeatures to identify the supported SVE + * vector length. + * + * Use only if SVE is present. + * This function clobbers the SVE vector length. + */ +u64 read_zcr_features(void) +{ + u64 zcr; + unsigned int vq_max; + + /* + * Set the maximum possible VL, and write zeroes to all other + * bits to see if they stick. + */ + sve_kernel_enable(NULL); + write_sysreg_s(ZCR_ELx_LEN_MASK, SYS_ZCR_EL1); + + zcr = read_sysreg_s(SYS_ZCR_EL1); + zcr &= ~(u64)ZCR_ELx_LEN_MASK; /* find sticky 1s outside LEN field */ + vq_max = sve_vq_from_vl(sve_get_vl()); + zcr |= vq_max - 1; /* set LEN field to maximum effective value */ + + return zcr; } void __init sve_setup(void) @@ -831,7 +807,7 @@ asmlinkage void do_sve_acc(unsigned int esr, struct pt_regs *regs) { /* Even if we chose not to use SVE, the hardware could still trap: */ if (unlikely(!system_supports_sve()) || WARN_ON(is_compat_task())) { - force_signal_inject(SIGILL, ILL_ILLOPC, regs, 0); + force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc); return; } @@ -839,7 +815,7 @@ asmlinkage void do_sve_acc(unsigned int esr, struct pt_regs *regs) local_bh_disable(); - task_fpsimd_save(); + fpsimd_save(); fpsimd_to_sve(current); /* Force ret_to_user to reload the registers: */ @@ -866,56 +842,47 @@ asmlinkage void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs) */ asmlinkage void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs) { - siginfo_t info; - unsigned int si_code = FPE_FIXME; - - if (esr & FPEXC_IOF) - si_code = FPE_FLTINV; - else if (esr & FPEXC_DZF) - si_code = FPE_FLTDIV; - else if (esr & FPEXC_OFF) - si_code = FPE_FLTOVF; - else if (esr & FPEXC_UFF) - si_code = FPE_FLTUND; - else if (esr & FPEXC_IXF) - si_code = FPE_FLTRES; - - memset(&info, 0, sizeof(info)); - info.si_signo = SIGFPE; - info.si_code = si_code; - info.si_addr = (void __user *)instruction_pointer(regs); - - send_sig_info(SIGFPE, &info, current); + unsigned int si_code = FPE_FLTUNK; + + if (esr & ESR_ELx_FP_EXC_TFV) { + if (esr & FPEXC_IOF) + si_code = FPE_FLTINV; + else if (esr & FPEXC_DZF) + si_code = FPE_FLTDIV; + else if (esr & FPEXC_OFF) + si_code = FPE_FLTOVF; + else if (esr & FPEXC_UFF) + si_code = FPE_FLTUND; + else if (esr & FPEXC_IXF) + si_code = FPE_FLTRES; + } + + send_sig_fault(SIGFPE, si_code, + (void __user *)instruction_pointer(regs), + current); } void fpsimd_thread_switch(struct task_struct *next) { + bool wrong_task, wrong_cpu; + if (!system_supports_fpsimd()) return; + + /* Save unsaved fpsimd state, if any: */ + fpsimd_save(); + /* - * Save the current FPSIMD state to memory, but only if whatever is in - * the registers is in fact the most recent userland FPSIMD state of - * 'current'. + * Fix up TIF_FOREIGN_FPSTATE to correctly describe next's + * state. For kernel threads, FPSIMD registers are never loaded + * and wrong_task and wrong_cpu will always be true. */ - if (current->mm) - task_fpsimd_save(); + wrong_task = __this_cpu_read(fpsimd_last_state.st) != + &next->thread.uw.fpsimd_state; + wrong_cpu = next->thread.fpsimd_cpu != smp_processor_id(); - if (next->mm) { - /* - * If we are switching to a task whose most recent userland - * FPSIMD state is already in the registers of *this* cpu, - * we can skip loading the state from memory. Otherwise, set - * the TIF_FOREIGN_FPSTATE flag so the state will be loaded - * upon the next return to userland. - */ - struct fpsimd_state *st = &next->thread.fpsimd_state; - - if (__this_cpu_read(fpsimd_last_state.st) == st - && st->cpu == smp_processor_id()) - clear_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE); - else - set_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE); - } + update_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE, + wrong_task || wrong_cpu); } void fpsimd_flush_thread(void) @@ -927,7 +894,8 @@ void fpsimd_flush_thread(void) local_bh_disable(); - memset(¤t->thread.fpsimd_state, 0, sizeof(struct fpsimd_state)); + memset(¤t->thread.uw.fpsimd_state, 0, + sizeof(current->thread.uw.fpsimd_state)); fpsimd_flush_task_state(current); if (system_supports_sve()) { @@ -980,13 +948,13 @@ void fpsimd_preserve_current_state(void) return; local_bh_disable(); - task_fpsimd_save(); + fpsimd_save(); local_bh_enable(); } /* * Like fpsimd_preserve_current_state(), but ensure that - * current->thread.fpsimd_state is updated so that it can be copied to + * current->thread.uw.fpsimd_state is updated so that it can be copied to * the signal frame. */ void fpsimd_signal_preserve_current_state(void) @@ -1000,15 +968,33 @@ void fpsimd_signal_preserve_current_state(void) * Associate current's FPSIMD context with this cpu * Preemption must be disabled when calling this function. */ -static void fpsimd_bind_to_cpu(void) +void fpsimd_bind_task_to_cpu(void) +{ + struct fpsimd_last_state_struct *last = + this_cpu_ptr(&fpsimd_last_state); + + last->st = ¤t->thread.uw.fpsimd_state; + current->thread.fpsimd_cpu = smp_processor_id(); + + if (system_supports_sve()) { + /* Toggle SVE trapping for userspace if needed */ + if (test_thread_flag(TIF_SVE)) + sve_user_enable(); + else + sve_user_disable(); + + /* Serialised by exception return to user */ + } +} + +void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st) { struct fpsimd_last_state_struct *last = this_cpu_ptr(&fpsimd_last_state); - struct fpsimd_state *st = ¤t->thread.fpsimd_state; + + WARN_ON(!in_softirq() && !irqs_disabled()); last->st = st; - last->sve_in_use = test_thread_flag(TIF_SVE); - st->cpu = smp_processor_id(); } /* @@ -1025,7 +1011,7 @@ void fpsimd_restore_current_state(void) if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) { task_fpsimd_load(); - fpsimd_bind_to_cpu(); + fpsimd_bind_task_to_cpu(); } local_bh_enable(); @@ -1043,14 +1029,14 @@ void fpsimd_update_current_state(struct user_fpsimd_state const *state) local_bh_disable(); - current->thread.fpsimd_state.user_fpsimd = *state; + current->thread.uw.fpsimd_state = *state; if (system_supports_sve() && test_thread_flag(TIF_SVE)) fpsimd_to_sve(current); task_fpsimd_load(); + fpsimd_bind_task_to_cpu(); - if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) - fpsimd_bind_to_cpu(); + clear_thread_flag(TIF_FOREIGN_FPSTATE); local_bh_enable(); } @@ -1060,32 +1046,15 @@ void fpsimd_update_current_state(struct user_fpsimd_state const *state) */ void fpsimd_flush_task_state(struct task_struct *t) { - t->thread.fpsimd_state.cpu = NR_CPUS; + t->thread.fpsimd_cpu = NR_CPUS; } -static inline void fpsimd_flush_cpu_state(void) +void fpsimd_flush_cpu_state(void) { __this_cpu_write(fpsimd_last_state.st, NULL); + set_thread_flag(TIF_FOREIGN_FPSTATE); } -/* - * Invalidate any task SVE state currently held in this CPU's regs. - * - * This is used to prevent the kernel from trying to reuse SVE register data - * that is detroyed by KVM guest enter/exit. This function should go away when - * KVM SVE support is implemented. Don't use it for anything else. - */ -#ifdef CONFIG_ARM64_SVE -void sve_flush_cpu_state(void) -{ - struct fpsimd_last_state_struct const *last = - this_cpu_ptr(&fpsimd_last_state); - - if (last->st && last->sve_in_use) - fpsimd_flush_cpu_state(); -} -#endif /* CONFIG_ARM64_SVE */ - #ifdef CONFIG_KERNEL_MODE_NEON DEFINE_PER_CPU(bool, kernel_neon_busy); @@ -1119,11 +1088,8 @@ void kernel_neon_begin(void) __this_cpu_write(kernel_neon_busy, true); - /* Save unsaved task fpsimd state, if any: */ - if (current->mm) { - task_fpsimd_save(); - set_thread_flag(TIF_FOREIGN_FPSTATE); - } + /* Save unsaved fpsimd state, if any: */ + fpsimd_save(); /* Invalidate any task state remaining in the fpsimd regs: */ fpsimd_flush_cpu_state(); @@ -1159,7 +1125,7 @@ EXPORT_SYMBOL(kernel_neon_end); #ifdef CONFIG_EFI -static DEFINE_PER_CPU(struct fpsimd_state, efi_fpsimd_state); +static DEFINE_PER_CPU(struct user_fpsimd_state, efi_fpsimd_state); static DEFINE_PER_CPU(bool, efi_fpsimd_state_used); static DEFINE_PER_CPU(bool, efi_sve_state_used); @@ -1245,13 +1211,10 @@ static int fpsimd_cpu_pm_notifier(struct notifier_block *self, { switch (cmd) { case CPU_PM_ENTER: - if (current->mm) - task_fpsimd_save(); + fpsimd_save(); fpsimd_flush_cpu_state(); break; case CPU_PM_EXIT: - if (current->mm) - set_thread_flag(TIF_FOREIGN_FPSTATE); break; case CPU_PM_ENTER_FAILED: default: diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 2b6b8b24e5ab..4471f570a295 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -287,19 +287,21 @@ __create_page_tables: mov x28, lr /* - * Invalidate the idmap and swapper page tables to avoid potential - * dirty cache lines being evicted. + * Invalidate the init page tables to avoid potential dirty cache lines + * being evicted. Other page tables are allocated in rodata as part of + * the kernel image, and thus are clean to the PoC per the boot + * protocol. */ - adrp x0, idmap_pg_dir - adrp x1, swapper_pg_end + adrp x0, init_pg_dir + adrp x1, init_pg_end sub x1, x1, x0 bl __inval_dcache_area /* - * Clear the idmap and swapper page tables. + * Clear the init page tables. */ - adrp x0, idmap_pg_dir - adrp x1, swapper_pg_end + adrp x0, init_pg_dir + adrp x1, init_pg_end sub x1, x1, x0 1: stp xzr, xzr, [x0], #16 stp xzr, xzr, [x0], #16 @@ -373,7 +375,7 @@ __create_page_tables: /* * Map the kernel image (starting with PHYS_OFFSET). */ - adrp x0, swapper_pg_dir + adrp x0, init_pg_dir mov_q x5, KIMAGE_VADDR + TEXT_OFFSET // compile time __va(_text) add x5, x5, x23 // add KASLR displacement mov x4, PTRS_PER_PGD @@ -390,7 +392,7 @@ __create_page_tables: * tables again to remove any speculatively loaded cache lines. */ adrp x0, idmap_pg_dir - adrp x1, swapper_pg_end + adrp x1, init_pg_end sub x1, x1, x0 dmb sy bl __inval_dcache_area @@ -577,6 +579,13 @@ set_hcr: 7: msr mdcr_el2, x3 // Configure debug traps + /* LORegions */ + mrs x1, id_aa64mmfr1_el1 + ubfx x0, x1, #ID_AA64MMFR1_LOR_SHIFT, 4 + cbz x0, 1f + msr_s SYS_LORC_EL1, xzr +1: + /* Stage-2 translation */ msr vttbr_el2, xzr @@ -699,6 +708,7 @@ secondary_startup: * Common entry point for secondary CPUs. */ bl __cpu_setup // initialise processor + adrp x1, swapper_pg_dir bl __enable_mmu ldr x8, =__secondary_switched br x8 @@ -741,6 +751,7 @@ ENDPROC(__secondary_switched) * Enable the MMU. * * x0 = SCTLR_EL1 value for turning on the MMU. + * x1 = TTBR1_EL1 value * * Returns to the caller via x30/lr. This requires the caller to be covered * by the .idmap.text section. @@ -749,17 +760,16 @@ ENDPROC(__secondary_switched) * If it isn't, park the CPU */ ENTRY(__enable_mmu) - mrs x1, ID_AA64MMFR0_EL1 - ubfx x2, x1, #ID_AA64MMFR0_TGRAN_SHIFT, 4 + mrs x2, ID_AA64MMFR0_EL1 + ubfx x2, x2, #ID_AA64MMFR0_TGRAN_SHIFT, 4 cmp x2, #ID_AA64MMFR0_TGRAN_SUPPORTED b.ne __no_granule_support - update_early_cpu_boot_status 0, x1, x2 - adrp x1, idmap_pg_dir - adrp x2, swapper_pg_dir - phys_to_ttbr x3, x1 - phys_to_ttbr x4, x2 - msr ttbr0_el1, x3 // load TTBR0 - msr ttbr1_el1, x4 // load TTBR1 + update_early_cpu_boot_status 0, x2, x3 + adrp x2, idmap_pg_dir + phys_to_ttbr x1, x1 + phys_to_ttbr x2, x2 + msr ttbr0_el1, x2 // load TTBR0 + msr ttbr1_el1, x1 // load TTBR1 isb msr sctlr_el1, x0 isb @@ -816,6 +826,7 @@ __primary_switch: mrs x20, sctlr_el1 // preserve old SCTLR_EL1 value #endif + adrp x1, init_pg_dir bl __enable_mmu #ifdef CONFIG_RELOCATABLE bl __relocate_kernel diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c index 1ec5f28c39fc..6b2686d54411 100644 --- a/arch/arm64/kernel/hibernate.c +++ b/arch/arm64/kernel/hibernate.c @@ -313,6 +313,17 @@ int swsusp_arch_suspend(void) sleep_cpu = -EINVAL; __cpu_suspend_exit(); + + /* + * Just in case the boot kernel did turn the SSBD + * mitigation off behind our back, let's set the state + * to what we expect it to be. + */ + switch (arm64_get_ssbd_state()) { + case ARM64_SSBD_FORCE_ENABLE: + case ARM64_SSBD_KERNEL: + arm64_set_ssbd_mitigation(true); + } } local_daif_restore(flags); diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index 74bb56f656ef..8c9644376326 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -30,7 +30,6 @@ #include <linux/smp.h> #include <linux/uaccess.h> -#include <asm/compat.h> #include <asm/current.h> #include <asm/debug-monitors.h> #include <asm/hw_breakpoint.h> @@ -344,14 +343,13 @@ static int get_hbp_len(u8 hbp_len) /* * Check whether bp virtual address is in kernel space. */ -int arch_check_bp_in_kernelspace(struct perf_event *bp) +int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw) { unsigned int len; unsigned long va; - struct arch_hw_breakpoint *info = counter_arch_bp(bp); - va = info->address; - len = get_hbp_len(info->ctrl.len); + va = hw->address; + len = get_hbp_len(hw->ctrl.len); return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE); } @@ -422,53 +420,53 @@ int arch_bp_generic_fields(struct arch_hw_breakpoint_ctrl ctrl, /* * Construct an arch_hw_breakpoint from a perf_event. */ -static int arch_build_bp_info(struct perf_event *bp) +static int arch_build_bp_info(struct perf_event *bp, + const struct perf_event_attr *attr, + struct arch_hw_breakpoint *hw) { - struct arch_hw_breakpoint *info = counter_arch_bp(bp); - /* Type */ - switch (bp->attr.bp_type) { + switch (attr->bp_type) { case HW_BREAKPOINT_X: - info->ctrl.type = ARM_BREAKPOINT_EXECUTE; + hw->ctrl.type = ARM_BREAKPOINT_EXECUTE; break; case HW_BREAKPOINT_R: - info->ctrl.type = ARM_BREAKPOINT_LOAD; + hw->ctrl.type = ARM_BREAKPOINT_LOAD; break; case HW_BREAKPOINT_W: - info->ctrl.type = ARM_BREAKPOINT_STORE; + hw->ctrl.type = ARM_BREAKPOINT_STORE; break; case HW_BREAKPOINT_RW: - info->ctrl.type = ARM_BREAKPOINT_LOAD | ARM_BREAKPOINT_STORE; + hw->ctrl.type = ARM_BREAKPOINT_LOAD | ARM_BREAKPOINT_STORE; break; default: return -EINVAL; } /* Len */ - switch (bp->attr.bp_len) { + switch (attr->bp_len) { case HW_BREAKPOINT_LEN_1: - info->ctrl.len = ARM_BREAKPOINT_LEN_1; + hw->ctrl.len = ARM_BREAKPOINT_LEN_1; break; case HW_BREAKPOINT_LEN_2: - info->ctrl.len = ARM_BREAKPOINT_LEN_2; + hw->ctrl.len = ARM_BREAKPOINT_LEN_2; break; case HW_BREAKPOINT_LEN_3: - info->ctrl.len = ARM_BREAKPOINT_LEN_3; + hw->ctrl.len = ARM_BREAKPOINT_LEN_3; break; case HW_BREAKPOINT_LEN_4: - info->ctrl.len = ARM_BREAKPOINT_LEN_4; + hw->ctrl.len = ARM_BREAKPOINT_LEN_4; break; case HW_BREAKPOINT_LEN_5: - info->ctrl.len = ARM_BREAKPOINT_LEN_5; + hw->ctrl.len = ARM_BREAKPOINT_LEN_5; break; case HW_BREAKPOINT_LEN_6: - info->ctrl.len = ARM_BREAKPOINT_LEN_6; + hw->ctrl.len = ARM_BREAKPOINT_LEN_6; break; case HW_BREAKPOINT_LEN_7: - info->ctrl.len = ARM_BREAKPOINT_LEN_7; + hw->ctrl.len = ARM_BREAKPOINT_LEN_7; break; case HW_BREAKPOINT_LEN_8: - info->ctrl.len = ARM_BREAKPOINT_LEN_8; + hw->ctrl.len = ARM_BREAKPOINT_LEN_8; break; default: return -EINVAL; @@ -479,37 +477,37 @@ static int arch_build_bp_info(struct perf_event *bp) * AArch32 also requires breakpoints of length 2 for Thumb. * Watchpoints can be of length 1, 2, 4 or 8 bytes. */ - if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE) { + if (hw->ctrl.type == ARM_BREAKPOINT_EXECUTE) { if (is_compat_bp(bp)) { - if (info->ctrl.len != ARM_BREAKPOINT_LEN_2 && - info->ctrl.len != ARM_BREAKPOINT_LEN_4) + if (hw->ctrl.len != ARM_BREAKPOINT_LEN_2 && + hw->ctrl.len != ARM_BREAKPOINT_LEN_4) return -EINVAL; - } else if (info->ctrl.len != ARM_BREAKPOINT_LEN_4) { + } else if (hw->ctrl.len != ARM_BREAKPOINT_LEN_4) { /* * FIXME: Some tools (I'm looking at you perf) assume * that breakpoints should be sizeof(long). This * is nonsense. For now, we fix up the parameter * but we should probably return -EINVAL instead. */ - info->ctrl.len = ARM_BREAKPOINT_LEN_4; + hw->ctrl.len = ARM_BREAKPOINT_LEN_4; } } /* Address */ - info->address = bp->attr.bp_addr; + hw->address = attr->bp_addr; /* * Privilege * Note that we disallow combined EL0/EL1 breakpoints because * that would complicate the stepping code. */ - if (arch_check_bp_in_kernelspace(bp)) - info->ctrl.privilege = AARCH64_BREAKPOINT_EL1; + if (arch_check_bp_in_kernelspace(hw)) + hw->ctrl.privilege = AARCH64_BREAKPOINT_EL1; else - info->ctrl.privilege = AARCH64_BREAKPOINT_EL0; + hw->ctrl.privilege = AARCH64_BREAKPOINT_EL0; /* Enabled? */ - info->ctrl.enabled = !bp->attr.disabled; + hw->ctrl.enabled = !attr->disabled; return 0; } @@ -517,14 +515,15 @@ static int arch_build_bp_info(struct perf_event *bp) /* * Validate the arch-specific HW Breakpoint register settings. */ -int arch_validate_hwbkpt_settings(struct perf_event *bp) +int hw_breakpoint_arch_parse(struct perf_event *bp, + const struct perf_event_attr *attr, + struct arch_hw_breakpoint *hw) { - struct arch_hw_breakpoint *info = counter_arch_bp(bp); int ret; u64 alignment_mask, offset; /* Build the arch_hw_breakpoint. */ - ret = arch_build_bp_info(bp); + ret = arch_build_bp_info(bp, attr, hw); if (ret) return ret; @@ -538,42 +537,42 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) * that here. */ if (is_compat_bp(bp)) { - if (info->ctrl.len == ARM_BREAKPOINT_LEN_8) + if (hw->ctrl.len == ARM_BREAKPOINT_LEN_8) alignment_mask = 0x7; else alignment_mask = 0x3; - offset = info->address & alignment_mask; + offset = hw->address & alignment_mask; switch (offset) { case 0: /* Aligned */ break; case 1: /* Allow single byte watchpoint. */ - if (info->ctrl.len == ARM_BREAKPOINT_LEN_1) + if (hw->ctrl.len == ARM_BREAKPOINT_LEN_1) break; case 2: /* Allow halfword watchpoints and breakpoints. */ - if (info->ctrl.len == ARM_BREAKPOINT_LEN_2) + if (hw->ctrl.len == ARM_BREAKPOINT_LEN_2) break; default: return -EINVAL; } } else { - if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE) + if (hw->ctrl.type == ARM_BREAKPOINT_EXECUTE) alignment_mask = 0x3; else alignment_mask = 0x7; - offset = info->address & alignment_mask; + offset = hw->address & alignment_mask; } - info->address &= ~alignment_mask; - info->ctrl.len <<= offset; + hw->address &= ~alignment_mask; + hw->ctrl.len <<= offset; /* * Disallow per-task kernel breakpoints since these would * complicate the stepping code. */ - if (info->ctrl.privilege == AARCH64_BREAKPOINT_EL1 && bp->hw.target) + if (hw->ctrl.privilege == AARCH64_BREAKPOINT_EL1 && bp->hw.target) return -EINVAL; return 0; diff --git a/arch/arm64/kernel/image.h b/arch/arm64/kernel/image.h index c7fcb232fe47..a820ed07fb80 100644 --- a/arch/arm64/kernel/image.h +++ b/arch/arm64/kernel/image.h @@ -103,6 +103,7 @@ __efistub_strlen = KALLSYMS_HIDE(__pi_strlen); __efistub_strnlen = KALLSYMS_HIDE(__pi_strnlen); __efistub_strcmp = KALLSYMS_HIDE(__pi_strcmp); __efistub_strncmp = KALLSYMS_HIDE(__pi_strncmp); +__efistub_strrchr = KALLSYMS_HIDE(__pi_strrchr); __efistub___flush_dcache_area = KALLSYMS_HIDE(__pi___flush_dcache_area); #ifdef CONFIG_KASAN diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c index 2718a77da165..2b3413549734 100644 --- a/arch/arm64/kernel/insn.c +++ b/arch/arm64/kernel/insn.c @@ -35,6 +35,7 @@ #define AARCH64_INSN_SF_BIT BIT(31) #define AARCH64_INSN_N_BIT BIT(22) +#define AARCH64_INSN_LSL_12 BIT(22) static int aarch64_insn_encoding_class[] = { AARCH64_INSN_CLS_UNKNOWN, @@ -148,20 +149,6 @@ int __kprobes aarch64_insn_write(void *addr, u32 insn) return __aarch64_insn_write(addr, cpu_to_le32(insn)); } -static bool __kprobes __aarch64_insn_hotpatch_safe(u32 insn) -{ - if (aarch64_get_insn_class(insn) != AARCH64_INSN_CLS_BR_SYS) - return false; - - return aarch64_insn_is_b(insn) || - aarch64_insn_is_bl(insn) || - aarch64_insn_is_svc(insn) || - aarch64_insn_is_hvc(insn) || - aarch64_insn_is_smc(insn) || - aarch64_insn_is_brk(insn) || - aarch64_insn_is_nop(insn); -} - bool __kprobes aarch64_insn_uses_literal(u32 insn) { /* ldr/ldrsw (literal), prfm */ @@ -188,22 +175,6 @@ bool __kprobes aarch64_insn_is_branch(u32 insn) aarch64_insn_is_bcond(insn); } -/* - * ARM Architecture Reference Manual for ARMv8 Profile-A, Issue A.a - * Section B2.6.5 "Concurrent modification and execution of instructions": - * Concurrent modification and execution of instructions can lead to the - * resulting instruction performing any behavior that can be achieved by - * executing any sequence of instructions that can be executed from the - * same Exception level, except where the instruction before modification - * and the instruction after modification is a B, BL, NOP, BKPT, SVC, HVC, - * or SMC instruction. - */ -bool __kprobes aarch64_insn_hotpatch_safe(u32 old_insn, u32 new_insn) -{ - return __aarch64_insn_hotpatch_safe(old_insn) && - __aarch64_insn_hotpatch_safe(new_insn); -} - int __kprobes aarch64_insn_patch_text_nosync(void *addr, u32 insn) { u32 *tp = addr; @@ -215,8 +186,8 @@ int __kprobes aarch64_insn_patch_text_nosync(void *addr, u32 insn) ret = aarch64_insn_write(tp, insn); if (ret == 0) - flush_icache_range((uintptr_t)tp, - (uintptr_t)tp + AARCH64_INSN_SIZE); + __flush_icache_range((uintptr_t)tp, + (uintptr_t)tp + AARCH64_INSN_SIZE); return ret; } @@ -238,11 +209,6 @@ static int __kprobes aarch64_insn_patch_text_cb(void *arg) for (i = 0; ret == 0 && i < pp->insn_cnt; i++) ret = aarch64_insn_patch_text_nosync(pp->text_addrs[i], pp->new_insns[i]); - /* - * aarch64_insn_patch_text_nosync() calls flush_icache_range(), - * which ends with "dsb; isb" pair guaranteeing global - * visibility. - */ /* Notify other processors with an additional increment. */ atomic_inc(&pp->cpu_count); } else { @@ -254,8 +220,7 @@ static int __kprobes aarch64_insn_patch_text_cb(void *arg) return ret; } -static -int __kprobes aarch64_insn_patch_text_sync(void *addrs[], u32 insns[], int cnt) +int __kprobes aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt) { struct aarch64_insn_patch patch = { .text_addrs = addrs, @@ -271,34 +236,6 @@ int __kprobes aarch64_insn_patch_text_sync(void *addrs[], u32 insns[], int cnt) cpu_online_mask); } -int __kprobes aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt) -{ - int ret; - u32 insn; - - /* Unsafe to patch multiple instructions without synchronizaiton */ - if (cnt == 1) { - ret = aarch64_insn_read(addrs[0], &insn); - if (ret) - return ret; - - if (aarch64_insn_hotpatch_safe(insn, insns[0])) { - /* - * ARMv8 architecture doesn't guarantee all CPUs see - * the new instruction after returning from function - * aarch64_insn_patch_text_nosync(). So send IPIs to - * all other CPUs to achieve instruction - * synchronization. - */ - ret = aarch64_insn_patch_text_nosync(addrs[0], insns[0]); - kick_all_cpus_sync(); - return ret; - } - } - - return aarch64_insn_patch_text_sync(addrs, insns, cnt); -} - static int __kprobes aarch64_get_imm_shift_mask(enum aarch64_insn_imm_type type, u32 *maskp, int *shiftp) { @@ -343,6 +280,10 @@ static int __kprobes aarch64_get_imm_shift_mask(enum aarch64_insn_imm_type type, mask = BIT(6) - 1; shift = 16; break; + case AARCH64_INSN_IMM_N: + mask = 1; + shift = 22; + break; default: return -EINVAL; } @@ -899,9 +840,18 @@ u32 aarch64_insn_gen_add_sub_imm(enum aarch64_insn_register dst, return AARCH64_BREAK_FAULT; } + /* We can't encode more than a 24bit value (12bit + 12bit shift) */ + if (imm & ~(BIT(24) - 1)) + goto out; + + /* If we have something in the top 12 bits... */ if (imm & ~(SZ_4K - 1)) { - pr_err("%s: invalid immediate encoding %d\n", __func__, imm); - return AARCH64_BREAK_FAULT; + /* ... and in the low 12 bits -> error */ + if (imm & (SZ_4K - 1)) + goto out; + + imm >>= 12; + insn |= AARCH64_INSN_LSL_12; } insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RD, insn, dst); @@ -909,6 +859,10 @@ u32 aarch64_insn_gen_add_sub_imm(enum aarch64_insn_register dst, insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn, src); return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_12, insn, imm); + +out: + pr_err("%s: invalid immediate encoding %d\n", __func__, imm); + return AARCH64_BREAK_FAULT; } u32 aarch64_insn_gen_bitfield(enum aarch64_insn_register dst, @@ -1481,3 +1435,171 @@ pstate_check_t * const aarch32_opcode_cond_checks[16] = { __check_hi, __check_ls, __check_ge, __check_lt, __check_gt, __check_le, __check_al, __check_al }; + +static bool range_of_ones(u64 val) +{ + /* Doesn't handle full ones or full zeroes */ + u64 sval = val >> __ffs64(val); + + /* One of Sean Eron Anderson's bithack tricks */ + return ((sval + 1) & (sval)) == 0; +} + +static u32 aarch64_encode_immediate(u64 imm, + enum aarch64_insn_variant variant, + u32 insn) +{ + unsigned int immr, imms, n, ones, ror, esz, tmp; + u64 mask = ~0UL; + + /* Can't encode full zeroes or full ones */ + if (!imm || !~imm) + return AARCH64_BREAK_FAULT; + + switch (variant) { + case AARCH64_INSN_VARIANT_32BIT: + if (upper_32_bits(imm)) + return AARCH64_BREAK_FAULT; + esz = 32; + break; + case AARCH64_INSN_VARIANT_64BIT: + insn |= AARCH64_INSN_SF_BIT; + esz = 64; + break; + default: + pr_err("%s: unknown variant encoding %d\n", __func__, variant); + return AARCH64_BREAK_FAULT; + } + + /* + * Inverse of Replicate(). Try to spot a repeating pattern + * with a pow2 stride. + */ + for (tmp = esz / 2; tmp >= 2; tmp /= 2) { + u64 emask = BIT(tmp) - 1; + + if ((imm & emask) != ((imm >> tmp) & emask)) + break; + + esz = tmp; + mask = emask; + } + + /* N is only set if we're encoding a 64bit value */ + n = esz == 64; + + /* Trim imm to the element size */ + imm &= mask; + + /* That's how many ones we need to encode */ + ones = hweight64(imm); + + /* + * imms is set to (ones - 1), prefixed with a string of ones + * and a zero if they fit. Cap it to 6 bits. + */ + imms = ones - 1; + imms |= 0xf << ffs(esz); + imms &= BIT(6) - 1; + + /* Compute the rotation */ + if (range_of_ones(imm)) { + /* + * Pattern: 0..01..10..0 + * + * Compute how many rotate we need to align it right + */ + ror = __ffs64(imm); + } else { + /* + * Pattern: 0..01..10..01..1 + * + * Fill the unused top bits with ones, and check if + * the result is a valid immediate (all ones with a + * contiguous ranges of zeroes). + */ + imm |= ~mask; + if (!range_of_ones(~imm)) + return AARCH64_BREAK_FAULT; + + /* + * Compute the rotation to get a continuous set of + * ones, with the first bit set at position 0 + */ + ror = fls(~imm); + } + + /* + * immr is the number of bits we need to rotate back to the + * original set of ones. Note that this is relative to the + * element size... + */ + immr = (esz - ror) % esz; + + insn = aarch64_insn_encode_immediate(AARCH64_INSN_IMM_N, insn, n); + insn = aarch64_insn_encode_immediate(AARCH64_INSN_IMM_R, insn, immr); + return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_S, insn, imms); +} + +u32 aarch64_insn_gen_logical_immediate(enum aarch64_insn_logic_type type, + enum aarch64_insn_variant variant, + enum aarch64_insn_register Rn, + enum aarch64_insn_register Rd, + u64 imm) +{ + u32 insn; + + switch (type) { + case AARCH64_INSN_LOGIC_AND: + insn = aarch64_insn_get_and_imm_value(); + break; + case AARCH64_INSN_LOGIC_ORR: + insn = aarch64_insn_get_orr_imm_value(); + break; + case AARCH64_INSN_LOGIC_EOR: + insn = aarch64_insn_get_eor_imm_value(); + break; + case AARCH64_INSN_LOGIC_AND_SETFLAGS: + insn = aarch64_insn_get_ands_imm_value(); + break; + default: + pr_err("%s: unknown logical encoding %d\n", __func__, type); + return AARCH64_BREAK_FAULT; + } + + insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RD, insn, Rd); + insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn, Rn); + return aarch64_encode_immediate(imm, variant, insn); +} + +u32 aarch64_insn_gen_extr(enum aarch64_insn_variant variant, + enum aarch64_insn_register Rm, + enum aarch64_insn_register Rn, + enum aarch64_insn_register Rd, + u8 lsb) +{ + u32 insn; + + insn = aarch64_insn_get_extr_value(); + + switch (variant) { + case AARCH64_INSN_VARIANT_32BIT: + if (lsb > 31) + return AARCH64_BREAK_FAULT; + break; + case AARCH64_INSN_VARIANT_64BIT: + if (lsb > 63) + return AARCH64_BREAK_FAULT; + insn |= AARCH64_INSN_SF_BIT; + insn = aarch64_insn_encode_immediate(AARCH64_INSN_IMM_N, insn, 1); + break; + default: + pr_err("%s: unknown variant encoding %d\n", __func__, variant); + return AARCH64_BREAK_FAULT; + } + + insn = aarch64_insn_encode_immediate(AARCH64_INSN_IMM_S, insn, lsb); + insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RD, insn, Rd); + insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn, Rn); + return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RM, insn, Rm); +} diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c index 60e5fc661f74..780a12f59a8f 100644 --- a/arch/arm64/kernel/irq.c +++ b/arch/arm64/kernel/irq.c @@ -42,16 +42,6 @@ int arch_show_interrupts(struct seq_file *p, int prec) return 0; } -void (*handle_arch_irq)(struct pt_regs *) = NULL; - -void __init set_handle_irq(void (*handle_irq)(struct pt_regs *)) -{ - if (handle_arch_irq) - return; - - handle_arch_irq = handle_irq; -} - #ifdef CONFIG_VMAP_STACK static void init_irq_stacks(void) { diff --git a/arch/arm64/kernel/jump_label.c b/arch/arm64/kernel/jump_label.c index c2dd1ad3e648..646b9562ee64 100644 --- a/arch/arm64/kernel/jump_label.c +++ b/arch/arm64/kernel/jump_label.c @@ -25,18 +25,18 @@ void arch_jump_label_transform(struct jump_entry *entry, enum jump_label_type type) { - void *addr = (void *)entry->code; + void *addr = (void *)jump_entry_code(entry); u32 insn; if (type == JUMP_LABEL_JMP) { - insn = aarch64_insn_gen_branch_imm(entry->code, - entry->target, + insn = aarch64_insn_gen_branch_imm(jump_entry_code(entry), + jump_entry_target(entry), AARCH64_INSN_BRANCH_NOLINK); } else { insn = aarch64_insn_gen_nop(); } - aarch64_insn_patch_text(&addr, &insn, 1); + aarch64_insn_patch_text_nosync(addr, insn); } void arch_jump_label_transform_static(struct jump_entry *entry, diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c index 47080c49cc7e..f0e6ab8abe9c 100644 --- a/arch/arm64/kernel/kaslr.c +++ b/arch/arm64/kernel/kaslr.c @@ -117,53 +117,42 @@ u64 __init kaslr_early_init(u64 dt_phys) /* * OK, so we are proceeding with KASLR enabled. Calculate a suitable * kernel image offset from the seed. Let's place the kernel in the - * lower half of the VMALLOC area (VA_BITS - 2). + * middle half of the VMALLOC area (VA_BITS - 2), and stay clear of + * the lower and upper quarters to avoid colliding with other + * allocations. * Even if we could randomize at page granularity for 16k and 64k pages, * let's always round to 2 MB so we don't interfere with the ability to * map using contiguous PTEs */ mask = ((1UL << (VA_BITS - 2)) - 1) & ~(SZ_2M - 1); - offset = seed & mask; + offset = BIT(VA_BITS - 3) + (seed & mask); /* use the top 16 bits to randomize the linear region */ memstart_offset_seed = seed >> 48; - /* - * The kernel Image should not extend across a 1GB/32MB/512MB alignment - * boundary (for 4KB/16KB/64KB granule kernels, respectively). If this - * happens, round down the KASLR offset by (1 << SWAPPER_TABLE_SHIFT). - * - * NOTE: The references to _text and _end below will already take the - * modulo offset (the physical displacement modulo 2 MB) into - * account, given that the physical placement is controlled by - * the loader, and will not change as a result of the virtual - * mapping we choose. - */ - if ((((u64)_text + offset) >> SWAPPER_TABLE_SHIFT) != - (((u64)_end + offset) >> SWAPPER_TABLE_SHIFT)) - offset = round_down(offset, 1 << SWAPPER_TABLE_SHIFT); - if (IS_ENABLED(CONFIG_KASAN)) /* * KASAN does not expect the module region to intersect the * vmalloc region, since shadow memory is allocated for each * module at load time, whereas the vmalloc region is shadowed * by KASAN zero pages. So keep modules out of the vmalloc - * region if KASAN is enabled. + * region if KASAN is enabled, and put the kernel well within + * 4 GB of the module region. */ - return offset; + return offset % SZ_2G; if (IS_ENABLED(CONFIG_RANDOMIZE_MODULE_REGION_FULL)) { /* - * Randomize the module region independently from the core - * kernel. This prevents modules from leaking any information + * Randomize the module region over a 4 GB window covering the + * kernel. This reduces the risk of modules leaking information * about the address of the kernel itself, but results in * branches between modules and the core kernel that are * resolved via PLTs. (Branches between modules will be * resolved normally.) */ - module_range = VMALLOC_END - VMALLOC_START - MODULES_VSIZE; - module_alloc_base = VMALLOC_START; + module_range = SZ_4G - (u64)(_end - _stext); + module_alloc_base = max((u64)_end + offset - SZ_4G, + (u64)MODULES_VADDR); } else { /* * Randomize the module region by setting module_alloc_base to diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c index 2122cd187f19..a20de58061a8 100644 --- a/arch/arm64/kernel/kgdb.c +++ b/arch/arm64/kernel/kgdb.c @@ -138,14 +138,25 @@ int dbg_set_reg(int regno, void *mem, struct pt_regs *regs) void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *task) { - struct pt_regs *thread_regs; + struct cpu_context *cpu_context = &task->thread.cpu_context; /* Initialize to zero */ memset((char *)gdb_regs, 0, NUMREGBYTES); - thread_regs = task_pt_regs(task); - memcpy((void *)gdb_regs, (void *)thread_regs->regs, GP_REG_BYTES); - /* Special case for PSTATE (check comments in asm/kgdb.h for details) */ - dbg_get_reg(33, gdb_regs + GP_REG_BYTES, thread_regs); + + gdb_regs[19] = cpu_context->x19; + gdb_regs[20] = cpu_context->x20; + gdb_regs[21] = cpu_context->x21; + gdb_regs[22] = cpu_context->x22; + gdb_regs[23] = cpu_context->x23; + gdb_regs[24] = cpu_context->x24; + gdb_regs[25] = cpu_context->x25; + gdb_regs[26] = cpu_context->x26; + gdb_regs[27] = cpu_context->x27; + gdb_regs[28] = cpu_context->x28; + gdb_regs[29] = cpu_context->fp; + + gdb_regs[31] = cpu_context->sp; + gdb_regs[32] = cpu_context->pc; } void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc) diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c index f76ea92dff91..922add8adb74 100644 --- a/arch/arm64/kernel/machine_kexec.c +++ b/arch/arm64/kernel/machine_kexec.c @@ -184,8 +184,15 @@ void machine_kexec(struct kimage *kimage) /* Flush the reboot_code_buffer in preparation for its execution. */ __flush_dcache_area(reboot_code_buffer, arm64_relocate_new_kernel_size); - flush_icache_range((uintptr_t)reboot_code_buffer, - arm64_relocate_new_kernel_size); + + /* + * Although we've killed off the secondary CPUs, we don't update + * the online mask if we're handling a crash kernel and consequently + * need to avoid flush_icache_range(), which will attempt to IPI + * the offline CPUs. Therefore, we must use the __* variant here. + */ + __flush_icache_range((uintptr_t)reboot_code_buffer, + arm64_relocate_new_kernel_size); /* Flush the kimage list and its buffers. */ kexec_list_flush(kimage); @@ -207,8 +214,7 @@ void machine_kexec(struct kimage *kimage) * relocation is complete. */ - cpu_soft_restart(kimage != kexec_crash_image, - reboot_code_buffer_phys, kimage->head, kimage->start, 0); + cpu_soft_restart(reboot_code_buffer_phys, kimage->head, kimage->start, 0); BUG(); /* Should never get here. */ } @@ -352,13 +358,3 @@ void crash_free_reserved_phys_range(unsigned long begin, unsigned long end) } } #endif /* CONFIG_HIBERNATION */ - -void arch_crash_save_vmcoreinfo(void) -{ - VMCOREINFO_NUMBER(VA_BITS); - /* Please note VMCOREINFO_NUMBER() uses "%d", not "%x" */ - vmcoreinfo_append_str("NUMBER(kimage_voffset)=0x%llx\n", - kimage_voffset); - vmcoreinfo_append_str("NUMBER(PHYS_OFFSET)=0x%llx\n", - PHYS_OFFSET); -} diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c index ea640f92fe5a..f0690c2ca3e0 100644 --- a/arch/arm64/kernel/module-plts.c +++ b/arch/arm64/kernel/module-plts.c @@ -36,11 +36,53 @@ u64 module_emit_plt_entry(struct module *mod, void *loc, const Elf64_Rela *rela, return (u64)&plt[i - 1]; pltsec->plt_num_entries++; - BUG_ON(pltsec->plt_num_entries > pltsec->plt_max_entries); + if (WARN_ON(pltsec->plt_num_entries > pltsec->plt_max_entries)) + return 0; return (u64)&plt[i]; } +#ifdef CONFIG_ARM64_ERRATUM_843419 +u64 module_emit_veneer_for_adrp(struct module *mod, void *loc, u64 val) +{ + struct mod_plt_sec *pltsec = !in_init(mod, loc) ? &mod->arch.core : + &mod->arch.init; + struct plt_entry *plt = (struct plt_entry *)pltsec->plt->sh_addr; + int i = pltsec->plt_num_entries++; + u32 mov0, mov1, mov2, br; + int rd; + + if (WARN_ON(pltsec->plt_num_entries > pltsec->plt_max_entries)) + return 0; + + /* get the destination register of the ADRP instruction */ + rd = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RD, + le32_to_cpup((__le32 *)loc)); + + /* generate the veneer instructions */ + mov0 = aarch64_insn_gen_movewide(rd, (u16)~val, 0, + AARCH64_INSN_VARIANT_64BIT, + AARCH64_INSN_MOVEWIDE_INVERSE); + mov1 = aarch64_insn_gen_movewide(rd, (u16)(val >> 16), 16, + AARCH64_INSN_VARIANT_64BIT, + AARCH64_INSN_MOVEWIDE_KEEP); + mov2 = aarch64_insn_gen_movewide(rd, (u16)(val >> 32), 32, + AARCH64_INSN_VARIANT_64BIT, + AARCH64_INSN_MOVEWIDE_KEEP); + br = aarch64_insn_gen_branch_imm((u64)&plt[i].br, (u64)loc + 4, + AARCH64_INSN_BRANCH_NOLINK); + + plt[i] = (struct plt_entry){ + cpu_to_le32(mov0), + cpu_to_le32(mov1), + cpu_to_le32(mov2), + cpu_to_le32(br) + }; + + return (u64)&plt[i]; +} +#endif + #define cmp_3way(a,b) ((a) < (b) ? -1 : (a) > (b)) static int cmp_rela(const void *a, const void *b) @@ -68,16 +110,21 @@ static bool duplicate_rel(const Elf64_Rela *rela, int num) } static unsigned int count_plts(Elf64_Sym *syms, Elf64_Rela *rela, int num, - Elf64_Word dstidx) + Elf64_Word dstidx, Elf_Shdr *dstsec) { unsigned int ret = 0; Elf64_Sym *s; int i; for (i = 0; i < num; i++) { + u64 min_align; + switch (ELF64_R_TYPE(rela[i].r_info)) { case R_AARCH64_JUMP26: case R_AARCH64_CALL26: + if (!IS_ENABLED(CONFIG_RANDOMIZE_BASE)) + break; + /* * We only have to consider branch targets that resolve * to symbols that are defined in a different section. @@ -109,6 +156,41 @@ static unsigned int count_plts(Elf64_Sym *syms, Elf64_Rela *rela, int num, if (rela[i].r_addend != 0 || !duplicate_rel(rela, i)) ret++; break; + case R_AARCH64_ADR_PREL_PG_HI21_NC: + case R_AARCH64_ADR_PREL_PG_HI21: + if (!IS_ENABLED(CONFIG_ARM64_ERRATUM_843419) || + !cpus_have_const_cap(ARM64_WORKAROUND_843419)) + break; + + /* + * Determine the minimal safe alignment for this ADRP + * instruction: the section alignment at which it is + * guaranteed not to appear at a vulnerable offset. + * + * This comes down to finding the least significant zero + * bit in bits [11:3] of the section offset, and + * increasing the section's alignment so that the + * resulting address of this instruction is guaranteed + * to equal the offset in that particular bit (as well + * as all less signficant bits). This ensures that the + * address modulo 4 KB != 0xfff8 or 0xfffc (which would + * have all ones in bits [11:3]) + */ + min_align = 2ULL << ffz(rela[i].r_offset | 0x7); + + /* + * Allocate veneer space for each ADRP that may appear + * at a vulnerable offset nonetheless. At relocation + * time, some of these will remain unused since some + * ADRP instructions can be patched to ADR instructions + * instead. + */ + if (min_align > SZ_4K) + ret++; + else + dstsec->sh_addralign = max(dstsec->sh_addralign, + min_align); + break; } } return ret; @@ -166,10 +248,10 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, if (strncmp(secstrings + dstsec->sh_name, ".init", 5) != 0) core_plts += count_plts(syms, rels, numrels, - sechdrs[i].sh_info); + sechdrs[i].sh_info, dstsec); else init_plts += count_plts(syms, rels, numrels, - sechdrs[i].sh_info); + sechdrs[i].sh_info, dstsec); } mod->arch.core.plt->sh_type = SHT_NOBITS; diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index f469e0435903..f0f27aeefb73 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -55,9 +55,10 @@ void *module_alloc(unsigned long size) * less likely that the module region gets exhausted, so we * can simply omit this fallback in that case. */ - p = __vmalloc_node_range(size, MODULE_ALIGN, VMALLOC_START, - VMALLOC_END, GFP_KERNEL, PAGE_KERNEL_EXEC, 0, - NUMA_NO_NODE, __builtin_return_address(0)); + p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base, + module_alloc_base + SZ_4G, GFP_KERNEL, + PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE, + __builtin_return_address(0)); if (p && (kasan_module_alloc(p, size) < 0)) { vfree(p); @@ -197,6 +198,34 @@ static int reloc_insn_imm(enum aarch64_reloc_op op, __le32 *place, u64 val, return 0; } +static int reloc_insn_adrp(struct module *mod, __le32 *place, u64 val) +{ + u32 insn; + + if (!IS_ENABLED(CONFIG_ARM64_ERRATUM_843419) || + !cpus_have_const_cap(ARM64_WORKAROUND_843419) || + ((u64)place & 0xfff) < 0xff8) + return reloc_insn_imm(RELOC_OP_PAGE, place, val, 12, 21, + AARCH64_INSN_IMM_ADR); + + /* patch ADRP to ADR if it is in range */ + if (!reloc_insn_imm(RELOC_OP_PREL, place, val & ~0xfff, 0, 21, + AARCH64_INSN_IMM_ADR)) { + insn = le32_to_cpu(*place); + insn &= ~BIT(31); + } else { + /* out of range for ADR -> emit a veneer */ + val = module_emit_veneer_for_adrp(mod, place, val & ~0xfff); + if (!val) + return -ENOEXEC; + insn = aarch64_insn_gen_branch_imm((u64)place, val, + AARCH64_INSN_BRANCH_NOLINK); + } + + *place = cpu_to_le32(insn); + return 0; +} + int apply_relocate_add(Elf64_Shdr *sechdrs, const char *strtab, unsigned int symindex, @@ -336,14 +365,13 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 0, 21, AARCH64_INSN_IMM_ADR); break; -#ifndef CONFIG_ARM64_ERRATUM_843419 case R_AARCH64_ADR_PREL_PG_HI21_NC: overflow_check = false; case R_AARCH64_ADR_PREL_PG_HI21: - ovf = reloc_insn_imm(RELOC_OP_PAGE, loc, val, 12, 21, - AARCH64_INSN_IMM_ADR); + ovf = reloc_insn_adrp(me, loc, val); + if (ovf && ovf != -ERANGE) + return ovf; break; -#endif case R_AARCH64_ADD_ABS_LO12_NC: case R_AARCH64_LDST8_ABS_LO12_NC: overflow_check = false; @@ -386,6 +414,8 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, if (IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) && ovf == -ERANGE) { val = module_emit_plt_entry(me, loc, &rel[i], sym); + if (!val) + return -ENOEXEC; ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 26, AARCH64_INSN_IMM_26); } @@ -418,9 +448,8 @@ int module_finalize(const Elf_Ehdr *hdr, const char *secstrs = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; for (s = sechdrs, se = sechdrs + hdr->e_shnum; s < se; s++) { - if (strcmp(".altinstructions", secstrs + s->sh_name) == 0) { - apply_alternatives((void *)s->sh_addr, s->sh_size); - } + if (strcmp(".altinstructions", secstrs + s->sh_name) == 0) + apply_alternatives_module((void *)s->sh_addr, s->sh_size); #ifdef CONFIG_ARM64_MODULE_PLTS if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE) && !strcmp(".text.ftrace_trampoline", secstrs + s->sh_name)) diff --git a/arch/arm64/kernel/paravirt.c b/arch/arm64/kernel/paravirt.c index 53f371ed4568..75c158b0353f 100644 --- a/arch/arm64/kernel/paravirt.c +++ b/arch/arm64/kernel/paravirt.c @@ -21,5 +21,5 @@ struct static_key paravirt_steal_enabled; struct static_key paravirt_steal_rq_enabled; -struct pv_time_ops pv_time_ops; -EXPORT_SYMBOL_GPL(pv_time_ops); +struct paravirt_patch_template pv_ops; +EXPORT_SYMBOL_GPL(pv_ops); diff --git a/arch/arm64/kernel/pci.c b/arch/arm64/kernel/pci.c index 0e2ea1c78542..bb85e2f4603f 100644 --- a/arch/arm64/kernel/pci.c +++ b/arch/arm64/kernel/pci.c @@ -165,16 +165,15 @@ static void pci_acpi_generic_release_info(struct acpi_pci_root_info *ci) /* Interface called from ACPI code to setup PCI host controller */ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root) { - int node = acpi_get_node(root->device->handle); struct acpi_pci_generic_root_info *ri; struct pci_bus *bus, *child; struct acpi_pci_root_ops *root_ops; - ri = kzalloc_node(sizeof(*ri), GFP_KERNEL, node); + ri = kzalloc(sizeof(*ri), GFP_KERNEL); if (!ri) return NULL; - root_ops = kzalloc_node(sizeof(*root_ops), GFP_KERNEL, node); + root_ops = kzalloc(sizeof(*root_ops), GFP_KERNEL); if (!root_ops) { kfree(ri); return NULL; diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 85a251b6dfa8..e213f8e867f6 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -25,6 +25,7 @@ #include <asm/virt.h> #include <linux/acpi.h> +#include <linux/clocksource.h> #include <linux/of.h> #include <linux/perf/arm_pmu.h> #include <linux/platform_device.h> @@ -446,9 +447,16 @@ static struct attribute_group armv8_pmuv3_events_attr_group = { }; PMU_FORMAT_ATTR(event, "config:0-15"); +PMU_FORMAT_ATTR(long, "config1:0"); + +static inline bool armv8pmu_event_is_64bit(struct perf_event *event) +{ + return event->attr.config1 & 0x1; +} static struct attribute *armv8_pmuv3_format_attrs[] = { &format_attr_event.attr, + &format_attr_long.attr, NULL, }; @@ -466,6 +474,21 @@ static struct attribute_group armv8_pmuv3_format_attr_group = { (ARMV8_IDX_CYCLE_COUNTER + cpu_pmu->num_events - 1) /* + * We must chain two programmable counters for 64 bit events, + * except when we have allocated the 64bit cycle counter (for CPU + * cycles event). This must be called only when the event has + * a counter allocated. + */ +static inline bool armv8pmu_event_is_chained(struct perf_event *event) +{ + int idx = event->hw.idx; + + return !WARN_ON(idx < 0) && + armv8pmu_event_is_64bit(event) && + (idx != ARMV8_IDX_CYCLE_COUNTER); +} + +/* * ARMv8 low level PMU access */ @@ -503,34 +526,68 @@ static inline int armv8pmu_counter_has_overflowed(u32 pmnc, int idx) return pmnc & BIT(ARMV8_IDX_TO_COUNTER(idx)); } -static inline int armv8pmu_select_counter(int idx) +static inline void armv8pmu_select_counter(int idx) { u32 counter = ARMV8_IDX_TO_COUNTER(idx); write_sysreg(counter, pmselr_el0); isb(); +} - return idx; +static inline u32 armv8pmu_read_evcntr(int idx) +{ + armv8pmu_select_counter(idx); + return read_sysreg(pmxevcntr_el0); +} + +static inline u64 armv8pmu_read_hw_counter(struct perf_event *event) +{ + int idx = event->hw.idx; + u64 val = 0; + + val = armv8pmu_read_evcntr(idx); + if (armv8pmu_event_is_chained(event)) + val = (val << 32) | armv8pmu_read_evcntr(idx - 1); + return val; } -static inline u32 armv8pmu_read_counter(struct perf_event *event) +static inline u64 armv8pmu_read_counter(struct perf_event *event) { struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; - u32 value = 0; + u64 value = 0; if (!armv8pmu_counter_valid(cpu_pmu, idx)) pr_err("CPU%u reading wrong counter %d\n", smp_processor_id(), idx); else if (idx == ARMV8_IDX_CYCLE_COUNTER) value = read_sysreg(pmccntr_el0); - else if (armv8pmu_select_counter(idx) == idx) - value = read_sysreg(pmxevcntr_el0); + else + value = armv8pmu_read_hw_counter(event); return value; } -static inline void armv8pmu_write_counter(struct perf_event *event, u32 value) +static inline void armv8pmu_write_evcntr(int idx, u32 value) +{ + armv8pmu_select_counter(idx); + write_sysreg(value, pmxevcntr_el0); +} + +static inline void armv8pmu_write_hw_counter(struct perf_event *event, + u64 value) +{ + int idx = event->hw.idx; + + if (armv8pmu_event_is_chained(event)) { + armv8pmu_write_evcntr(idx, upper_32_bits(value)); + armv8pmu_write_evcntr(idx - 1, lower_32_bits(value)); + } else { + armv8pmu_write_evcntr(idx, value); + } +} + +static inline void armv8pmu_write_counter(struct perf_event *event, u64 value) { struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; @@ -541,22 +598,43 @@ static inline void armv8pmu_write_counter(struct perf_event *event, u32 value) smp_processor_id(), idx); else if (idx == ARMV8_IDX_CYCLE_COUNTER) { /* - * Set the upper 32bits as this is a 64bit counter but we only - * count using the lower 32bits and we want an interrupt when - * it overflows. + * The cycles counter is really a 64-bit counter. + * When treating it as a 32-bit counter, we only count + * the lower 32 bits, and set the upper 32-bits so that + * we get an interrupt upon 32-bit overflow. */ - u64 value64 = 0xffffffff00000000ULL | value; - - write_sysreg(value64, pmccntr_el0); - } else if (armv8pmu_select_counter(idx) == idx) - write_sysreg(value, pmxevcntr_el0); + if (!armv8pmu_event_is_64bit(event)) + value |= 0xffffffff00000000ULL; + write_sysreg(value, pmccntr_el0); + } else + armv8pmu_write_hw_counter(event, value); } static inline void armv8pmu_write_evtype(int idx, u32 val) { - if (armv8pmu_select_counter(idx) == idx) { - val &= ARMV8_PMU_EVTYPE_MASK; - write_sysreg(val, pmxevtyper_el0); + armv8pmu_select_counter(idx); + val &= ARMV8_PMU_EVTYPE_MASK; + write_sysreg(val, pmxevtyper_el0); +} + +static inline void armv8pmu_write_event_type(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + /* + * For chained events, the low counter is programmed to count + * the event of interest and the high counter is programmed + * with CHAIN event code with filters set to count at all ELs. + */ + if (armv8pmu_event_is_chained(event)) { + u32 chain_evt = ARMV8_PMUV3_PERFCTR_CHAIN | + ARMV8_PMU_INCLUDE_EL2; + + armv8pmu_write_evtype(idx - 1, hwc->config_base); + armv8pmu_write_evtype(idx, chain_evt); + } else { + armv8pmu_write_evtype(idx, hwc->config_base); } } @@ -567,6 +645,16 @@ static inline int armv8pmu_enable_counter(int idx) return idx; } +static inline void armv8pmu_enable_event_counter(struct perf_event *event) +{ + int idx = event->hw.idx; + + armv8pmu_enable_counter(idx); + if (armv8pmu_event_is_chained(event)) + armv8pmu_enable_counter(idx - 1); + isb(); +} + static inline int armv8pmu_disable_counter(int idx) { u32 counter = ARMV8_IDX_TO_COUNTER(idx); @@ -574,6 +662,16 @@ static inline int armv8pmu_disable_counter(int idx) return idx; } +static inline void armv8pmu_disable_event_counter(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + if (armv8pmu_event_is_chained(event)) + armv8pmu_disable_counter(idx - 1); + armv8pmu_disable_counter(idx); +} + static inline int armv8pmu_enable_intens(int idx) { u32 counter = ARMV8_IDX_TO_COUNTER(idx); @@ -581,6 +679,11 @@ static inline int armv8pmu_enable_intens(int idx) return idx; } +static inline int armv8pmu_enable_event_irq(struct perf_event *event) +{ + return armv8pmu_enable_intens(event->hw.idx); +} + static inline int armv8pmu_disable_intens(int idx) { u32 counter = ARMV8_IDX_TO_COUNTER(idx); @@ -593,6 +696,11 @@ static inline int armv8pmu_disable_intens(int idx) return idx; } +static inline int armv8pmu_disable_event_irq(struct perf_event *event) +{ + return armv8pmu_disable_intens(event->hw.idx); +} + static inline u32 armv8pmu_getreset_flags(void) { u32 value; @@ -610,10 +718,8 @@ static inline u32 armv8pmu_getreset_flags(void) static void armv8pmu_enable_event(struct perf_event *event) { unsigned long flags; - struct hw_perf_event *hwc = &event->hw; struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); - int idx = hwc->idx; /* * Enable counter and interrupt, and set the counter to count @@ -624,22 +730,22 @@ static void armv8pmu_enable_event(struct perf_event *event) /* * Disable counter */ - armv8pmu_disable_counter(idx); + armv8pmu_disable_event_counter(event); /* * Set event (if destined for PMNx counters). */ - armv8pmu_write_evtype(idx, hwc->config_base); + armv8pmu_write_event_type(event); /* * Enable interrupt for this counter */ - armv8pmu_enable_intens(idx); + armv8pmu_enable_event_irq(event); /* * Enable counter */ - armv8pmu_enable_counter(idx); + armv8pmu_enable_event_counter(event); raw_spin_unlock_irqrestore(&events->pmu_lock, flags); } @@ -647,10 +753,8 @@ static void armv8pmu_enable_event(struct perf_event *event) static void armv8pmu_disable_event(struct perf_event *event) { unsigned long flags; - struct hw_perf_event *hwc = &event->hw; struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); - int idx = hwc->idx; /* * Disable counter and interrupt @@ -660,21 +764,42 @@ static void armv8pmu_disable_event(struct perf_event *event) /* * Disable counter */ - armv8pmu_disable_counter(idx); + armv8pmu_disable_event_counter(event); /* * Disable interrupt for this counter */ - armv8pmu_disable_intens(idx); + armv8pmu_disable_event_irq(event); + + raw_spin_unlock_irqrestore(&events->pmu_lock, flags); +} + +static void armv8pmu_start(struct arm_pmu *cpu_pmu) +{ + unsigned long flags; + struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); + raw_spin_lock_irqsave(&events->pmu_lock, flags); + /* Enable all counters */ + armv8pmu_pmcr_write(armv8pmu_pmcr_read() | ARMV8_PMU_PMCR_E); + raw_spin_unlock_irqrestore(&events->pmu_lock, flags); +} + +static void armv8pmu_stop(struct arm_pmu *cpu_pmu) +{ + unsigned long flags; + struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); + + raw_spin_lock_irqsave(&events->pmu_lock, flags); + /* Disable all counters */ + armv8pmu_pmcr_write(armv8pmu_pmcr_read() & ~ARMV8_PMU_PMCR_E); raw_spin_unlock_irqrestore(&events->pmu_lock, flags); } -static irqreturn_t armv8pmu_handle_irq(int irq_num, void *dev) +static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu) { u32 pmovsr; struct perf_sample_data data; - struct arm_pmu *cpu_pmu = (struct arm_pmu *)dev; struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events); struct pt_regs *regs; int idx; @@ -695,6 +820,11 @@ static irqreturn_t armv8pmu_handle_irq(int irq_num, void *dev) */ regs = get_irq_regs(); + /* + * Stop the PMU while processing the counter overflows + * to prevent skews in group events. + */ + armv8pmu_stop(cpu_pmu); for (idx = 0; idx < cpu_pmu->num_events; ++idx) { struct perf_event *event = cpuc->events[idx]; struct hw_perf_event *hwc; @@ -719,6 +849,7 @@ static irqreturn_t armv8pmu_handle_irq(int irq_num, void *dev) if (perf_event_overflow(event, &data, regs)) cpu_pmu->disable(event); } + armv8pmu_start(cpu_pmu); /* * Handle the pending perf events. @@ -732,32 +863,42 @@ static irqreturn_t armv8pmu_handle_irq(int irq_num, void *dev) return IRQ_HANDLED; } -static void armv8pmu_start(struct arm_pmu *cpu_pmu) +static int armv8pmu_get_single_idx(struct pmu_hw_events *cpuc, + struct arm_pmu *cpu_pmu) { - unsigned long flags; - struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); + int idx; - raw_spin_lock_irqsave(&events->pmu_lock, flags); - /* Enable all counters */ - armv8pmu_pmcr_write(armv8pmu_pmcr_read() | ARMV8_PMU_PMCR_E); - raw_spin_unlock_irqrestore(&events->pmu_lock, flags); + for (idx = ARMV8_IDX_COUNTER0; idx < cpu_pmu->num_events; idx ++) { + if (!test_and_set_bit(idx, cpuc->used_mask)) + return idx; + } + return -EAGAIN; } -static void armv8pmu_stop(struct arm_pmu *cpu_pmu) +static int armv8pmu_get_chain_idx(struct pmu_hw_events *cpuc, + struct arm_pmu *cpu_pmu) { - unsigned long flags; - struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); + int idx; - raw_spin_lock_irqsave(&events->pmu_lock, flags); - /* Disable all counters */ - armv8pmu_pmcr_write(armv8pmu_pmcr_read() & ~ARMV8_PMU_PMCR_E); - raw_spin_unlock_irqrestore(&events->pmu_lock, flags); + /* + * Chaining requires two consecutive event counters, where + * the lower idx must be even. + */ + for (idx = ARMV8_IDX_COUNTER0 + 1; idx < cpu_pmu->num_events; idx += 2) { + if (!test_and_set_bit(idx, cpuc->used_mask)) { + /* Check if the preceding even counter is available */ + if (!test_and_set_bit(idx - 1, cpuc->used_mask)) + return idx; + /* Release the Odd counter */ + clear_bit(idx, cpuc->used_mask); + } + } + return -EAGAIN; } static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc, struct perf_event *event) { - int idx; struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; unsigned long evtype = hwc->config_base & ARMV8_PMU_EVTYPE_EVENT; @@ -771,13 +912,20 @@ static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc, /* * Otherwise use events counters */ - for (idx = ARMV8_IDX_COUNTER0; idx < cpu_pmu->num_events; ++idx) { - if (!test_and_set_bit(idx, cpuc->used_mask)) - return idx; - } + if (armv8pmu_event_is_64bit(event)) + return armv8pmu_get_chain_idx(cpuc, cpu_pmu); + else + return armv8pmu_get_single_idx(cpuc, cpu_pmu); +} - /* The counters are all in use. */ - return -EAGAIN; +static void armv8pmu_clear_event_idx(struct pmu_hw_events *cpuc, + struct perf_event *event) +{ + int idx = event->hw.idx; + + clear_bit(idx, cpuc->used_mask); + if (armv8pmu_event_is_chained(event)) + clear_bit(idx - 1, cpuc->used_mask); } /* @@ -818,6 +966,12 @@ static int armv8pmu_set_event_filter(struct hw_perf_event *event, return 0; } +static int armv8pmu_filter_match(struct perf_event *event) +{ + unsigned long evtype = event->hw.config_base & ARMV8_PMU_EVTYPE_EVENT; + return evtype != ARMV8_PMUV3_PERFCTR_CHAIN; +} + static void armv8pmu_reset(void *info) { struct arm_pmu *cpu_pmu = (struct arm_pmu *)info; @@ -852,6 +1006,9 @@ static int __armv8_pmuv3_map_event(struct perf_event *event, &armv8_pmuv3_perf_cache_map, ARMV8_PMU_EVTYPE_EVENT); + if (armv8pmu_event_is_64bit(event)) + event->hw.flags |= ARMPMU_EVT_64BIT; + /* Onl expose micro/arch events supported by this PMU */ if ((hw_event_id > 0) && (hw_event_id < ARMV8_PMUV3_MAX_COMMON_EVENTS) && test_bit(hw_event_id, armpmu->pmceid_bitmap)) { @@ -958,11 +1115,12 @@ static int armv8_pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->read_counter = armv8pmu_read_counter, cpu_pmu->write_counter = armv8pmu_write_counter, cpu_pmu->get_event_idx = armv8pmu_get_event_idx, + cpu_pmu->clear_event_idx = armv8pmu_clear_event_idx, cpu_pmu->start = armv8pmu_start, cpu_pmu->stop = armv8pmu_stop, cpu_pmu->reset = armv8pmu_reset, - cpu_pmu->max_period = (1LLU << 32) - 1, cpu_pmu->set_event_filter = armv8pmu_set_event_filter; + cpu_pmu->filter_match = armv8pmu_filter_match; return 0; } @@ -1128,3 +1286,32 @@ static int __init armv8_pmu_driver_init(void) return arm_pmu_acpi_probe(armv8_pmuv3_init); } device_initcall(armv8_pmu_driver_init) + +void arch_perf_update_userpage(struct perf_event *event, + struct perf_event_mmap_page *userpg, u64 now) +{ + u32 freq; + u32 shift; + + /* + * Internal timekeeping for enabled/running/stopped times + * is always computed with the sched_clock. + */ + freq = arch_timer_get_rate(); + userpg->cap_user_time = 1; + + clocks_calc_mult_shift(&userpg->time_mult, &shift, freq, + NSEC_PER_SEC, 0); + /* + * time_shift is not expected to be greater than 31 due to + * the original published conversion algorithm shifting a + * 32-bit value (now specifies a 64-bit value) - refer + * perf_event_mmap_page documentation in perf_event.h. + */ + if (shift == 32) { + shift = 31; + userpg->time_mult >>= 1; + } + userpg->time_shift = (u16)shift; + userpg->time_offset = -now; +} diff --git a/arch/arm64/kernel/perf_regs.c b/arch/arm64/kernel/perf_regs.c index 1d091d048d04..0bbac612146e 100644 --- a/arch/arm64/kernel/perf_regs.c +++ b/arch/arm64/kernel/perf_regs.c @@ -1,11 +1,11 @@ // SPDX-License-Identifier: GPL-2.0 +#include <linux/compat.h> #include <linux/errno.h> #include <linux/kernel.h> #include <linux/perf_event.h> #include <linux/bug.h> #include <linux/sched/task_stack.h> -#include <asm/compat.h> #include <asm/perf_regs.h> #include <asm/ptrace.h> diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index d849d9804011..2a5b338b2542 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -23,7 +23,9 @@ #include <linux/slab.h> #include <linux/stop_machine.h> #include <linux/sched/debug.h> +#include <linux/set_memory.h> #include <linux/stringify.h> +#include <linux/vmalloc.h> #include <asm/traps.h> #include <asm/ptrace.h> #include <asm/cacheflush.h> @@ -42,10 +44,21 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); static void __kprobes post_kprobe_handler(struct kprobe_ctlblk *, struct pt_regs *); +static int __kprobes patch_text(kprobe_opcode_t *addr, u32 opcode) +{ + void *addrs[1]; + u32 insns[1]; + + addrs[0] = addr; + insns[0] = opcode; + + return aarch64_insn_patch_text(addrs, insns, 1); +} + static void __kprobes arch_prepare_ss_slot(struct kprobe *p) { /* prepare insn slot */ - p->ainsn.api.insn[0] = cpu_to_le32(p->opcode); + patch_text(p->ainsn.api.insn, p->opcode); flush_icache_range((uintptr_t) (p->ainsn.api.insn), (uintptr_t) (p->ainsn.api.insn) + @@ -107,7 +120,7 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) if (!p->ainsn.api.insn) return -ENOMEM; break; - }; + } /* prepare the instruction */ if (p->ainsn.api.insn) @@ -118,15 +131,15 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) return 0; } -static int __kprobes patch_text(kprobe_opcode_t *addr, u32 opcode) +void *alloc_insn_page(void) { - void *addrs[1]; - u32 insns[1]; + void *page; - addrs[0] = (void *)addr; - insns[0] = (u32)opcode; + page = vmalloc_exec(PAGE_SIZE); + if (page) + set_memory_ro((unsigned long)page, 1); - return aarch64_insn_patch_text(addrs, insns, 1); + return page; } /* arm kprobe: install breakpoint in text */ @@ -275,7 +288,7 @@ static int __kprobes reenter_kprobe(struct kprobe *p, break; case KPROBE_HIT_SS: case KPROBE_REENTER: - pr_warn("Unrecoverable kprobe detected at %p.\n", p->addr); + pr_warn("Unrecoverable kprobe detected.\n"); dump_kprobe(p); BUG(); break; @@ -395,9 +408,9 @@ static void __kprobes kprobe_handler(struct pt_regs *regs) /* * If we have no pre-handler or it returned 0, we * continue with normal processing. If we have a - * pre-handler and it returned non-zero, it prepped - * for calling the break_handler below on re-entry, - * so get out doing nothing more here. + * pre-handler and it returned non-zero, it will + * modify the execution path and no need to single + * stepping. Let's just reset current kprobe and exit. * * pre_handler can hit a breakpoint and can step thru * before return, keep PSTATE D-flag enabled until @@ -405,16 +418,8 @@ static void __kprobes kprobe_handler(struct pt_regs *regs) */ if (!p->pre_handler || !p->pre_handler(p, regs)) { setup_singlestep(p, regs, kcb, 0); - return; - } - } - } else if ((le32_to_cpu(*(kprobe_opcode_t *) addr) == - BRK64_OPCODE_KPROBES) && cur_kprobe) { - /* We probably hit a jprobe. Call its break handler. */ - if (cur_kprobe->break_handler && - cur_kprobe->break_handler(cur_kprobe, regs)) { - setup_singlestep(cur_kprobe, regs, kcb, 0); - return; + } else + reset_current_kprobe(); } } /* @@ -465,74 +470,6 @@ kprobe_breakpoint_handler(struct pt_regs *regs, unsigned int esr) return DBG_HOOK_HANDLED; } -int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) -{ - struct jprobe *jp = container_of(p, struct jprobe, kp); - struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); - - kcb->jprobe_saved_regs = *regs; - /* - * Since we can't be sure where in the stack frame "stacked" - * pass-by-value arguments are stored we just don't try to - * duplicate any of the stack. Do not use jprobes on functions that - * use more than 64 bytes (after padding each to an 8 byte boundary) - * of arguments, or pass individual arguments larger than 16 bytes. - */ - - instruction_pointer_set(regs, (unsigned long) jp->entry); - preempt_disable(); - pause_graph_tracing(); - return 1; -} - -void __kprobes jprobe_return(void) -{ - struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); - - /* - * Jprobe handler return by entering break exception, - * encoded same as kprobe, but with following conditions - * -a special PC to identify it from the other kprobes. - * -restore stack addr to original saved pt_regs - */ - asm volatile(" mov sp, %0 \n" - "jprobe_return_break: brk %1 \n" - : - : "r" (kcb->jprobe_saved_regs.sp), - "I" (BRK64_ESR_KPROBES) - : "memory"); - - unreachable(); -} - -int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) -{ - struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); - long stack_addr = kcb->jprobe_saved_regs.sp; - long orig_sp = kernel_stack_pointer(regs); - struct jprobe *jp = container_of(p, struct jprobe, kp); - extern const char jprobe_return_break[]; - - if (instruction_pointer(regs) != (u64) jprobe_return_break) - return 0; - - if (orig_sp != stack_addr) { - struct pt_regs *saved_regs = - (struct pt_regs *)kcb->jprobe_saved_regs.sp; - pr_err("current sp %lx does not match saved sp %lx\n", - orig_sp, stack_addr); - pr_err("Saved registers for jprobe %p\n", jp); - __show_regs(saved_regs); - pr_err("Current registers\n"); - __show_regs(regs); - BUG(); - } - unpause_graph_tracing(); - *regs = kcb->jprobe_saved_regs; - preempt_enable_no_resched(); - return 1; -} - bool arch_within_kprobe_blacklist(unsigned long addr) { if ((addr >= (unsigned long)__kprobes_text_start && diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index c0da6efe5465..d9a4c2d6dd8b 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -59,7 +59,7 @@ #include <asm/processor.h> #include <asm/stacktrace.h> -#ifdef CONFIG_CC_STACKPROTECTOR +#ifdef CONFIG_STACKPROTECTOR #include <linux/stackprotector.h> unsigned long __stack_chk_guard __read_mostly; EXPORT_SYMBOL(__stack_chk_guard); @@ -177,16 +177,16 @@ static void print_pstate(struct pt_regs *regs) if (compat_user_mode(regs)) { printk("pstate: %08llx (%c%c%c%c %c %s %s %c%c%c)\n", pstate, - pstate & COMPAT_PSR_N_BIT ? 'N' : 'n', - pstate & COMPAT_PSR_Z_BIT ? 'Z' : 'z', - pstate & COMPAT_PSR_C_BIT ? 'C' : 'c', - pstate & COMPAT_PSR_V_BIT ? 'V' : 'v', - pstate & COMPAT_PSR_Q_BIT ? 'Q' : 'q', - pstate & COMPAT_PSR_T_BIT ? "T32" : "A32", - pstate & COMPAT_PSR_E_BIT ? "BE" : "LE", - pstate & COMPAT_PSR_A_BIT ? 'A' : 'a', - pstate & COMPAT_PSR_I_BIT ? 'I' : 'i', - pstate & COMPAT_PSR_F_BIT ? 'F' : 'f'); + pstate & PSR_AA32_N_BIT ? 'N' : 'n', + pstate & PSR_AA32_Z_BIT ? 'Z' : 'z', + pstate & PSR_AA32_C_BIT ? 'C' : 'c', + pstate & PSR_AA32_V_BIT ? 'V' : 'v', + pstate & PSR_AA32_Q_BIT ? 'Q' : 'q', + pstate & PSR_AA32_T_BIT ? "T32" : "A32", + pstate & PSR_AA32_E_BIT ? "BE" : "LE", + pstate & PSR_AA32_A_BIT ? 'A' : 'a', + pstate & PSR_AA32_I_BIT ? 'I' : 'i', + pstate & PSR_AA32_F_BIT ? 'F' : 'f'); } else { printk("pstate: %08llx (%c%c%c%c %c%c%c%c %cPAN %cUAO)\n", pstate, @@ -257,7 +257,7 @@ static void tls_thread_flush(void) write_sysreg(0, tpidr_el0); if (is_compat_task()) { - current->thread.tp_value = 0; + current->thread.uw.tp_value = 0; /* * We need to ensure ordering between the shadow state and the @@ -351,13 +351,17 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start, * for the new thread. */ if (clone_flags & CLONE_SETTLS) - p->thread.tp_value = childregs->regs[3]; + p->thread.uw.tp_value = childregs->regs[3]; } else { memset(childregs, 0, sizeof(struct pt_regs)); childregs->pstate = PSR_MODE_EL1h; if (IS_ENABLED(CONFIG_ARM64_UAO) && cpus_have_const_cap(ARM64_HAS_UAO)) childregs->pstate |= PSR_UAO_BIT; + + if (arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE) + childregs->pstate |= PSR_SSBS_BIT; + p->thread.cpu_context.x19 = stack_start; p->thread.cpu_context.x20 = stk_sz; } @@ -379,7 +383,7 @@ static void tls_thread_switch(struct task_struct *next) tls_preserve_current_state(); if (is_compat_thread(task_thread_info(next))) - write_sysreg(next->thread.tp_value, tpidrro_el0); + write_sysreg(next->thread.uw.tp_value, tpidrro_el0); else if (!arm64_kernel_unmapped_at_el0()) write_sysreg(0, tpidrro_el0); diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c index e8edbf13302a..8cdaf25e99cd 100644 --- a/arch/arm64/kernel/psci.c +++ b/arch/arm64/kernel/psci.c @@ -24,7 +24,6 @@ #include <uapi/linux/psci.h> -#include <asm/compiler.h> #include <asm/cpu_ops.h> #include <asm/errno.h> #include <asm/smp_plat.h> diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 9ae31f7e2243..1710a2d01669 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -25,6 +25,7 @@ #include <linux/sched/signal.h> #include <linux/sched/task_stack.h> #include <linux/mm.h> +#include <linux/nospec.h> #include <linux/smp.h> #include <linux/ptrace.h> #include <linux/user.h> @@ -43,6 +44,7 @@ #include <asm/compat.h> #include <asm/cpufeature.h> #include <asm/debug-monitors.h> +#include <asm/fpsimd.h> #include <asm/pgtable.h> #include <asm/stacktrace.h> #include <asm/syscall.h> @@ -130,7 +132,7 @@ static bool regs_within_kernel_stack(struct pt_regs *regs, unsigned long addr) { return ((addr & ~(THREAD_SIZE - 1)) == (kernel_stack_pointer(regs) & ~(THREAD_SIZE - 1))) || - on_irq_stack(addr); + on_irq_stack(addr, NULL); } /** @@ -180,13 +182,7 @@ static void ptrace_hbptriggered(struct perf_event *bp, struct pt_regs *regs) { struct arch_hw_breakpoint *bkpt = counter_arch_bp(bp); - siginfo_t info; - - clear_siginfo(&info); - info.si_signo = SIGTRAP; - info.si_errno = 0; - info.si_code = TRAP_HWBKPT; - info.si_addr = (void __user *)(bkpt->trigger); + const char *desc = "Hardware breakpoint trap (ptrace)"; #ifdef CONFIG_COMPAT if (is_compat_task()) { @@ -206,10 +202,14 @@ static void ptrace_hbptriggered(struct perf_event *bp, break; } } - force_sig_ptrace_errno_trap(si_errno, (void __user *)bkpt->trigger); + arm64_force_sig_ptrace_errno_trap(si_errno, + (void __user *)bkpt->trigger, + desc); } #endif - force_sig_info(SIGTRAP, &info, current); + arm64_force_sig_fault(SIGTRAP, TRAP_HWBKPT, + (void __user *)(bkpt->trigger), + desc); } /* @@ -249,15 +249,20 @@ static struct perf_event *ptrace_hbp_get_event(unsigned int note_type, switch (note_type) { case NT_ARM_HW_BREAK: - if (idx < ARM_MAX_BRP) - bp = tsk->thread.debug.hbp_break[idx]; + if (idx >= ARM_MAX_BRP) + goto out; + idx = array_index_nospec(idx, ARM_MAX_BRP); + bp = tsk->thread.debug.hbp_break[idx]; break; case NT_ARM_HW_WATCH: - if (idx < ARM_MAX_WRP) - bp = tsk->thread.debug.hbp_watch[idx]; + if (idx >= ARM_MAX_WRP) + goto out; + idx = array_index_nospec(idx, ARM_MAX_WRP); + bp = tsk->thread.debug.hbp_watch[idx]; break; } +out: return bp; } @@ -270,19 +275,22 @@ static int ptrace_hbp_set_event(unsigned int note_type, switch (note_type) { case NT_ARM_HW_BREAK: - if (idx < ARM_MAX_BRP) { - tsk->thread.debug.hbp_break[idx] = bp; - err = 0; - } + if (idx >= ARM_MAX_BRP) + goto out; + idx = array_index_nospec(idx, ARM_MAX_BRP); + tsk->thread.debug.hbp_break[idx] = bp; + err = 0; break; case NT_ARM_HW_WATCH: - if (idx < ARM_MAX_WRP) { - tsk->thread.debug.hbp_watch[idx] = bp; - err = 0; - } + if (idx >= ARM_MAX_WRP) + goto out; + idx = array_index_nospec(idx, ARM_MAX_WRP); + tsk->thread.debug.hbp_watch[idx] = bp; + err = 0; break; } +out: return err; } @@ -629,7 +637,7 @@ static int __fpr_get(struct task_struct *target, sve_sync_to_fpsimd(target); - uregs = &target->thread.fpsimd_state.user_fpsimd; + uregs = &target->thread.uw.fpsimd_state; return user_regset_copyout(&pos, &count, &kbuf, &ubuf, uregs, start_pos, start_pos + sizeof(*uregs)); @@ -655,19 +663,19 @@ static int __fpr_set(struct task_struct *target, struct user_fpsimd_state newstate; /* - * Ensure target->thread.fpsimd_state is up to date, so that a + * Ensure target->thread.uw.fpsimd_state is up to date, so that a * short copyin can't resurrect stale data. */ sve_sync_to_fpsimd(target); - newstate = target->thread.fpsimd_state.user_fpsimd; + newstate = target->thread.uw.fpsimd_state; ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &newstate, start_pos, start_pos + sizeof(newstate)); if (ret) return ret; - target->thread.fpsimd_state.user_fpsimd = newstate; + target->thread.uw.fpsimd_state = newstate; return ret; } @@ -692,7 +700,7 @@ static int tls_get(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) { - unsigned long *tls = &target->thread.tp_value; + unsigned long *tls = &target->thread.uw.tp_value; if (target == current) tls_preserve_current_state(); @@ -705,13 +713,13 @@ static int tls_set(struct task_struct *target, const struct user_regset *regset, const void *kbuf, const void __user *ubuf) { int ret; - unsigned long tls = target->thread.tp_value; + unsigned long tls = target->thread.uw.tp_value; ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &tls, 0, -1); if (ret) return ret; - target->thread.tp_value = tls; + target->thread.uw.tp_value = tls; return ret; } @@ -760,9 +768,6 @@ static void sve_init_header_from_task(struct user_sve_header *header, vq = sve_vq_from_vl(header->vl); header->max_vl = sve_max_vl; - if (WARN_ON(!sve_vl_valid(sve_max_vl))) - header->max_vl = header->vl; - header->size = SVE_PT_SIZE(vq, header->flags); header->max_size = SVE_PT_SIZE(sve_vq_from_vl(header->max_vl), SVE_PT_REGS_SVE); @@ -842,7 +847,7 @@ static int sve_get(struct task_struct *target, start = end; end = SVE_PT_SVE_FPCR_OFFSET(vq) + SVE_PT_SVE_FPCR_SIZE; ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.fpsimd_state.fpsr, + &target->thread.uw.fpsimd_state.fpsr, start, end); if (ret) return ret; @@ -941,7 +946,7 @@ static int sve_set(struct task_struct *target, start = end; end = SVE_PT_SVE_FPCR_OFFSET(vq) + SVE_PT_SVE_FPCR_SIZE; ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.fpsimd_state.fpsr, + &target->thread.uw.fpsimd_state.fpsr, start, end); out: @@ -1040,8 +1045,6 @@ static const struct user_regset_view user_aarch64_view = { }; #ifdef CONFIG_COMPAT -#include <linux/compat.h> - enum compat_regset { REGSET_COMPAT_GPR, REGSET_COMPAT_VFP, @@ -1074,6 +1077,7 @@ static int compat_gpr_get(struct task_struct *target, break; case 16: reg = task_pt_regs(target)->pstate; + reg = pstate_to_compat_psr(reg); break; case 17: reg = task_pt_regs(target)->orig_x0; @@ -1141,6 +1145,7 @@ static int compat_gpr_set(struct task_struct *target, newregs.pc = reg; break; case 16: + reg = compat_psr_to_pstate(reg); newregs.pstate = reg; break; case 17: @@ -1169,7 +1174,7 @@ static int compat_vfp_get(struct task_struct *target, compat_ulong_t fpscr; int ret, vregs_end_pos; - uregs = &target->thread.fpsimd_state.user_fpsimd; + uregs = &target->thread.uw.fpsimd_state; if (target == current) fpsimd_preserve_current_state(); @@ -1202,7 +1207,7 @@ static int compat_vfp_set(struct task_struct *target, compat_ulong_t fpscr; int ret, vregs_end_pos; - uregs = &target->thread.fpsimd_state.user_fpsimd; + uregs = &target->thread.uw.fpsimd_state; vregs_end_pos = VFP_STATE_SIZE - sizeof(compat_ulong_t); ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, uregs, 0, @@ -1225,7 +1230,7 @@ static int compat_tls_get(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) { - compat_ulong_t tls = (compat_ulong_t)target->thread.tp_value; + compat_ulong_t tls = (compat_ulong_t)target->thread.uw.tp_value; return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &tls, 0, -1); } @@ -1235,13 +1240,13 @@ static int compat_tls_set(struct task_struct *target, const void __user *ubuf) { int ret; - compat_ulong_t tls = target->thread.tp_value; + compat_ulong_t tls = target->thread.uw.tp_value; ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &tls, 0, -1); if (ret) return ret; - target->thread.tp_value = tls; + target->thread.uw.tp_value = tls; return ret; } @@ -1458,9 +1463,7 @@ static int compat_ptrace_gethbpregs(struct task_struct *tsk, compat_long_t num, { int ret; u32 kdata; - mm_segment_t old_fs = get_fs(); - set_fs(KERNEL_DS); /* Watchpoint */ if (num < 0) { ret = compat_ptrace_hbp_get(NT_ARM_HW_WATCH, tsk, num, &kdata); @@ -1471,7 +1474,6 @@ static int compat_ptrace_gethbpregs(struct task_struct *tsk, compat_long_t num, } else { ret = compat_ptrace_hbp_get(NT_ARM_HW_BREAK, tsk, num, &kdata); } - set_fs(old_fs); if (!ret) ret = put_user(kdata, data); @@ -1484,7 +1486,6 @@ static int compat_ptrace_sethbpregs(struct task_struct *tsk, compat_long_t num, { int ret; u32 kdata = 0; - mm_segment_t old_fs = get_fs(); if (num == 0) return 0; @@ -1493,12 +1494,10 @@ static int compat_ptrace_sethbpregs(struct task_struct *tsk, compat_long_t num, if (ret) return ret; - set_fs(KERNEL_DS); if (num < 0) ret = compat_ptrace_hbp_set(NT_ARM_HW_WATCH, tsk, num, &kdata); else ret = compat_ptrace_hbp_set(NT_ARM_HW_BREAK, tsk, num, &kdata); - set_fs(old_fs); return ret; } @@ -1538,7 +1537,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, break; case COMPAT_PTRACE_GET_THREAD_AREA: - ret = put_user((compat_ulong_t)child->thread.tp_value, + ret = put_user((compat_ulong_t)child->thread.uw.tp_value, (compat_ulong_t __user *)datap); break; @@ -1633,7 +1632,7 @@ static void tracehook_report_syscall(struct pt_regs *regs, regs->regs[regno] = saved_reg; } -asmlinkage int syscall_trace_enter(struct pt_regs *regs) +int syscall_trace_enter(struct pt_regs *regs) { if (test_thread_flag(TIF_SYSCALL_TRACE)) tracehook_report_syscall(regs, PTRACE_SYSCALL_ENTER); @@ -1651,7 +1650,7 @@ asmlinkage int syscall_trace_enter(struct pt_regs *regs) return regs->syscallno; } -asmlinkage void syscall_trace_exit(struct pt_regs *regs) +void syscall_trace_exit(struct pt_regs *regs) { audit_syscall_exit(regs); @@ -1660,18 +1659,24 @@ asmlinkage void syscall_trace_exit(struct pt_regs *regs) if (test_thread_flag(TIF_SYSCALL_TRACE)) tracehook_report_syscall(regs, PTRACE_SYSCALL_EXIT); + + rseq_syscall(regs); } /* - * Bits which are always architecturally RES0 per ARM DDI 0487A.h + * SPSR_ELx bits which are always architecturally RES0 per ARM DDI 0487C.a + * We also take into account DIT (bit 24), which is not yet documented, and + * treat PAN and UAO as RES0 bits, as they are meaningless at EL0, and may be + * allocated an EL0 meaning in future. * Userspace cannot use these until they have an architectural meaning. + * Note that this follows the SPSR_ELx format, not the AArch32 PSR format. * We also reserve IL for the kernel; SS is handled dynamically. */ #define SPSR_EL1_AARCH64_RES0_BITS \ - (GENMASK_ULL(63,32) | GENMASK_ULL(27, 22) | GENMASK_ULL(20, 10) | \ - GENMASK_ULL(5, 5)) + (GENMASK_ULL(63,32) | GENMASK_ULL(27, 25) | GENMASK_ULL(23, 22) | \ + GENMASK_ULL(20, 10) | GENMASK_ULL(5, 5)) #define SPSR_EL1_AARCH32_RES0_BITS \ - (GENMASK_ULL(63,32) | GENMASK_ULL(24, 22) | GENMASK_ULL(20,20)) + (GENMASK_ULL(63,32) | GENMASK_ULL(23, 22) | GENMASK_ULL(20,20)) static int valid_compat_regs(struct user_pt_regs *regs) { @@ -1679,15 +1684,15 @@ static int valid_compat_regs(struct user_pt_regs *regs) if (!system_supports_mixed_endian_el0()) { if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN)) - regs->pstate |= COMPAT_PSR_E_BIT; + regs->pstate |= PSR_AA32_E_BIT; else - regs->pstate &= ~COMPAT_PSR_E_BIT; + regs->pstate &= ~PSR_AA32_E_BIT; } if (user_mode(regs) && (regs->pstate & PSR_MODE32_BIT) && - (regs->pstate & COMPAT_PSR_A_BIT) == 0 && - (regs->pstate & COMPAT_PSR_I_BIT) == 0 && - (regs->pstate & COMPAT_PSR_F_BIT) == 0) { + (regs->pstate & PSR_AA32_A_BIT) == 0 && + (regs->pstate & PSR_AA32_I_BIT) == 0 && + (regs->pstate & PSR_AA32_F_BIT) == 0) { return 1; } @@ -1695,11 +1700,11 @@ static int valid_compat_regs(struct user_pt_regs *regs) * Force PSR to a valid 32-bit EL0t, preserving the same bits as * arch/arm. */ - regs->pstate &= COMPAT_PSR_N_BIT | COMPAT_PSR_Z_BIT | - COMPAT_PSR_C_BIT | COMPAT_PSR_V_BIT | - COMPAT_PSR_Q_BIT | COMPAT_PSR_IT_MASK | - COMPAT_PSR_GE_MASK | COMPAT_PSR_E_BIT | - COMPAT_PSR_T_BIT; + regs->pstate &= PSR_AA32_N_BIT | PSR_AA32_Z_BIT | + PSR_AA32_C_BIT | PSR_AA32_V_BIT | + PSR_AA32_Q_BIT | PSR_AA32_IT_MASK | + PSR_AA32_GE_MASK | PSR_AA32_E_BIT | + PSR_AA32_T_BIT; regs->pstate |= PSR_MODE32_BIT; return 0; diff --git a/arch/arm64/kernel/reloc_test_core.c b/arch/arm64/kernel/reloc_test_core.c index c124752a8bd3..5915ce5759cc 100644 --- a/arch/arm64/kernel/reloc_test_core.c +++ b/arch/arm64/kernel/reloc_test_core.c @@ -28,6 +28,7 @@ asmlinkage u64 absolute_data16(void); asmlinkage u64 signed_movw(void); asmlinkage u64 unsigned_movw(void); asmlinkage u64 relative_adrp(void); +asmlinkage u64 relative_adrp_far(void); asmlinkage u64 relative_adr(void); asmlinkage u64 relative_data64(void); asmlinkage u64 relative_data32(void); @@ -43,9 +44,8 @@ static struct { { "R_AARCH64_ABS16", absolute_data16, UL(SYM16_ABS_VAL) }, { "R_AARCH64_MOVW_SABS_Gn", signed_movw, UL(SYM64_ABS_VAL) }, { "R_AARCH64_MOVW_UABS_Gn", unsigned_movw, UL(SYM64_ABS_VAL) }, -#ifndef CONFIG_ARM64_ERRATUM_843419 { "R_AARCH64_ADR_PREL_PG_HI21", relative_adrp, (u64)&sym64_rel }, -#endif + { "R_AARCH64_ADR_PREL_PG_HI21", relative_adrp_far, (u64)&memstart_addr }, { "R_AARCH64_ADR_PREL_LO21", relative_adr, (u64)&sym64_rel }, { "R_AARCH64_PREL64", relative_data64, (u64)&sym64_rel }, { "R_AARCH64_PREL32", relative_data32, (u64)&sym64_rel }, diff --git a/arch/arm64/kernel/reloc_test_syms.S b/arch/arm64/kernel/reloc_test_syms.S index e1edcefeb02d..2b8d9cb8b078 100644 --- a/arch/arm64/kernel/reloc_test_syms.S +++ b/arch/arm64/kernel/reloc_test_syms.S @@ -43,15 +43,21 @@ ENTRY(unsigned_movw) ret ENDPROC(unsigned_movw) -#ifndef CONFIG_ARM64_ERRATUM_843419 - + .align 12 + .space 0xff8 ENTRY(relative_adrp) adrp x0, sym64_rel add x0, x0, #:lo12:sym64_rel ret ENDPROC(relative_adrp) -#endif + .align 12 + .space 0xffc +ENTRY(relative_adrp_far) + adrp x0, memstart_addr + add x0, x0, #:lo12:memstart_addr + ret +ENDPROC(relative_adrp_far) ENTRY(relative_adr) adr x0, sym64_rel diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c index 6b8d90d5ceae..5ba4465e44f0 100644 --- a/arch/arm64/kernel/sdei.c +++ b/arch/arm64/kernel/sdei.c @@ -13,6 +13,7 @@ #include <asm/mmu.h> #include <asm/ptrace.h> #include <asm/sections.h> +#include <asm/stacktrace.h> #include <asm/sysreg.h> #include <asm/vmap_stack.h> @@ -88,23 +89,52 @@ static int init_sdei_stacks(void) return err; } -bool _on_sdei_stack(unsigned long sp) +static bool on_sdei_normal_stack(unsigned long sp, struct stack_info *info) { - unsigned long low, high; + unsigned long low = (unsigned long)raw_cpu_read(sdei_stack_normal_ptr); + unsigned long high = low + SDEI_STACK_SIZE; - if (!IS_ENABLED(CONFIG_VMAP_STACK)) + if (sp < low || sp >= high) return false; - low = (unsigned long)raw_cpu_read(sdei_stack_critical_ptr); - high = low + SDEI_STACK_SIZE; + if (info) { + info->low = low; + info->high = high; + info->type = STACK_TYPE_SDEI_NORMAL; + } - if (low <= sp && sp < high) + return true; +} + +static bool on_sdei_critical_stack(unsigned long sp, struct stack_info *info) +{ + unsigned long low = (unsigned long)raw_cpu_read(sdei_stack_critical_ptr); + unsigned long high = low + SDEI_STACK_SIZE; + + if (sp < low || sp >= high) + return false; + + if (info) { + info->low = low; + info->high = high; + info->type = STACK_TYPE_SDEI_CRITICAL; + } + + return true; +} + +bool _on_sdei_stack(unsigned long sp, struct stack_info *info) +{ + if (!IS_ENABLED(CONFIG_VMAP_STACK)) + return false; + + if (on_sdei_critical_stack(sp, info)) return true; - low = (unsigned long)raw_cpu_read(sdei_stack_normal_ptr); - high = low + SDEI_STACK_SIZE; + if (on_sdei_normal_stack(sp, info)) + return true; - return (low <= sp && sp < high); + return false; } unsigned long sdei_arch_get_entry_point(int conduit) diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 30ad2f085d1f..953e316521fc 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -26,7 +26,6 @@ #include <linux/initrd.h> #include <linux/console.h> #include <linux/cache.h> -#include <linux/bootmem.h> #include <linux/screen_info.h> #include <linux/init.h> #include <linux/kexec.h> @@ -64,6 +63,9 @@ #include <asm/xen/hypervisor.h> #include <asm/mmu_context.h> +static int num_standard_resources; +static struct resource *standard_resources; + phys_addr_t __fdt_pointer __initdata; /* @@ -206,14 +208,20 @@ static void __init request_standard_resources(void) { struct memblock_region *region; struct resource *res; + unsigned long i = 0; kernel_code.start = __pa_symbol(_text); kernel_code.end = __pa_symbol(__init_begin - 1); kernel_data.start = __pa_symbol(_sdata); kernel_data.end = __pa_symbol(_end - 1); + num_standard_resources = memblock.memory.cnt; + standard_resources = memblock_alloc_low(num_standard_resources * + sizeof(*standard_resources), + SMP_CACHE_BYTES); + for_each_memblock(memory, region) { - res = alloc_bootmem_low(sizeof(*res)); + res = &standard_resources[i++]; if (memblock_is_nomap(region)) { res->name = "reserved"; res->flags = IORESOURCE_MEM; @@ -241,6 +249,34 @@ static void __init request_standard_resources(void) } } +static int __init reserve_memblock_reserved_regions(void) +{ + u64 i, j; + + for (i = 0; i < num_standard_resources; ++i) { + struct resource *mem = &standard_resources[i]; + phys_addr_t r_start, r_end, mem_size = resource_size(mem); + + if (!memblock_is_region_reserved(mem->start, mem_size)) + continue; + + for_each_reserved_mem_region(j, &r_start, &r_end) { + resource_size_t start, end; + + start = max(PFN_PHYS(PFN_DOWN(r_start)), mem->start); + end = min(PFN_PHYS(PFN_UP(r_end)) - 1, mem->end); + + if (start > mem->end || end < mem->start) + continue; + + reserve_region_with_split(mem, start, end, "reserved"); + } + } + + return 0; +} +arch_initcall(reserve_memblock_reserved_regions); + u64 __cpu_logical_map[NR_CPUS] = { [0 ... NR_CPUS-1] = INVALID_HWID }; void __init setup_arch(char **cmdline_p) @@ -313,12 +349,8 @@ void __init setup_arch(char **cmdline_p) #endif #ifdef CONFIG_VT -#if defined(CONFIG_VGA_CONSOLE) - conswitchp = &vga_con; -#elif defined(CONFIG_DUMMY_CONSOLE) conswitchp = &dummy_con; #endif -#endif if (boot_args[1] || boot_args[2] || boot_args[3]) { pr_err("WARNING: x1-x3 nonzero in violation of boot protocol:\n" "\tx1: %016llx\n\tx2: %016llx\n\tx3: %016llx\n" diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index f60c052e8d1c..5dcc942906db 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -17,6 +17,7 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ +#include <linux/cache.h> #include <linux/compat.h> #include <linux/errno.h> #include <linux/kernel.h> @@ -40,6 +41,7 @@ #include <asm/fpsimd.h> #include <asm/ptrace.h> #include <asm/signal32.h> +#include <asm/traps.h> #include <asm/vdso.h> /* @@ -179,7 +181,7 @@ static void __user *apply_user_offset( static int preserve_fpsimd_context(struct fpsimd_context __user *ctx) { struct user_fpsimd_state const *fpsimd = - ¤t->thread.fpsimd_state.user_fpsimd; + ¤t->thread.uw.fpsimd_state; int err; /* copy the FP and status/control registers */ @@ -537,8 +539,9 @@ static int restore_sigframe(struct pt_regs *regs, return err; } -asmlinkage long sys_rt_sigreturn(struct pt_regs *regs) +SYSCALL_DEFINE0(rt_sigreturn) { + struct pt_regs *regs = current_pt_regs(); struct rt_sigframe __user *frame; /* Always make any pending restarted system calls return -EINTR */ @@ -565,16 +568,19 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs) return regs->regs[0]; badframe: - if (show_unhandled_signals) - pr_info_ratelimited("%s[%d]: bad frame in %s: pc=%08llx sp=%08llx\n", - current->comm, task_pid_nr(current), __func__, - regs->pc, regs->sp); - force_sig(SIGSEGV, current); + arm64_notify_segfault(regs->sp); return 0; } -/* Determine the layout of optional records in the signal frame */ -static int setup_sigframe_layout(struct rt_sigframe_user_layout *user) +/* + * Determine the layout of optional records in the signal frame + * + * add_all: if true, lays out the biggest possible signal frame for + * this task; otherwise, generates a layout for the current state + * of the task. + */ +static int setup_sigframe_layout(struct rt_sigframe_user_layout *user, + bool add_all) { int err; @@ -584,7 +590,7 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user) return err; /* fault information, if valid */ - if (current->thread.fault_code) { + if (add_all || current->thread.fault_code) { err = sigframe_alloc(user, &user->esr_offset, sizeof(struct esr_context)); if (err) @@ -594,8 +600,14 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user) if (system_supports_sve()) { unsigned int vq = 0; - if (test_thread_flag(TIF_SVE)) - vq = sve_vq_from_vl(current->thread.sve_vl); + if (add_all || test_thread_flag(TIF_SVE)) { + int vl = sve_max_vl; + + if (!add_all) + vl = current->thread.sve_vl; + + vq = sve_vq_from_vl(vl); + } err = sigframe_alloc(user, &user->sve_offset, SVE_SIG_CONTEXT_SIZE(vq)); @@ -606,7 +618,6 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user) return sigframe_alloc_end(user); } - static int setup_sigframe(struct rt_sigframe_user_layout *user, struct pt_regs *regs, sigset_t *set) { @@ -704,7 +715,7 @@ static int get_sigframe(struct rt_sigframe_user_layout *user, int err; init_user_layout(user); - err = setup_sigframe_layout(user); + err = setup_sigframe_layout(user, false); if (err) return err; @@ -792,6 +803,8 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs) int usig = ksig->sig; int ret; + rseq_signal_deliver(ksig, regs); + /* * Set up the stack frame */ @@ -833,11 +846,12 @@ static void do_signal(struct pt_regs *regs) unsigned long continue_addr = 0, restart_addr = 0; int retval = 0; struct ksignal ksig; + bool syscall = in_syscall(regs); /* * If we were from a system call, check for system call restarting... */ - if (in_syscall(regs)) { + if (syscall) { continue_addr = regs->pc; restart_addr = continue_addr - (compat_thumb_mode(regs) ? 2 : 4); retval = regs->regs[0]; @@ -889,7 +903,7 @@ static void do_signal(struct pt_regs *regs) * Handle restarting a different system call. As above, if a debugger * has chosen to restart at a different PC, ignore the restart. */ - if (in_syscall(regs) && regs->pc == restart_addr) { + if (syscall && regs->pc == restart_addr) { if (retval == -ERESTART_RESTARTBLOCK) setup_restart_syscall(regs); user_rewind_single_step(current); @@ -899,7 +913,7 @@ static void do_signal(struct pt_regs *regs) } asmlinkage void do_notify_resume(struct pt_regs *regs, - unsigned int thread_flags) + unsigned long thread_flags) { /* * The assembly code enters us with IRQs off, but it hasn't @@ -929,6 +943,7 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, if (thread_flags & _TIF_NOTIFY_RESUME) { clear_thread_flag(TIF_NOTIFY_RESUME); tracehook_notify_resume(regs); + rseq_handle_notify_resume(NULL, regs); } if (thread_flags & _TIF_FOREIGN_FPSTATE) @@ -939,3 +954,28 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, thread_flags = READ_ONCE(current_thread_info()->flags); } while (thread_flags & _TIF_WORK_MASK); } + +unsigned long __ro_after_init signal_minsigstksz; + +/* + * Determine the stack space required for guaranteed signal devliery. + * This function is used to populate AT_MINSIGSTKSZ at process startup. + * cpufeatures setup is assumed to be complete. + */ +void __init minsigstksz_setup(void) +{ + struct rt_sigframe_user_layout user; + + init_user_layout(&user); + + /* + * If this fails, SIGFRAME_MAXSZ needs to be enlarged. It won't + * be big enough, but it's our best guess: + */ + if (WARN_ON(setup_sigframe_layout(&user, true))) + return; + + signal_minsigstksz = sigframe_size(&user) + + round_up(sizeof(struct frame_record), 16) + + 16; /* max alignment padding */ +} diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c index 79feb861929b..24b09003f821 100644 --- a/arch/arm64/kernel/signal32.c +++ b/arch/arm64/kernel/signal32.c @@ -26,6 +26,7 @@ #include <asm/esr.h> #include <asm/fpsimd.h> #include <asm/signal32.h> +#include <asm/traps.h> #include <linux/uaccess.h> #include <asm/unistd.h> @@ -149,7 +150,7 @@ union __fpsimd_vreg { static int compat_preserve_vfp_context(struct compat_vfp_sigframe __user *frame) { struct user_fpsimd_state const *fpsimd = - ¤t->thread.fpsimd_state.user_fpsimd; + ¤t->thread.uw.fpsimd_state; compat_ulong_t magic = VFP_MAGIC; compat_ulong_t size = VFP_STORAGE_SIZE; compat_ulong_t fpscr, fpexc; @@ -242,6 +243,7 @@ static int compat_restore_sigframe(struct pt_regs *regs, int err; sigset_t set; struct compat_aux_sigframe __user *aux; + unsigned long psr; err = get_sigset_t(&set, &sf->uc.uc_sigmask); if (err == 0) { @@ -265,7 +267,9 @@ static int compat_restore_sigframe(struct pt_regs *regs, __get_user_error(regs->compat_sp, &sf->uc.uc_mcontext.arm_sp, err); __get_user_error(regs->compat_lr, &sf->uc.uc_mcontext.arm_lr, err); __get_user_error(regs->pc, &sf->uc.uc_mcontext.arm_pc, err); - __get_user_error(regs->pstate, &sf->uc.uc_mcontext.arm_cpsr, err); + __get_user_error(psr, &sf->uc.uc_mcontext.arm_cpsr, err); + + regs->pstate = compat_psr_to_pstate(psr); /* * Avoid compat_sys_sigreturn() restarting. @@ -281,8 +285,9 @@ static int compat_restore_sigframe(struct pt_regs *regs, return err; } -asmlinkage int compat_sys_sigreturn(struct pt_regs *regs) +COMPAT_SYSCALL_DEFINE0(sigreturn) { + struct pt_regs *regs = current_pt_regs(); struct compat_sigframe __user *frame; /* Always make any pending restarted system calls return -EINTR */ @@ -307,16 +312,13 @@ asmlinkage int compat_sys_sigreturn(struct pt_regs *regs) return regs->regs[0]; badframe: - if (show_unhandled_signals) - pr_info_ratelimited("%s[%d]: bad frame in %s: pc=%08llx sp=%08llx\n", - current->comm, task_pid_nr(current), __func__, - regs->pc, regs->compat_sp); - force_sig(SIGSEGV, current); + arm64_notify_segfault(regs->compat_sp); return 0; } -asmlinkage int compat_sys_rt_sigreturn(struct pt_regs *regs) +COMPAT_SYSCALL_DEFINE0(rt_sigreturn) { + struct pt_regs *regs = current_pt_regs(); struct compat_rt_sigframe __user *frame; /* Always make any pending restarted system calls return -EINTR */ @@ -344,11 +346,7 @@ asmlinkage int compat_sys_rt_sigreturn(struct pt_regs *regs) return regs->regs[0]; badframe: - if (show_unhandled_signals) - pr_info_ratelimited("%s[%d]: bad frame in %s: pc=%08llx sp=%08llx\n", - current->comm, task_pid_nr(current), __func__, - regs->pc, regs->compat_sp); - force_sig(SIGSEGV, current); + arm64_notify_segfault(regs->compat_sp); return 0; } @@ -379,22 +377,22 @@ static void compat_setup_return(struct pt_regs *regs, struct k_sigaction *ka, { compat_ulong_t handler = ptr_to_compat(ka->sa.sa_handler); compat_ulong_t retcode; - compat_ulong_t spsr = regs->pstate & ~(PSR_f | COMPAT_PSR_E_BIT); + compat_ulong_t spsr = regs->pstate & ~(PSR_f | PSR_AA32_E_BIT); int thumb; /* Check if the handler is written for ARM or Thumb */ thumb = handler & 1; if (thumb) - spsr |= COMPAT_PSR_T_BIT; + spsr |= PSR_AA32_T_BIT; else - spsr &= ~COMPAT_PSR_T_BIT; + spsr &= ~PSR_AA32_T_BIT; /* The IT state must be cleared for both ARM and Thumb-2 */ - spsr &= ~COMPAT_PSR_IT_MASK; + spsr &= ~PSR_AA32_IT_MASK; /* Restore the original endianness */ - spsr |= COMPAT_PSR_ENDSTATE; + spsr |= PSR_AA32_ENDSTATE; if (ka->sa.sa_flags & SA_RESTORER) { retcode = ptr_to_compat(ka->sa.sa_restorer); @@ -421,6 +419,7 @@ static int compat_setup_sigframe(struct compat_sigframe __user *sf, struct pt_regs *regs, sigset_t *set) { struct compat_aux_sigframe __user *aux; + unsigned long psr = pstate_to_compat_psr(regs->pstate); int err = 0; __put_user_error(regs->regs[0], &sf->uc.uc_mcontext.arm_r0, err); @@ -439,7 +438,7 @@ static int compat_setup_sigframe(struct compat_sigframe __user *sf, __put_user_error(regs->compat_sp, &sf->uc.uc_mcontext.arm_sp, err); __put_user_error(regs->compat_lr, &sf->uc.uc_mcontext.arm_lr, err); __put_user_error(regs->pc, &sf->uc.uc_mcontext.arm_pc, err); - __put_user_error(regs->pstate, &sf->uc.uc_mcontext.arm_cpsr, err); + __put_user_error(psr, &sf->uc.uc_mcontext.arm_cpsr, err); __put_user_error((compat_ulong_t)0, &sf->uc.uc_mcontext.trap_no, err); /* set the compat FSR WnR */ diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S index bebec8ef9372..3e53ffa07994 100644 --- a/arch/arm64/kernel/sleep.S +++ b/arch/arm64/kernel/sleep.S @@ -101,6 +101,7 @@ ENTRY(cpu_resume) bl el2_setup // if in EL2 drop to EL1 cleanly bl __cpu_setup /* enable the MMU early - so we can access sleep_save_stash by va */ + adrp x1, swapper_pg_dir bl __enable_mmu ldr x8, =_cpu_resume br x8 diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 3b8ad7be9c33..96b8f2f51ab2 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -85,43 +85,6 @@ enum ipi_msg_type { IPI_WAKEUP }; -#ifdef CONFIG_ARM64_VHE - -/* Whether the boot CPU is running in HYP mode or not*/ -static bool boot_cpu_hyp_mode; - -static inline void save_boot_cpu_run_el(void) -{ - boot_cpu_hyp_mode = is_kernel_in_hyp_mode(); -} - -static inline bool is_boot_cpu_in_hyp_mode(void) -{ - return boot_cpu_hyp_mode; -} - -/* - * Verify that a secondary CPU is running the kernel at the same - * EL as that of the boot CPU. - */ -void verify_cpu_run_el(void) -{ - bool in_el2 = is_kernel_in_hyp_mode(); - bool boot_cpu_el2 = is_boot_cpu_in_hyp_mode(); - - if (in_el2 ^ boot_cpu_el2) { - pr_crit("CPU%d: mismatched Exception Level(EL%d) with boot CPU(EL%d)\n", - smp_processor_id(), - in_el2 ? 2 : 1, - boot_cpu_el2 ? 2 : 1); - cpu_panic_kernel(); - } -} - -#else -static inline void save_boot_cpu_run_el(void) {} -#endif - #ifdef CONFIG_HOTPLUG_CPU static int op_cpu_kill(unsigned int cpu); #else @@ -216,7 +179,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) * This is the secondary CPU boot entry. We're using this CPUs * idle thread stack, but a set of temporary page tables. */ -asmlinkage void secondary_start_kernel(void) +asmlinkage notrace void secondary_start_kernel(void) { u64 mpidr = read_cpuid_mpidr() & MPIDR_HWID_BITMASK; struct mm_struct *mm = &init_mm; @@ -262,6 +225,7 @@ asmlinkage void secondary_start_kernel(void) notify_cpu_starting(cpu); store_cpu_topology(cpu); + numa_add_cpu(cpu); /* * OK, now it's safe to let the boot CPU continue. Wait for @@ -315,6 +279,9 @@ int __cpu_disable(void) if (ret) return ret; + remove_cpu_topology(cpu); + numa_remove_cpu(cpu); + /* * Take this CPU offline. Once we clear this, we can't return, * and we must not schedule until we're ready to give up the cpu. @@ -447,13 +414,6 @@ void __init smp_prepare_boot_cpu(void) */ jump_label_init(); cpuinfo_store_boot_cpu(); - save_boot_cpu_run_el(); - /* - * Run the errata work around checks on the boot CPU, once we have - * initialised the cpu feature infrastructure from - * cpuinfo_store_boot_cpu() above. - */ - update_cpu_errata_workarounds(); } static u64 __init of_get_cpu_mpidr(struct device_node *dn) @@ -562,7 +522,6 @@ acpi_map_gic_cpu_interface(struct acpi_madt_generic_interrupt *processor) } bootcpu_valid = true; cpu_madt_gicc[0] = *processor; - early_map_cpu_to_node(0, acpi_numa_get_nid(0, hwid)); return; } @@ -585,8 +544,6 @@ acpi_map_gic_cpu_interface(struct acpi_madt_generic_interrupt *processor) */ acpi_set_mailbox_entry(cpu_count, processor); - early_map_cpu_to_node(cpu_count, acpi_numa_get_nid(cpu_count, hwid)); - cpu_count++; } @@ -606,8 +563,34 @@ acpi_parse_gic_cpu_interface(struct acpi_subtable_header *header, return 0; } + +static void __init acpi_parse_and_init_cpus(void) +{ + int i; + + /* + * do a walk of MADT to determine how many CPUs + * we have including disabled CPUs, and get information + * we need for SMP init. + */ + acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_INTERRUPT, + acpi_parse_gic_cpu_interface, 0); + + /* + * In ACPI, SMP and CPU NUMA information is provided in separate + * static tables, namely the MADT and the SRAT. + * + * Thus, it is simpler to first create the cpu logical map through + * an MADT walk and then map the logical cpus to their node ids + * as separate steps. + */ + acpi_map_cpus_to_nodes(); + + for (i = 0; i < nr_cpu_ids; i++) + early_map_cpu_to_node(i, acpi_numa_get_nid(i)); +} #else -#define acpi_table_parse_madt(...) do { } while (0) +#define acpi_parse_and_init_cpus(...) do { } while (0) #endif /* @@ -619,7 +602,7 @@ static void __init of_parse_and_init_cpus(void) { struct device_node *dn; - for_each_node_by_type(dn, "cpu") { + for_each_of_cpu_node(dn) { u64 hwid = of_get_cpu_mpidr(dn); if (hwid == INVALID_HWID) @@ -680,13 +663,7 @@ void __init smp_init_cpus(void) if (acpi_disabled) of_parse_and_init_cpus(); else - /* - * do a walk of MADT to determine how many CPUs - * we have including disabled CPUs, and get information - * we need for SMP init - */ - acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_INTERRUPT, - acpi_parse_gic_cpu_interface, 0); + acpi_parse_and_init_cpus(); if (cpu_count > nr_cpu_ids) pr_warn("Number of cores (%d) exceeds configured maximum of %u - clipping\n", @@ -723,6 +700,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus) this_cpu = smp_processor_id(); store_cpu_topology(this_cpu); numa_store_cpu_info(this_cpu); + numa_add_cpu(this_cpu); /* * If UP is mandated by "nosmp" (which implies "maxcpus=0"), don't set diff --git a/arch/arm64/kernel/ssbd.c b/arch/arm64/kernel/ssbd.c new file mode 100644 index 000000000000..885f13e58708 --- /dev/null +++ b/arch/arm64/kernel/ssbd.c @@ -0,0 +1,128 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2018 ARM Ltd, All Rights Reserved. + */ + +#include <linux/compat.h> +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/sched/task_stack.h> +#include <linux/thread_info.h> + +#include <asm/cpufeature.h> + +static void ssbd_ssbs_enable(struct task_struct *task) +{ + u64 val = is_compat_thread(task_thread_info(task)) ? + PSR_AA32_SSBS_BIT : PSR_SSBS_BIT; + + task_pt_regs(task)->pstate |= val; +} + +static void ssbd_ssbs_disable(struct task_struct *task) +{ + u64 val = is_compat_thread(task_thread_info(task)) ? + PSR_AA32_SSBS_BIT : PSR_SSBS_BIT; + + task_pt_regs(task)->pstate &= ~val; +} + +/* + * prctl interface for SSBD + */ +static int ssbd_prctl_set(struct task_struct *task, unsigned long ctrl) +{ + int state = arm64_get_ssbd_state(); + + /* Unsupported */ + if (state == ARM64_SSBD_UNKNOWN) + return -EINVAL; + + /* Treat the unaffected/mitigated state separately */ + if (state == ARM64_SSBD_MITIGATED) { + switch (ctrl) { + case PR_SPEC_ENABLE: + return -EPERM; + case PR_SPEC_DISABLE: + case PR_SPEC_FORCE_DISABLE: + return 0; + } + } + + /* + * Things are a bit backward here: the arm64 internal API + * *enables the mitigation* when the userspace API *disables + * speculation*. So much fun. + */ + switch (ctrl) { + case PR_SPEC_ENABLE: + /* If speculation is force disabled, enable is not allowed */ + if (state == ARM64_SSBD_FORCE_ENABLE || + task_spec_ssb_force_disable(task)) + return -EPERM; + task_clear_spec_ssb_disable(task); + clear_tsk_thread_flag(task, TIF_SSBD); + ssbd_ssbs_enable(task); + break; + case PR_SPEC_DISABLE: + if (state == ARM64_SSBD_FORCE_DISABLE) + return -EPERM; + task_set_spec_ssb_disable(task); + set_tsk_thread_flag(task, TIF_SSBD); + ssbd_ssbs_disable(task); + break; + case PR_SPEC_FORCE_DISABLE: + if (state == ARM64_SSBD_FORCE_DISABLE) + return -EPERM; + task_set_spec_ssb_disable(task); + task_set_spec_ssb_force_disable(task); + set_tsk_thread_flag(task, TIF_SSBD); + ssbd_ssbs_disable(task); + break; + default: + return -ERANGE; + } + + return 0; +} + +int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which, + unsigned long ctrl) +{ + switch (which) { + case PR_SPEC_STORE_BYPASS: + return ssbd_prctl_set(task, ctrl); + default: + return -ENODEV; + } +} + +static int ssbd_prctl_get(struct task_struct *task) +{ + switch (arm64_get_ssbd_state()) { + case ARM64_SSBD_UNKNOWN: + return -EINVAL; + case ARM64_SSBD_FORCE_ENABLE: + return PR_SPEC_DISABLE; + case ARM64_SSBD_KERNEL: + if (task_spec_ssb_force_disable(task)) + return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE; + if (task_spec_ssb_disable(task)) + return PR_SPEC_PRCTL | PR_SPEC_DISABLE; + return PR_SPEC_PRCTL | PR_SPEC_ENABLE; + case ARM64_SSBD_FORCE_DISABLE: + return PR_SPEC_ENABLE; + default: + return PR_SPEC_NOT_AFFECTED; + } +} + +int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) +{ + switch (which) { + case PR_SPEC_STORE_BYPASS: + return ssbd_prctl_get(task); + default: + return -ENODEV; + } +} diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index d5718a060672..4989f7ea1e59 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -50,7 +50,7 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame) if (!tsk) tsk = current; - if (!on_accessible_stack(tsk, fp)) + if (!on_accessible_stack(tsk, fp, NULL)) return -EINVAL; frame->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp)); diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c index a307b9e13392..9405d1b7f4b0 100644 --- a/arch/arm64/kernel/suspend.c +++ b/arch/arm64/kernel/suspend.c @@ -48,6 +48,10 @@ void notrace __cpu_suspend_exit(void) */ cpu_uninstall_idmap(); + /* Restore CnP bit in TTBR1_EL1 */ + if (system_supports_cnp()) + cpu_replace_ttbr1(lm_alias(swapper_pg_dir)); + /* * PSTATE was not saved over suspend/resume, re-enable any detected * features that might not have been set correctly. @@ -62,6 +66,14 @@ void notrace __cpu_suspend_exit(void) */ if (hw_breakpoint_restore) hw_breakpoint_restore(cpu); + + /* + * On resume, firmware implementing dynamic mitigation will + * have turned the mitigation on. If the user has forcefully + * disabled it, make sure their wishes are obeyed. + */ + if (arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE) + arm64_set_ssbd_mitigation(false); } /* diff --git a/arch/arm64/kernel/sys.c b/arch/arm64/kernel/sys.c index 26fe8ea93ea2..b44065fb1616 100644 --- a/arch/arm64/kernel/sys.c +++ b/arch/arm64/kernel/sys.c @@ -25,16 +25,18 @@ #include <linux/sched.h> #include <linux/slab.h> #include <linux/syscalls.h> + #include <asm/cpufeature.h> +#include <asm/syscall.h> -asmlinkage long sys_mmap(unsigned long addr, unsigned long len, - unsigned long prot, unsigned long flags, - unsigned long fd, off_t off) +SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len, + unsigned long, prot, unsigned long, flags, + unsigned long, fd, off_t, off) { if (offset_in_page(off) != 0) return -EINVAL; - return sys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT); + return ksys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT); } SYSCALL_DEFINE1(arm64_personality, unsigned int, personality) @@ -42,24 +44,25 @@ SYSCALL_DEFINE1(arm64_personality, unsigned int, personality) if (personality(personality) == PER_LINUX32 && !system_supports_32bit_el0()) return -EINVAL; - return sys_personality(personality); + return ksys_personality(personality); } /* * Wrappers to pass the pt_regs argument. */ -asmlinkage long sys_rt_sigreturn_wrapper(void); -#define sys_rt_sigreturn sys_rt_sigreturn_wrapper #define sys_personality sys_arm64_personality +asmlinkage long sys_ni_syscall(const struct pt_regs *); +#define __arm64_sys_ni_syscall sys_ni_syscall + #undef __SYSCALL -#define __SYSCALL(nr, sym) [nr] = sym, +#define __SYSCALL(nr, sym) asmlinkage long __arm64_##sym(const struct pt_regs *); +#include <asm/unistd.h> -/* - * The sys_call_table array must be 4K aligned to be accessible from - * kernel/entry.S. - */ -void * const sys_call_table[__NR_syscalls] __aligned(4096) = { - [0 ... __NR_syscalls - 1] = sys_ni_syscall, +#undef __SYSCALL +#define __SYSCALL(nr, sym) [nr] = (syscall_fn_t)__arm64_##sym, + +const syscall_fn_t sys_call_table[__NR_syscalls] = { + [0 ... __NR_syscalls - 1] = (syscall_fn_t)sys_ni_syscall, #include <asm/unistd.h> }; diff --git a/arch/arm64/kernel/sys32.c b/arch/arm64/kernel/sys32.c index a40b1343b819..0f8bcb7de700 100644 --- a/arch/arm64/kernel/sys32.c +++ b/arch/arm64/kernel/sys32.c @@ -22,31 +22,128 @@ */ #define __COMPAT_SYSCALL_NR +#include <linux/compat.h> #include <linux/compiler.h> #include <linux/syscalls.h> -asmlinkage long compat_sys_sigreturn_wrapper(void); -asmlinkage long compat_sys_rt_sigreturn_wrapper(void); -asmlinkage long compat_sys_statfs64_wrapper(void); -asmlinkage long compat_sys_fstatfs64_wrapper(void); -asmlinkage long compat_sys_pread64_wrapper(void); -asmlinkage long compat_sys_pwrite64_wrapper(void); -asmlinkage long compat_sys_truncate64_wrapper(void); -asmlinkage long compat_sys_ftruncate64_wrapper(void); -asmlinkage long compat_sys_readahead_wrapper(void); -asmlinkage long compat_sys_fadvise64_64_wrapper(void); -asmlinkage long compat_sys_sync_file_range2_wrapper(void); -asmlinkage long compat_sys_fallocate_wrapper(void); -asmlinkage long compat_sys_mmap2_wrapper(void); +#include <asm/syscall.h> -#undef __SYSCALL -#define __SYSCALL(nr, sym) [nr] = sym, +asmlinkage long compat_sys_sigreturn(void); +asmlinkage long compat_sys_rt_sigreturn(void); + +COMPAT_SYSCALL_DEFINE3(aarch32_statfs64, const char __user *, pathname, + compat_size_t, sz, struct compat_statfs64 __user *, buf) +{ + /* + * 32-bit ARM applies an OABI compatibility fixup to statfs64 and + * fstatfs64 regardless of whether OABI is in use, and therefore + * arbitrary binaries may rely upon it, so we must do the same. + * For more details, see commit: + * + * 713c481519f19df9 ("[ARM] 3108/2: old ABI compat: statfs64 and + * fstatfs64") + */ + if (sz == 88) + sz = 84; + + return kcompat_sys_statfs64(pathname, sz, buf); +} + +COMPAT_SYSCALL_DEFINE3(aarch32_fstatfs64, unsigned int, fd, compat_size_t, sz, + struct compat_statfs64 __user *, buf) +{ + /* see aarch32_statfs64 */ + if (sz == 88) + sz = 84; + + return kcompat_sys_fstatfs64(fd, sz, buf); +} /* - * The sys_call_table array must be 4K aligned to be accessible from - * kernel/entry.S. + * Note: off_4k is always in units of 4K. If we can't do the + * requested offset because it is not page-aligned, we return -EINVAL. */ -void * const compat_sys_call_table[__NR_compat_syscalls] __aligned(4096) = { - [0 ... __NR_compat_syscalls - 1] = sys_ni_syscall, +COMPAT_SYSCALL_DEFINE6(aarch32_mmap2, unsigned long, addr, unsigned long, len, + unsigned long, prot, unsigned long, flags, + unsigned long, fd, unsigned long, off_4k) +{ + if (off_4k & (~PAGE_MASK >> 12)) + return -EINVAL; + + off_4k >>= (PAGE_SHIFT - 12); + + return ksys_mmap_pgoff(addr, len, prot, flags, fd, off_4k); +} + +#ifdef CONFIG_CPU_BIG_ENDIAN +#define arg_u32p(name) u32, name##_hi, u32, name##_lo +#else +#define arg_u32p(name) u32, name##_lo, u32, name##_hi +#endif + +#define arg_u64(name) (((u64)name##_hi << 32) | name##_lo) + +COMPAT_SYSCALL_DEFINE6(aarch32_pread64, unsigned int, fd, char __user *, buf, + size_t, count, u32, __pad, arg_u32p(pos)) +{ + return ksys_pread64(fd, buf, count, arg_u64(pos)); +} + +COMPAT_SYSCALL_DEFINE6(aarch32_pwrite64, unsigned int, fd, + const char __user *, buf, size_t, count, u32, __pad, + arg_u32p(pos)) +{ + return ksys_pwrite64(fd, buf, count, arg_u64(pos)); +} + +COMPAT_SYSCALL_DEFINE4(aarch32_truncate64, const char __user *, pathname, + u32, __pad, arg_u32p(length)) +{ + return ksys_truncate(pathname, arg_u64(length)); +} + +COMPAT_SYSCALL_DEFINE4(aarch32_ftruncate64, unsigned int, fd, u32, __pad, + arg_u32p(length)) +{ + return ksys_ftruncate(fd, arg_u64(length)); +} + +COMPAT_SYSCALL_DEFINE5(aarch32_readahead, int, fd, u32, __pad, + arg_u32p(offset), size_t, count) +{ + return ksys_readahead(fd, arg_u64(offset), count); +} + +COMPAT_SYSCALL_DEFINE6(aarch32_fadvise64_64, int, fd, int, advice, + arg_u32p(offset), arg_u32p(len)) +{ + return ksys_fadvise64_64(fd, arg_u64(offset), arg_u64(len), advice); +} + +COMPAT_SYSCALL_DEFINE6(aarch32_sync_file_range2, int, fd, unsigned int, flags, + arg_u32p(offset), arg_u32p(nbytes)) +{ + return ksys_sync_file_range(fd, arg_u64(offset), arg_u64(nbytes), + flags); +} + +COMPAT_SYSCALL_DEFINE6(aarch32_fallocate, int, fd, int, mode, + arg_u32p(offset), arg_u32p(len)) +{ + return ksys_fallocate(fd, mode, arg_u64(offset), arg_u64(len)); +} + +asmlinkage long sys_ni_syscall(const struct pt_regs *); +#define __arm64_sys_ni_syscall sys_ni_syscall + +#undef __SYSCALL +#define __SYSCALL(nr, sym) asmlinkage long __arm64_##sym(const struct pt_regs *); +#include <asm/unistd32.h> + +#undef __SYSCALL +#define __SYSCALL(nr, sym) [nr] = (syscall_fn_t)__arm64_##sym, + +const syscall_fn_t compat_sys_call_table[__NR_compat_syscalls] = { + [0 ... __NR_compat_syscalls - 1] = (syscall_fn_t)sys_ni_syscall, #include <asm/unistd32.h> }; diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c index a382b2a1b84e..32653d156747 100644 --- a/arch/arm64/kernel/sys_compat.c +++ b/arch/arm64/kernel/sys_compat.c @@ -27,6 +27,7 @@ #include <linux/uaccess.h> #include <asm/cacheflush.h> +#include <asm/system_misc.h> #include <asm/unistd.h> static long @@ -68,6 +69,7 @@ do_compat_cache_op(unsigned long start, unsigned long end, int flags) long compat_arm_syscall(struct pt_regs *regs) { unsigned int no = regs->regs[7]; + void __user *addr; switch (no) { /* @@ -88,7 +90,7 @@ long compat_arm_syscall(struct pt_regs *regs) return do_compat_cache_op(regs->regs[0], regs->regs[1], regs->regs[2]); case __ARM_NR_compat_set_tls: - current->thread.tp_value = regs->regs[0]; + current->thread.uw.tp_value = regs->regs[0]; /* * Protect against register corruption from context switch. @@ -99,6 +101,21 @@ long compat_arm_syscall(struct pt_regs *regs) return 0; default: - return -ENOSYS; + /* + * Calls 9f00xx..9f07ff are defined to return -ENOSYS + * if not implemented, rather than raising SIGILL. This + * way the calling program can gracefully determine whether + * a feature is supported. + */ + if ((no & 0xffff) <= 0x7ff) + return -ENOSYS; + break; } + + addr = (void __user *)instruction_pointer(regs) - + (compat_thumb_mode(regs) ? 2 : 4); + + arm64_notify_die("Oops - bad compat syscall(2)", regs, + SIGILL, ILL_ILLTRP, addr, no); + return 0; } diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c new file mode 100644 index 000000000000..032d22312881 --- /dev/null +++ b/arch/arm64/kernel/syscall.c @@ -0,0 +1,139 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/compiler.h> +#include <linux/context_tracking.h> +#include <linux/errno.h> +#include <linux/nospec.h> +#include <linux/ptrace.h> +#include <linux/syscalls.h> + +#include <asm/daifflags.h> +#include <asm/fpsimd.h> +#include <asm/syscall.h> +#include <asm/thread_info.h> +#include <asm/unistd.h> + +long compat_arm_syscall(struct pt_regs *regs); + +long sys_ni_syscall(void); + +asmlinkage long do_ni_syscall(struct pt_regs *regs) +{ +#ifdef CONFIG_COMPAT + long ret; + if (is_compat_task()) { + ret = compat_arm_syscall(regs); + if (ret != -ENOSYS) + return ret; + } +#endif + + return sys_ni_syscall(); +} + +static long __invoke_syscall(struct pt_regs *regs, syscall_fn_t syscall_fn) +{ + return syscall_fn(regs); +} + +static void invoke_syscall(struct pt_regs *regs, unsigned int scno, + unsigned int sc_nr, + const syscall_fn_t syscall_table[]) +{ + long ret; + + if (scno < sc_nr) { + syscall_fn_t syscall_fn; + syscall_fn = syscall_table[array_index_nospec(scno, sc_nr)]; + ret = __invoke_syscall(regs, syscall_fn); + } else { + ret = do_ni_syscall(regs); + } + + regs->regs[0] = ret; +} + +static inline bool has_syscall_work(unsigned long flags) +{ + return unlikely(flags & _TIF_SYSCALL_WORK); +} + +int syscall_trace_enter(struct pt_regs *regs); +void syscall_trace_exit(struct pt_regs *regs); + +static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr, + const syscall_fn_t syscall_table[]) +{ + unsigned long flags = current_thread_info()->flags; + + regs->orig_x0 = regs->regs[0]; + regs->syscallno = scno; + + local_daif_restore(DAIF_PROCCTX); + user_exit(); + + if (has_syscall_work(flags)) { + /* set default errno for user-issued syscall(-1) */ + if (scno == NO_SYSCALL) + regs->regs[0] = -ENOSYS; + scno = syscall_trace_enter(regs); + if (scno == NO_SYSCALL) + goto trace_exit; + } + + invoke_syscall(regs, scno, sc_nr, syscall_table); + + /* + * The tracing status may have changed under our feet, so we have to + * check again. However, if we were tracing entry, then we always trace + * exit regardless, as the old entry assembly did. + */ + if (!has_syscall_work(flags) && !IS_ENABLED(CONFIG_DEBUG_RSEQ)) { + local_daif_mask(); + flags = current_thread_info()->flags; + if (!has_syscall_work(flags)) { + /* + * We're off to userspace, where interrupts are + * always enabled after we restore the flags from + * the SPSR. + */ + trace_hardirqs_on(); + return; + } + local_daif_restore(DAIF_PROCCTX); + } + +trace_exit: + syscall_trace_exit(regs); +} + +static inline void sve_user_discard(void) +{ + if (!system_supports_sve()) + return; + + clear_thread_flag(TIF_SVE); + + /* + * task_fpsimd_load() won't be called to update CPACR_EL1 in + * ret_to_user unless TIF_FOREIGN_FPSTATE is still set, which only + * happens if a context switch or kernel_neon_begin() or context + * modification (sigreturn, ptrace) intervenes. + * So, ensure that CPACR_EL1 is already correct for the fast-path case. + */ + sve_user_disable(); +} + +asmlinkage void el0_svc_handler(struct pt_regs *regs) +{ + sve_user_discard(); + el0_svc_common(regs, regs->regs[8], __NR_syscalls, sys_call_table); +} + +#ifdef CONFIG_COMPAT +asmlinkage void el0_svc_compat_handler(struct pt_regs *regs) +{ + el0_svc_common(regs, regs->regs[7], __NR_compat_syscalls, + compat_sys_call_table); +} +#endif diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index 21868530018e..0825c4a856e3 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -11,7 +11,9 @@ * for more details. */ +#include <linux/acpi.h> #include <linux/arch_topology.h> +#include <linux/cacheinfo.h> #include <linux/cpu.h> #include <linux/cpumask.h> #include <linux/init.h> @@ -22,6 +24,7 @@ #include <linux/sched.h> #include <linux/sched/topology.h> #include <linux/slab.h> +#include <linux/smp.h> #include <linux/string.h> #include <asm/cpu.h> @@ -47,7 +50,7 @@ static int __init get_cpu_for_node(struct device_node *node) return cpu; } -static int __init parse_core(struct device_node *core, int cluster_id, +static int __init parse_core(struct device_node *core, int package_id, int core_id) { char name[10]; @@ -63,7 +66,7 @@ static int __init parse_core(struct device_node *core, int cluster_id, leaf = false; cpu = get_cpu_for_node(t); if (cpu >= 0) { - cpu_topology[cpu].cluster_id = cluster_id; + cpu_topology[cpu].package_id = package_id; cpu_topology[cpu].core_id = core_id; cpu_topology[cpu].thread_id = i; } else { @@ -85,7 +88,7 @@ static int __init parse_core(struct device_node *core, int cluster_id, return -EINVAL; } - cpu_topology[cpu].cluster_id = cluster_id; + cpu_topology[cpu].package_id = package_id; cpu_topology[cpu].core_id = core_id; } else if (leaf) { pr_err("%pOF: Can't get CPU for leaf core\n", core); @@ -101,7 +104,7 @@ static int __init parse_cluster(struct device_node *cluster, int depth) bool leaf = true; bool has_cores = false; struct device_node *c; - static int cluster_id __initdata; + static int package_id __initdata; int core_id = 0; int i, ret; @@ -140,7 +143,7 @@ static int __init parse_cluster(struct device_node *cluster, int depth) } if (leaf) { - ret = parse_core(c, cluster_id, core_id++); + ret = parse_core(c, package_id, core_id++); } else { pr_err("%pOF: Non-leaf cluster with core %s\n", cluster, name); @@ -158,7 +161,7 @@ static int __init parse_cluster(struct device_node *cluster, int depth) pr_warn("%pOF: empty cluster\n", cluster); if (leaf) - cluster_id++; + package_id++; return 0; } @@ -194,7 +197,7 @@ static int __init parse_dt_topology(void) * only mark cores described in the DT as possible. */ for_each_possible_cpu(cpu) - if (cpu_topology[cpu].cluster_id == -1) + if (cpu_topology[cpu].package_id == -1) ret = -EINVAL; out_map: @@ -212,7 +215,19 @@ EXPORT_SYMBOL_GPL(cpu_topology); const struct cpumask *cpu_coregroup_mask(int cpu) { - return &cpu_topology[cpu].core_sibling; + const cpumask_t *core_mask = cpumask_of_node(cpu_to_node(cpu)); + + /* Find the smaller of NUMA, core or LLC siblings */ + if (cpumask_subset(&cpu_topology[cpu].core_sibling, core_mask)) { + /* not numa in package, lets use the package siblings */ + core_mask = &cpu_topology[cpu].core_sibling; + } + if (cpu_topology[cpu].llc_id != -1) { + if (cpumask_subset(&cpu_topology[cpu].llc_sibling, core_mask)) + core_mask = &cpu_topology[cpu].llc_sibling; + } + + return core_mask; } static void update_siblings_masks(unsigned int cpuid) @@ -221,22 +236,25 @@ static void update_siblings_masks(unsigned int cpuid) int cpu; /* update core and thread sibling masks */ - for_each_possible_cpu(cpu) { + for_each_online_cpu(cpu) { cpu_topo = &cpu_topology[cpu]; - if (cpuid_topo->cluster_id != cpu_topo->cluster_id) + if (cpuid_topo->llc_id == cpu_topo->llc_id) { + cpumask_set_cpu(cpu, &cpuid_topo->llc_sibling); + cpumask_set_cpu(cpuid, &cpu_topo->llc_sibling); + } + + if (cpuid_topo->package_id != cpu_topo->package_id) continue; cpumask_set_cpu(cpuid, &cpu_topo->core_sibling); - if (cpu != cpuid) - cpumask_set_cpu(cpu, &cpuid_topo->core_sibling); + cpumask_set_cpu(cpu, &cpuid_topo->core_sibling); if (cpuid_topo->core_id != cpu_topo->core_id) continue; cpumask_set_cpu(cpuid, &cpu_topo->thread_sibling); - if (cpu != cpuid) - cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling); + cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling); } } @@ -245,7 +263,7 @@ void store_cpu_topology(unsigned int cpuid) struct cpu_topology *cpuid_topo = &cpu_topology[cpuid]; u64 mpidr; - if (cpuid_topo->cluster_id != -1) + if (cpuid_topo->package_id != -1) goto topology_populated; mpidr = read_cpuid_mpidr(); @@ -259,25 +277,38 @@ void store_cpu_topology(unsigned int cpuid) /* Multiprocessor system : Multi-threads per core */ cpuid_topo->thread_id = MPIDR_AFFINITY_LEVEL(mpidr, 0); cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL(mpidr, 1); - cpuid_topo->cluster_id = MPIDR_AFFINITY_LEVEL(mpidr, 2) | + cpuid_topo->package_id = MPIDR_AFFINITY_LEVEL(mpidr, 2) | MPIDR_AFFINITY_LEVEL(mpidr, 3) << 8; } else { /* Multiprocessor system : Single-thread per core */ cpuid_topo->thread_id = -1; cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL(mpidr, 0); - cpuid_topo->cluster_id = MPIDR_AFFINITY_LEVEL(mpidr, 1) | + cpuid_topo->package_id = MPIDR_AFFINITY_LEVEL(mpidr, 1) | MPIDR_AFFINITY_LEVEL(mpidr, 2) << 8 | MPIDR_AFFINITY_LEVEL(mpidr, 3) << 16; } pr_debug("CPU%u: cluster %d core %d thread %d mpidr %#016llx\n", - cpuid, cpuid_topo->cluster_id, cpuid_topo->core_id, + cpuid, cpuid_topo->package_id, cpuid_topo->core_id, cpuid_topo->thread_id, mpidr); topology_populated: update_siblings_masks(cpuid); } +static void clear_cpu_topology(int cpu) +{ + struct cpu_topology *cpu_topo = &cpu_topology[cpu]; + + cpumask_clear(&cpu_topo->llc_sibling); + cpumask_set_cpu(cpu, &cpu_topo->llc_sibling); + + cpumask_clear(&cpu_topo->core_sibling); + cpumask_set_cpu(cpu, &cpu_topo->core_sibling); + cpumask_clear(&cpu_topo->thread_sibling); + cpumask_set_cpu(cpu, &cpu_topo->thread_sibling); +} + static void __init reset_cpu_topology(void) { unsigned int cpu; @@ -287,15 +318,80 @@ static void __init reset_cpu_topology(void) cpu_topo->thread_id = -1; cpu_topo->core_id = 0; - cpu_topo->cluster_id = -1; + cpu_topo->package_id = -1; + cpu_topo->llc_id = -1; + + clear_cpu_topology(cpu); + } +} + +void remove_cpu_topology(unsigned int cpu) +{ + int sibling; - cpumask_clear(&cpu_topo->core_sibling); - cpumask_set_cpu(cpu, &cpu_topo->core_sibling); - cpumask_clear(&cpu_topo->thread_sibling); - cpumask_set_cpu(cpu, &cpu_topo->thread_sibling); + for_each_cpu(sibling, topology_core_cpumask(cpu)) + cpumask_clear_cpu(cpu, topology_core_cpumask(sibling)); + for_each_cpu(sibling, topology_sibling_cpumask(cpu)) + cpumask_clear_cpu(cpu, topology_sibling_cpumask(sibling)); + for_each_cpu(sibling, topology_llc_cpumask(cpu)) + cpumask_clear_cpu(cpu, topology_llc_cpumask(sibling)); + + clear_cpu_topology(cpu); +} + +#ifdef CONFIG_ACPI +/* + * Propagate the topology information of the processor_topology_node tree to the + * cpu_topology array. + */ +static int __init parse_acpi_topology(void) +{ + bool is_threaded; + int cpu, topology_id; + + is_threaded = read_cpuid_mpidr() & MPIDR_MT_BITMASK; + + for_each_possible_cpu(cpu) { + int i, cache_id; + + topology_id = find_acpi_cpu_topology(cpu, 0); + if (topology_id < 0) + return topology_id; + + if (is_threaded) { + cpu_topology[cpu].thread_id = topology_id; + topology_id = find_acpi_cpu_topology(cpu, 1); + cpu_topology[cpu].core_id = topology_id; + } else { + cpu_topology[cpu].thread_id = -1; + cpu_topology[cpu].core_id = topology_id; + } + topology_id = find_acpi_cpu_topology_package(cpu); + cpu_topology[cpu].package_id = topology_id; + + i = acpi_find_last_cache_level(cpu); + + if (i > 0) { + /* + * this is the only part of cpu_topology that has + * a direct relationship with the cache topology + */ + cache_id = find_acpi_cpu_cache_topology(cpu, i); + if (cache_id > 0) + cpu_topology[cpu].llc_id = cache_id; + } } + + return 0; } +#else +static inline int __init parse_acpi_topology(void) +{ + return -EINVAL; +} +#endif + void __init init_cpu_topology(void) { reset_cpu_topology(); @@ -304,6 +400,8 @@ void __init init_cpu_topology(void) * Discard anything that was parsed if we hit an error so we * don't use partial information. */ - if (of_have_populated_dt() && parse_dt_topology()) + if (!acpi_disabled && parse_acpi_topology()) + reset_cpu_topology(); + else if (of_have_populated_dt() && parse_dt_topology()) reset_cpu_topology(); } diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index eb2d15147e8d..5f4d9acb32f5 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -38,6 +38,7 @@ #include <asm/atomic.h> #include <asm/bug.h> +#include <asm/cpufeature.h> #include <asm/daifflags.h> #include <asm/debug-monitors.h> #include <asm/esr.h> @@ -223,13 +224,61 @@ void die(const char *str, struct pt_regs *regs, int err) do_exit(SIGSEGV); } +static void arm64_show_signal(int signo, const char *str) +{ + static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL, + DEFAULT_RATELIMIT_BURST); + struct task_struct *tsk = current; + unsigned int esr = tsk->thread.fault_code; + struct pt_regs *regs = task_pt_regs(tsk); + + /* Leave if the signal won't be shown */ + if (!show_unhandled_signals || + !unhandled_signal(tsk, signo) || + !__ratelimit(&rs)) + return; + + pr_info("%s[%d]: unhandled exception: ", tsk->comm, task_pid_nr(tsk)); + if (esr) + pr_cont("%s, ESR 0x%08x, ", esr_get_class_string(esr), esr); + + pr_cont("%s", str); + print_vma_addr(KERN_CONT " in ", regs->pc); + pr_cont("\n"); + __show_regs(regs); +} + +void arm64_force_sig_fault(int signo, int code, void __user *addr, + const char *str) +{ + arm64_show_signal(signo, str); + force_sig_fault(signo, code, addr, current); +} + +void arm64_force_sig_mceerr(int code, void __user *addr, short lsb, + const char *str) +{ + arm64_show_signal(SIGBUS, str); + force_sig_mceerr(code, addr, lsb, current); +} + +void arm64_force_sig_ptrace_errno_trap(int errno, void __user *addr, + const char *str) +{ + arm64_show_signal(SIGTRAP, str); + force_sig_ptrace_errno_trap(errno, addr); +} + void arm64_notify_die(const char *str, struct pt_regs *regs, - struct siginfo *info, int err) + int signo, int sicode, void __user *addr, + int err) { if (user_mode(regs)) { + WARN_ON(regs != current_pt_regs()); current->thread.fault_address = 0; current->thread.fault_code = err; - force_sig_info(info->si_signo, info, current); + + arm64_force_sig_fault(signo, sicode, addr, str); } else { die(str, regs, err); } @@ -243,7 +292,8 @@ void arm64_skip_faulting_instruction(struct pt_regs *regs, unsigned long size) * If we were single stepping, we want to get the step exception after * we return from the trap. */ - user_fastforward_single_step(current); + if (user_mode(regs)) + user_fastforward_single_step(current); } static LIST_HEAD(undef_hook); @@ -275,10 +325,12 @@ static int call_undef_hook(struct pt_regs *regs) int (*fn)(struct pt_regs *regs, u32 instr) = NULL; void __user *pc = (void __user *)instruction_pointer(regs); - if (!user_mode(regs)) - return 1; - - if (compat_thumb_mode(regs)) { + if (!user_mode(regs)) { + __le32 instr_le; + if (probe_kernel_address((__force __le32 *)pc, instr_le)) + goto exit; + instr = le32_to_cpu(instr_le); + } else if (compat_thumb_mode(regs)) { /* 16-bit Thumb instruction */ __le16 instr_le; if (get_user(instr_le, (__le16 __user *)pc)) @@ -311,12 +363,13 @@ exit: return fn ? fn(regs, instr) : 1; } -void force_signal_inject(int signal, int code, struct pt_regs *regs, - unsigned long address) +void force_signal_inject(int signal, int code, unsigned long address) { - siginfo_t info; - void __user *pc = (void __user *)instruction_pointer(regs); const char *desc; + struct pt_regs *regs = current_pt_regs(); + + if (WARN_ON(!user_mode(regs))) + return; switch (signal) { case SIGILL: @@ -330,25 +383,19 @@ void force_signal_inject(int signal, int code, struct pt_regs *regs, break; } - if (unhandled_signal(current, signal) && - show_unhandled_signals_ratelimited()) { - pr_info("%s[%d]: %s: pc=%p\n", - current->comm, task_pid_nr(current), desc, pc); - dump_instr(KERN_INFO, regs); + /* Force signals we don't understand to SIGKILL */ + if (WARN_ON(signal != SIGKILL && + siginfo_layout(signal, code) != SIL_FAULT)) { + signal = SIGKILL; } - info.si_signo = signal; - info.si_errno = 0; - info.si_code = code; - info.si_addr = pc; - - arm64_notify_die(desc, regs, &info, 0); + arm64_notify_die(desc, regs, signal, code, (void __user *)address, 0); } /* * Set up process info to signal segmentation fault - called on access error. */ -void arm64_notify_segfault(struct pt_regs *regs, unsigned long addr) +void arm64_notify_segfault(unsigned long addr) { int code; @@ -359,7 +406,7 @@ void arm64_notify_segfault(struct pt_regs *regs, unsigned long addr) code = SEGV_ACCERR; up_read(¤t->mm->mmap_sem); - force_signal_inject(SIGSEGV, code, regs, addr); + force_signal_inject(SIGSEGV, code, addr); } asmlinkage void __exception do_undefinstr(struct pt_regs *regs) @@ -371,13 +418,8 @@ asmlinkage void __exception do_undefinstr(struct pt_regs *regs) if (call_undef_hook(regs) == 0) return; - force_signal_inject(SIGILL, ILL_ILLOPC, regs, 0); -} - -int cpu_enable_cache_maint_trap(void *__unused) -{ - config_sctlr_el1(SCTLR_EL1_UCI, 0); - return 0; + BUG_ON(!user_mode(regs)); + force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc); } #define __user_cache_maint(insn, address, res) \ @@ -403,7 +445,7 @@ int cpu_enable_cache_maint_trap(void *__unused) static void user_cache_maint_handler(unsigned int esr, struct pt_regs *regs) { unsigned long address; - int rt = (esr & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT; + int rt = ESR_ELx_SYS64_ISS_RT(esr); int crm = (esr & ESR_ELx_SYS64_ISS_CRM_MASK) >> ESR_ELx_SYS64_ISS_CRM_SHIFT; int ret = 0; @@ -426,19 +468,19 @@ static void user_cache_maint_handler(unsigned int esr, struct pt_regs *regs) __user_cache_maint("ic ivau", address, ret); break; default: - force_signal_inject(SIGILL, ILL_ILLOPC, regs, 0); + force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc); return; } if (ret) - arm64_notify_segfault(regs, address); + arm64_notify_segfault(address); else arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE); } static void ctr_read_handler(unsigned int esr, struct pt_regs *regs) { - int rt = (esr & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT; + int rt = ESR_ELx_SYS64_ISS_RT(esr); unsigned long val = arm64_ftr_reg_user_value(&arm64_ftr_reg_ctrel0); pt_regs_write_reg(regs, rt, val); @@ -448,7 +490,7 @@ static void ctr_read_handler(unsigned int esr, struct pt_regs *regs) static void cntvct_read_handler(unsigned int esr, struct pt_regs *regs) { - int rt = (esr & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT; + int rt = ESR_ELx_SYS64_ISS_RT(esr); pt_regs_write_reg(regs, rt, arch_counter_get_cntvct()); arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE); @@ -456,12 +498,28 @@ static void cntvct_read_handler(unsigned int esr, struct pt_regs *regs) static void cntfrq_read_handler(unsigned int esr, struct pt_regs *regs) { - int rt = (esr & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT; + int rt = ESR_ELx_SYS64_ISS_RT(esr); pt_regs_write_reg(regs, rt, arch_timer_get_rate()); arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE); } +static void mrs_handler(unsigned int esr, struct pt_regs *regs) +{ + u32 sysreg, rt; + + rt = ESR_ELx_SYS64_ISS_RT(esr); + sysreg = esr_sys64_to_sysreg(esr); + + if (do_emulate_mrs(regs, sysreg, rt) != 0) + force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc); +} + +static void wfi_handler(unsigned int esr, struct pt_regs *regs) +{ + arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE); +} + struct sys64_hook { unsigned int esr_mask; unsigned int esr_val; @@ -492,9 +550,176 @@ static struct sys64_hook sys64_hooks[] = { .esr_val = ESR_ELx_SYS64_ISS_SYS_CNTFRQ, .handler = cntfrq_read_handler, }, + { + /* Trap read access to CPUID registers */ + .esr_mask = ESR_ELx_SYS64_ISS_SYS_MRS_OP_MASK, + .esr_val = ESR_ELx_SYS64_ISS_SYS_MRS_OP_VAL, + .handler = mrs_handler, + }, + { + /* Trap WFI instructions executed in userspace */ + .esr_mask = ESR_ELx_WFx_MASK, + .esr_val = ESR_ELx_WFx_WFI_VAL, + .handler = wfi_handler, + }, + {}, +}; + + +#ifdef CONFIG_COMPAT +#define PSTATE_IT_1_0_SHIFT 25 +#define PSTATE_IT_1_0_MASK (0x3 << PSTATE_IT_1_0_SHIFT) +#define PSTATE_IT_7_2_SHIFT 10 +#define PSTATE_IT_7_2_MASK (0x3f << PSTATE_IT_7_2_SHIFT) + +static u32 compat_get_it_state(struct pt_regs *regs) +{ + u32 it, pstate = regs->pstate; + + it = (pstate & PSTATE_IT_1_0_MASK) >> PSTATE_IT_1_0_SHIFT; + it |= ((pstate & PSTATE_IT_7_2_MASK) >> PSTATE_IT_7_2_SHIFT) << 2; + + return it; +} + +static void compat_set_it_state(struct pt_regs *regs, u32 it) +{ + u32 pstate_it; + + pstate_it = (it << PSTATE_IT_1_0_SHIFT) & PSTATE_IT_1_0_MASK; + pstate_it |= ((it >> 2) << PSTATE_IT_7_2_SHIFT) & PSTATE_IT_7_2_MASK; + + regs->pstate &= ~PSR_AA32_IT_MASK; + regs->pstate |= pstate_it; +} + +static bool cp15_cond_valid(unsigned int esr, struct pt_regs *regs) +{ + int cond; + + /* Only a T32 instruction can trap without CV being set */ + if (!(esr & ESR_ELx_CV)) { + u32 it; + + it = compat_get_it_state(regs); + if (!it) + return true; + + cond = it >> 4; + } else { + cond = (esr & ESR_ELx_COND_MASK) >> ESR_ELx_COND_SHIFT; + } + + return aarch32_opcode_cond_checks[cond](regs->pstate); +} + +static void advance_itstate(struct pt_regs *regs) +{ + u32 it; + + /* ARM mode */ + if (!(regs->pstate & PSR_AA32_T_BIT) || + !(regs->pstate & PSR_AA32_IT_MASK)) + return; + + it = compat_get_it_state(regs); + + /* + * If this is the last instruction of the block, wipe the IT + * state. Otherwise advance it. + */ + if (!(it & 7)) + it = 0; + else + it = (it & 0xe0) | ((it << 1) & 0x1f); + + compat_set_it_state(regs, it); +} + +static void arm64_compat_skip_faulting_instruction(struct pt_regs *regs, + unsigned int sz) +{ + advance_itstate(regs); + arm64_skip_faulting_instruction(regs, sz); +} + +static void compat_cntfrq_read_handler(unsigned int esr, struct pt_regs *regs) +{ + int reg = (esr & ESR_ELx_CP15_32_ISS_RT_MASK) >> ESR_ELx_CP15_32_ISS_RT_SHIFT; + + pt_regs_write_reg(regs, reg, arch_timer_get_rate()); + arm64_compat_skip_faulting_instruction(regs, 4); +} + +static struct sys64_hook cp15_32_hooks[] = { + { + .esr_mask = ESR_ELx_CP15_32_ISS_SYS_MASK, + .esr_val = ESR_ELx_CP15_32_ISS_SYS_CNTFRQ, + .handler = compat_cntfrq_read_handler, + }, {}, }; +static void compat_cntvct_read_handler(unsigned int esr, struct pt_regs *regs) +{ + int rt = (esr & ESR_ELx_CP15_64_ISS_RT_MASK) >> ESR_ELx_CP15_64_ISS_RT_SHIFT; + int rt2 = (esr & ESR_ELx_CP15_64_ISS_RT2_MASK) >> ESR_ELx_CP15_64_ISS_RT2_SHIFT; + u64 val = arch_counter_get_cntvct(); + + pt_regs_write_reg(regs, rt, lower_32_bits(val)); + pt_regs_write_reg(regs, rt2, upper_32_bits(val)); + arm64_compat_skip_faulting_instruction(regs, 4); +} + +static struct sys64_hook cp15_64_hooks[] = { + { + .esr_mask = ESR_ELx_CP15_64_ISS_SYS_MASK, + .esr_val = ESR_ELx_CP15_64_ISS_SYS_CNTVCT, + .handler = compat_cntvct_read_handler, + }, + {}, +}; + +asmlinkage void __exception do_cp15instr(unsigned int esr, struct pt_regs *regs) +{ + struct sys64_hook *hook, *hook_base; + + if (!cp15_cond_valid(esr, regs)) { + /* + * There is no T16 variant of a CP access, so we + * always advance PC by 4 bytes. + */ + arm64_compat_skip_faulting_instruction(regs, 4); + return; + } + + switch (ESR_ELx_EC(esr)) { + case ESR_ELx_EC_CP15_32: + hook_base = cp15_32_hooks; + break; + case ESR_ELx_EC_CP15_64: + hook_base = cp15_64_hooks; + break; + default: + do_undefinstr(regs); + return; + } + + for (hook = hook_base; hook->handler; hook++) + if ((hook->esr_mask & esr) == hook->esr_val) { + hook->handler(esr, regs); + return; + } + + /* + * New cp15 instructions may previously have been undefined at + * EL0. Fall back to our usual undefined instruction handler + * so that we handle these consistently. + */ + do_undefinstr(regs); +} +#endif + asmlinkage void __exception do_sysinstr(unsigned int esr, struct pt_regs *regs) { struct sys64_hook *hook; @@ -513,22 +738,6 @@ asmlinkage void __exception do_sysinstr(unsigned int esr, struct pt_regs *regs) do_undefinstr(regs); } -long compat_arm_syscall(struct pt_regs *regs); - -asmlinkage long do_ni_syscall(struct pt_regs *regs) -{ -#ifdef CONFIG_COMPAT - long ret; - if (is_compat_task()) { - ret = compat_arm_syscall(regs); - if (ret != -ENOSYS) - return ret; - } -#endif - - return sys_ni_syscall(); -} - static const char *esr_class_str[] = { [0 ... ESR_ELx_EC_MAX] = "UNRECOGNIZED EC", [ESR_ELx_EC_UNKNOWN] = "Unknown/Uncategorized", @@ -587,7 +796,6 @@ asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr) handler[reason], smp_processor_id(), esr, esr_get_class_string(esr)); - die("Oops - bad mode", regs, 0); local_daif_mask(); panic("bad mode"); } @@ -598,23 +806,13 @@ asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr) */ asmlinkage void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr) { - siginfo_t info; void __user *pc = (void __user *)instruction_pointer(regs); - console_verbose(); - - pr_crit("Bad EL0 synchronous exception detected on CPU%d, code 0x%08x -- %s\n", - smp_processor_id(), esr, esr_get_class_string(esr)); - __show_regs(regs); - - info.si_signo = SIGILL; - info.si_errno = 0; - info.si_code = ILL_ILLOPC; - info.si_addr = pc; current->thread.fault_address = 0; - current->thread.fault_code = 0; + current->thread.fault_code = esr; - force_sig_info(info.si_signo, &info, current); + arm64_force_sig_fault(SIGILL, ILL_ILLOPC, pc, + "Bad EL0 synchronous exception"); } #ifdef CONFIG_VMAP_STACK diff --git a/arch/arm64/kernel/vdso/note.S b/arch/arm64/kernel/vdso/note.S index b82c85e5d972..e20483b104d9 100644 --- a/arch/arm64/kernel/vdso/note.S +++ b/arch/arm64/kernel/vdso/note.S @@ -22,7 +22,10 @@ #include <linux/uts.h> #include <linux/version.h> #include <linux/elfnote.h> +#include <linux/build-salt.h> ELFNOTE_START(Linux, 0, "a") .long LINUX_VERSION_CODE ELFNOTE_END + +BUILD_SALT diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 0221aca6493d..03b00007553d 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -34,25 +34,25 @@ jiffies = jiffies_64; * 4 KB (see related ASSERT() below) \ */ \ . = ALIGN(SZ_4K); \ - VMLINUX_SYMBOL(__hyp_idmap_text_start) = .; \ + __hyp_idmap_text_start = .; \ *(.hyp.idmap.text) \ - VMLINUX_SYMBOL(__hyp_idmap_text_end) = .; \ - VMLINUX_SYMBOL(__hyp_text_start) = .; \ + __hyp_idmap_text_end = .; \ + __hyp_text_start = .; \ *(.hyp.text) \ - VMLINUX_SYMBOL(__hyp_text_end) = .; + __hyp_text_end = .; #define IDMAP_TEXT \ . = ALIGN(SZ_4K); \ - VMLINUX_SYMBOL(__idmap_text_start) = .; \ + __idmap_text_start = .; \ *(.idmap.text) \ - VMLINUX_SYMBOL(__idmap_text_end) = .; + __idmap_text_end = .; #ifdef CONFIG_HIBERNATION #define HIBERNATE_TEXT \ . = ALIGN(SZ_4K); \ - VMLINUX_SYMBOL(__hibernate_exit_text_start) = .;\ + __hibernate_exit_text_start = .; \ *(.hibernate_exit.text) \ - VMLINUX_SYMBOL(__hibernate_exit_text_end) = .; + __hibernate_exit_text_end = .; #else #define HIBERNATE_TEXT #endif @@ -60,10 +60,10 @@ jiffies = jiffies_64; #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 #define TRAMP_TEXT \ . = ALIGN(PAGE_SIZE); \ - VMLINUX_SYMBOL(__entry_tramp_text_start) = .; \ + __entry_tramp_text_start = .; \ *(.entry.tramp.text) \ . = ALIGN(PAGE_SIZE); \ - VMLINUX_SYMBOL(__entry_tramp_text_end) = .; + __entry_tramp_text_end = .; #else #define TRAMP_TEXT #endif @@ -138,6 +138,23 @@ SECTIONS EXCEPTION_TABLE(8) /* __init_begin will be marked RO NX */ NOTES + . = ALIGN(PAGE_SIZE); + idmap_pg_dir = .; + . += IDMAP_DIR_SIZE; + +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 + tramp_pg_dir = .; + . += PAGE_SIZE; +#endif + +#ifdef CONFIG_ARM64_SW_TTBR0_PAN + reserved_ttbr0 = .; + . += RESERVED_TTBR0_SIZE; +#endif + swapper_pg_dir = .; + . += PAGE_SIZE; + swapper_pg_end = .; + . = ALIGN(SEGMENT_ALIGN); __init_begin = .; __inittext_begin = .; @@ -166,7 +183,6 @@ SECTIONS INIT_SETUP(16) INIT_CALLS CON_INITCALL - SECURITY_INITCALL INIT_RAM_FS *(.init.rodata.* .init.bss) /* from the EFI stub */ } @@ -216,21 +232,9 @@ SECTIONS BSS_SECTION(0, 0, 0) . = ALIGN(PAGE_SIZE); - idmap_pg_dir = .; - . += IDMAP_DIR_SIZE; - -#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 - tramp_pg_dir = .; - . += PAGE_SIZE; -#endif - -#ifdef CONFIG_ARM64_SW_TTBR0_PAN - reserved_ttbr0 = .; - . += RESERVED_TTBR0_SIZE; -#endif - swapper_pg_dir = .; - . += SWAPPER_DIR_SIZE; - swapper_pg_end = .; + init_pg_dir = .; + . += INIT_DIR_SIZE; + init_pg_end = .; __pecoff_data_size = ABSOLUTE(. - __initdata_begin); _end = .; |

