diff options
Diffstat (limited to 'arch')
69 files changed, 740 insertions, 290 deletions
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index b4441b0764d7..a045f3086047 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -149,7 +149,7 @@ config ARC_CPU_770 Support for ARC770 core introduced with Rel 4.10 (Summer 2011) This core has a bunch of cool new features: -MMU-v3: Variable Page Sz (4k, 8k, 16k), bigger J-TLB (128x4) - Shared Address Spaces (for sharing TLB entires in MMU) + Shared Address Spaces (for sharing TLB entries in MMU) -Caches: New Prog Model, Region Flush -Insns: endian swap, load-locked/store-conditional, time-stamp-ctr diff --git a/arch/arc/Makefile b/arch/arc/Makefile index 99cce77ab98f..644815c0516e 100644 --- a/arch/arc/Makefile +++ b/arch/arc/Makefile @@ -6,33 +6,11 @@ # published by the Free Software Foundation. # -ifeq ($(CROSS_COMPILE),) -ifndef CONFIG_CPU_BIG_ENDIAN -CROSS_COMPILE := arc-linux- -else -CROSS_COMPILE := arceb-linux- -endif -endif - KBUILD_DEFCONFIG := nsim_700_defconfig cflags-y += -fno-common -pipe -fno-builtin -mmedium-calls -D__linux__ cflags-$(CONFIG_ISA_ARCOMPACT) += -mA7 -cflags-$(CONFIG_ISA_ARCV2) += -mcpu=archs - -is_700 = $(shell $(CC) -dM -E - < /dev/null | grep -q "ARC700" && echo 1 || echo 0) - -ifdef CONFIG_ISA_ARCOMPACT -ifeq ($(is_700), 0) - $(error Toolchain not configured for ARCompact builds) -endif -endif - -ifdef CONFIG_ISA_ARCV2 -ifeq ($(is_700), 1) - $(error Toolchain not configured for ARCv2 builds) -endif -endif +cflags-$(CONFIG_ISA_ARCV2) += -mcpu=hs38 ifdef CONFIG_ARC_CURR_IN_REG # For a global register defintion, make sure it gets passed to every file @@ -79,7 +57,7 @@ cflags-$(disable_small_data) += -mno-sdata -fcall-used-gp cflags-$(CONFIG_CPU_BIG_ENDIAN) += -mbig-endian ldflags-$(CONFIG_CPU_BIG_ENDIAN) += -EB -LIBGCC := $(shell $(CC) $(cflags-y) --print-libgcc-file-name) +LIBGCC = $(shell $(CC) $(cflags-y) --print-libgcc-file-name) # Modules with short calls might break for calls into builtin-kernel KBUILD_CFLAGS_MODULE += -mlong-calls -mno-millicode diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c index 4674541eba3f..8ce6e7235915 100644 --- a/arch/arc/kernel/process.c +++ b/arch/arc/kernel/process.c @@ -241,6 +241,26 @@ int copy_thread(unsigned long clone_flags, task_thread_info(current)->thr_ptr; } + + /* + * setup usermode thread pointer #1: + * when child is picked by scheduler, __switch_to() uses @c_callee to + * populate usermode callee regs: this works (despite being in a kernel + * function) since special return path for child @ret_from_fork() + * ensures those regs are not clobbered all the way to RTIE to usermode + */ + c_callee->r25 = task_thread_info(p)->thr_ptr; + +#ifdef CONFIG_ARC_CURR_IN_REG + /* + * setup usermode thread pointer #2: + * however for this special use of r25 in kernel, __switch_to() sets + * r25 for kernel needs and only in the final return path is usermode + * r25 setup, from pt_regs->user_r25. So set that up as well + */ + c_regs->user_r25 = c_callee->r25; +#endif + return 0; } diff --git a/arch/arm/boot/dts/at91-sama5d2_ptc_ek.dts b/arch/arm/boot/dts/at91-sama5d2_ptc_ek.dts index b10dccd0958f..3b1baa8605a7 100644 --- a/arch/arm/boot/dts/at91-sama5d2_ptc_ek.dts +++ b/arch/arm/boot/dts/at91-sama5d2_ptc_ek.dts @@ -11,6 +11,7 @@ #include "sama5d2-pinfunc.h" #include <dt-bindings/mfd/atmel-flexcom.h> #include <dt-bindings/gpio/gpio.h> +#include <dt-bindings/pinctrl/at91.h> / { model = "Atmel SAMA5D2 PTC EK"; @@ -299,6 +300,7 @@ <PIN_PA30__NWE_NANDWE>, <PIN_PB2__NRD_NANDOE>; bias-pull-up; + atmel,drive-strength = <ATMEL_PIO_DRVSTR_ME>; }; ale_cle_rdy_cs { diff --git a/arch/arm/boot/dts/bcm63138.dtsi b/arch/arm/boot/dts/bcm63138.dtsi index 43ee992ccdcf..6df61518776f 100644 --- a/arch/arm/boot/dts/bcm63138.dtsi +++ b/arch/arm/boot/dts/bcm63138.dtsi @@ -106,21 +106,23 @@ global_timer: timer@1e200 { compatible = "arm,cortex-a9-global-timer"; reg = <0x1e200 0x20>; - interrupts = <GIC_PPI 11 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_PPI 11 IRQ_TYPE_EDGE_RISING>; clocks = <&axi_clk>; }; local_timer: local-timer@1e600 { compatible = "arm,cortex-a9-twd-timer"; reg = <0x1e600 0x20>; - interrupts = <GIC_PPI 13 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_PPI 13 (GIC_CPU_MASK_SIMPLE(2) | + IRQ_TYPE_EDGE_RISING)>; clocks = <&axi_clk>; }; twd_watchdog: watchdog@1e620 { compatible = "arm,cortex-a9-twd-wdt"; reg = <0x1e620 0x20>; - interrupts = <GIC_PPI 14 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_PPI 14 (GIC_CPU_MASK_SIMPLE(2) | + IRQ_TYPE_LEVEL_HIGH)>; }; armpll: armpll { @@ -158,7 +160,7 @@ serial0: serial@600 { compatible = "brcm,bcm6345-uart"; reg = <0x600 0x1b>; - interrupts = <GIC_SPI 32 0>; + interrupts = <GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>; clocks = <&periph_clk>; clock-names = "periph"; status = "disabled"; @@ -167,7 +169,7 @@ serial1: serial@620 { compatible = "brcm,bcm6345-uart"; reg = <0x620 0x1b>; - interrupts = <GIC_SPI 33 0>; + interrupts = <GIC_SPI 33 IRQ_TYPE_LEVEL_HIGH>; clocks = <&periph_clk>; clock-names = "periph"; status = "disabled"; @@ -180,7 +182,7 @@ reg = <0x2000 0x600>, <0xf0 0x10>; reg-names = "nand", "nand-int-base"; status = "disabled"; - interrupts = <GIC_SPI 38 0>; + interrupts = <GIC_SPI 38 IRQ_TYPE_LEVEL_HIGH>; interrupt-names = "nand"; }; diff --git a/arch/arm/boot/dts/imx53-qsb-common.dtsi b/arch/arm/boot/dts/imx53-qsb-common.dtsi index 7423d462d1e4..50dde84b72ed 100644 --- a/arch/arm/boot/dts/imx53-qsb-common.dtsi +++ b/arch/arm/boot/dts/imx53-qsb-common.dtsi @@ -123,6 +123,17 @@ }; }; +&cpu0 { + /* CPU rated to 1GHz, not 1.2GHz as per the default settings */ + operating-points = < + /* kHz uV */ + 166666 850000 + 400000 900000 + 800000 1050000 + 1000000 1200000 + >; +}; + &esdhc1 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_esdhc1>; diff --git a/arch/arm/boot/dts/stm32mp157c.dtsi b/arch/arm/boot/dts/stm32mp157c.dtsi index 661be948ab74..185541a5b69f 100644 --- a/arch/arm/boot/dts/stm32mp157c.dtsi +++ b/arch/arm/boot/dts/stm32mp157c.dtsi @@ -1078,8 +1078,8 @@ interrupts = <GIC_SPI 86 IRQ_TYPE_LEVEL_HIGH>; clocks = <&rcc SPI6_K>; resets = <&rcc SPI6_R>; - dmas = <&mdma1 34 0x0 0x40008 0x0 0x0 0>, - <&mdma1 35 0x0 0x40002 0x0 0x0 0>; + dmas = <&mdma1 34 0x0 0x40008 0x0 0x0>, + <&mdma1 35 0x0 0x40002 0x0 0x0>; dma-names = "rx", "tx"; status = "disabled"; }; diff --git a/arch/arm/boot/dts/sun8i-r40.dtsi b/arch/arm/boot/dts/sun8i-r40.dtsi index ffd9f00f74a4..5f547c161baf 100644 --- a/arch/arm/boot/dts/sun8i-r40.dtsi +++ b/arch/arm/boot/dts/sun8i-r40.dtsi @@ -800,8 +800,7 @@ }; hdmi_phy: hdmi-phy@1ef0000 { - compatible = "allwinner,sun8i-r40-hdmi-phy", - "allwinner,sun50i-a64-hdmi-phy"; + compatible = "allwinner,sun8i-r40-hdmi-phy"; reg = <0x01ef0000 0x10000>; clocks = <&ccu CLK_BUS_HDMI1>, <&ccu CLK_HDMI_SLOW>, <&ccu 7>, <&ccu 16>; diff --git a/arch/arm/kernel/vmlinux.lds.h b/arch/arm/kernel/vmlinux.lds.h index ae5fdff18406..8247bc15addc 100644 --- a/arch/arm/kernel/vmlinux.lds.h +++ b/arch/arm/kernel/vmlinux.lds.h @@ -49,6 +49,8 @@ #define ARM_DISCARD \ *(.ARM.exidx.exit.text) \ *(.ARM.extab.exit.text) \ + *(.ARM.exidx.text.exit) \ + *(.ARM.extab.text.exit) \ ARM_CPU_DISCARD(*(.ARM.exidx.cpuexit.text)) \ ARM_CPU_DISCARD(*(.ARM.extab.cpuexit.text)) \ ARM_EXIT_DISCARD(EXIT_TEXT) \ diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c index fc91205ff46c..5bf9443cfbaa 100644 --- a/arch/arm/mm/ioremap.c +++ b/arch/arm/mm/ioremap.c @@ -473,7 +473,7 @@ void pci_ioremap_set_mem_type(int mem_type) int pci_ioremap_io(unsigned int offset, phys_addr_t phys_addr) { - BUG_ON(offset + SZ_64K > IO_SPACE_LIMIT); + BUG_ON(offset + SZ_64K - 1 > IO_SPACE_LIMIT); return ioremap_page_range(PCI_IO_VIRT_BASE + offset, PCI_IO_VIRT_BASE + offset + SZ_64K, diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl index fbc74b5fa3ed..8edf93b4490f 100644 --- a/arch/arm/tools/syscall.tbl +++ b/arch/arm/tools/syscall.tbl @@ -413,3 +413,4 @@ 396 common pkey_free sys_pkey_free 397 common statx sys_statx 398 common rseq sys_rseq +399 common io_pgetevents sys_io_pgetevents diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 8e38d5267f22..e213f8e867f6 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -966,6 +966,12 @@ static int armv8pmu_set_event_filter(struct hw_perf_event *event, return 0; } +static int armv8pmu_filter_match(struct perf_event *event) +{ + unsigned long evtype = event->hw.config_base & ARMV8_PMU_EVTYPE_EVENT; + return evtype != ARMV8_PMUV3_PERFCTR_CHAIN; +} + static void armv8pmu_reset(void *info) { struct arm_pmu *cpu_pmu = (struct arm_pmu *)info; @@ -1114,6 +1120,7 @@ static int armv8_pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->stop = armv8pmu_stop, cpu_pmu->reset = armv8pmu_reset, cpu_pmu->set_event_filter = armv8pmu_set_event_filter; + cpu_pmu->filter_match = armv8pmu_filter_match; return 0; } diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 5b4fac434c84..b3354ff94e79 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -64,6 +64,9 @@ #include <asm/xen/hypervisor.h> #include <asm/mmu_context.h> +static int num_standard_resources; +static struct resource *standard_resources; + phys_addr_t __fdt_pointer __initdata; /* @@ -206,14 +209,19 @@ static void __init request_standard_resources(void) { struct memblock_region *region; struct resource *res; + unsigned long i = 0; kernel_code.start = __pa_symbol(_text); kernel_code.end = __pa_symbol(__init_begin - 1); kernel_data.start = __pa_symbol(_sdata); kernel_data.end = __pa_symbol(_end - 1); + num_standard_resources = memblock.memory.cnt; + standard_resources = alloc_bootmem_low(num_standard_resources * + sizeof(*standard_resources)); + for_each_memblock(memory, region) { - res = alloc_bootmem_low(sizeof(*res)); + res = &standard_resources[i++]; if (memblock_is_nomap(region)) { res->name = "reserved"; res->flags = IORESOURCE_MEM; @@ -243,36 +251,26 @@ static void __init request_standard_resources(void) static int __init reserve_memblock_reserved_regions(void) { - phys_addr_t start, end, roundup_end = 0; - struct resource *mem, *res; - u64 i; - - for_each_reserved_mem_region(i, &start, &end) { - if (end <= roundup_end) - continue; /* done already */ - - start = __pfn_to_phys(PFN_DOWN(start)); - end = __pfn_to_phys(PFN_UP(end)) - 1; - roundup_end = end; - - res = kzalloc(sizeof(*res), GFP_ATOMIC); - if (WARN_ON(!res)) - return -ENOMEM; - res->start = start; - res->end = end; - res->name = "reserved"; - res->flags = IORESOURCE_MEM; - - mem = request_resource_conflict(&iomem_resource, res); - /* - * We expected memblock_reserve() regions to conflict with - * memory created by request_standard_resources(). - */ - if (WARN_ON_ONCE(!mem)) + u64 i, j; + + for (i = 0; i < num_standard_resources; ++i) { + struct resource *mem = &standard_resources[i]; + phys_addr_t r_start, r_end, mem_size = resource_size(mem); + + if (!memblock_is_region_reserved(mem->start, mem_size)) continue; - kfree(res); - reserve_region_with_split(mem, start, end, "reserved"); + for_each_reserved_mem_region(j, &r_start, &r_end) { + resource_size_t start, end; + + start = max(PFN_PHYS(PFN_DOWN(r_start)), mem->start); + end = min(PFN_PHYS(PFN_UP(r_end)) - 1, mem->end); + + if (start > mem->end || end < mem->start) + continue; + + reserve_region_with_split(mem, start, end, "reserved"); + } } return 0; diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 07256b08226c..a6c9fbaeaefc 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -57,6 +57,45 @@ static u64 core_reg_offset_from_id(u64 id) return id & ~(KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_CORE); } +static int validate_core_offset(const struct kvm_one_reg *reg) +{ + u64 off = core_reg_offset_from_id(reg->id); + int size; + + switch (off) { + case KVM_REG_ARM_CORE_REG(regs.regs[0]) ... + KVM_REG_ARM_CORE_REG(regs.regs[30]): + case KVM_REG_ARM_CORE_REG(regs.sp): + case KVM_REG_ARM_CORE_REG(regs.pc): + case KVM_REG_ARM_CORE_REG(regs.pstate): + case KVM_REG_ARM_CORE_REG(sp_el1): + case KVM_REG_ARM_CORE_REG(elr_el1): + case KVM_REG_ARM_CORE_REG(spsr[0]) ... + KVM_REG_ARM_CORE_REG(spsr[KVM_NR_SPSR - 1]): + size = sizeof(__u64); + break; + + case KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]) ... + KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]): + size = sizeof(__uint128_t); + break; + + case KVM_REG_ARM_CORE_REG(fp_regs.fpsr): + case KVM_REG_ARM_CORE_REG(fp_regs.fpcr): + size = sizeof(__u32); + break; + + default: + return -EINVAL; + } + + if (KVM_REG_SIZE(reg->id) == size && + IS_ALIGNED(off, size / sizeof(__u32))) + return 0; + + return -EINVAL; +} + static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) { /* @@ -76,6 +115,9 @@ static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) (off + (KVM_REG_SIZE(reg->id) / sizeof(__u32))) >= nr_regs) return -ENOENT; + if (validate_core_offset(reg)) + return -EINVAL; + if (copy_to_user(uaddr, ((u32 *)regs) + off, KVM_REG_SIZE(reg->id))) return -EFAULT; @@ -98,6 +140,9 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) (off + (KVM_REG_SIZE(reg->id) / sizeof(__u32))) >= nr_regs) return -ENOENT; + if (validate_core_offset(reg)) + return -EINVAL; + if (KVM_REG_SIZE(reg->id) > sizeof(tmp)) return -EINVAL; @@ -107,17 +152,25 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) } if (off == KVM_REG_ARM_CORE_REG(regs.pstate)) { - u32 mode = (*(u32 *)valp) & PSR_AA32_MODE_MASK; + u64 mode = (*(u64 *)valp) & PSR_AA32_MODE_MASK; switch (mode) { case PSR_AA32_MODE_USR: + if (!system_supports_32bit_el0()) + return -EINVAL; + break; case PSR_AA32_MODE_FIQ: case PSR_AA32_MODE_IRQ: case PSR_AA32_MODE_SVC: case PSR_AA32_MODE_ABT: case PSR_AA32_MODE_UND: + if (!vcpu_el1_is_32bit(vcpu)) + return -EINVAL; + break; case PSR_MODE_EL0t: case PSR_MODE_EL1t: case PSR_MODE_EL1h: + if (vcpu_el1_is_32bit(vcpu)) + return -EINVAL; break; default: err = -EINVAL; diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 192b3ba07075..f58ea503ad01 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -117,11 +117,14 @@ static pte_t get_clear_flush(struct mm_struct *mm, /* * If HW_AFDBM is enabled, then the HW could turn on - * the dirty bit for any page in the set, so check - * them all. All hugetlb entries are already young. + * the dirty or accessed bit for any page in the set, + * so check them all. */ if (pte_dirty(pte)) orig_pte = pte_mkdirty(orig_pte); + + if (pte_young(pte)) + orig_pte = pte_mkyoung(orig_pte); } if (valid) { @@ -320,11 +323,40 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, return get_clear_flush(mm, addr, ptep, pgsize, ncontig); } +/* + * huge_ptep_set_access_flags will update access flags (dirty, accesssed) + * and write permission. + * + * For a contiguous huge pte range we need to check whether or not write + * permission has to change only on the first pte in the set. Then for + * all the contiguous ptes we need to check whether or not there is a + * discrepancy between dirty or young. + */ +static int __cont_access_flags_changed(pte_t *ptep, pte_t pte, int ncontig) +{ + int i; + + if (pte_write(pte) != pte_write(huge_ptep_get(ptep))) + return 1; + + for (i = 0; i < ncontig; i++) { + pte_t orig_pte = huge_ptep_get(ptep + i); + + if (pte_dirty(pte) != pte_dirty(orig_pte)) + return 1; + + if (pte_young(pte) != pte_young(orig_pte)) + return 1; + } + + return 0; +} + int huge_ptep_set_access_flags(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, pte_t pte, int dirty) { - int ncontig, i, changed = 0; + int ncontig, i; size_t pgsize = 0; unsigned long pfn = pte_pfn(pte), dpfn; pgprot_t hugeprot; @@ -336,19 +368,23 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma, ncontig = find_num_contig(vma->vm_mm, addr, ptep, &pgsize); dpfn = pgsize >> PAGE_SHIFT; + if (!__cont_access_flags_changed(ptep, pte, ncontig)) + return 0; + orig_pte = get_clear_flush(vma->vm_mm, addr, ptep, pgsize, ncontig); - if (!pte_same(orig_pte, pte)) - changed = 1; - /* Make sure we don't lose the dirty state */ + /* Make sure we don't lose the dirty or young state */ if (pte_dirty(orig_pte)) pte = pte_mkdirty(pte); + if (pte_young(orig_pte)) + pte = pte_mkyoung(pte); + hugeprot = pte_pgprot(pte); for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn) set_pte_at(vma->vm_mm, addr, ptep, pfn_pte(pfn, hugeprot)); - return changed; + return 1; } void huge_ptep_set_wrprotect(struct mm_struct *mm, diff --git a/arch/mips/include/asm/processor.h b/arch/mips/include/asm/processor.h index b2fa62922d88..49d6046ca1d0 100644 --- a/arch/mips/include/asm/processor.h +++ b/arch/mips/include/asm/processor.h @@ -13,6 +13,7 @@ #include <linux/atomic.h> #include <linux/cpumask.h> +#include <linux/sizes.h> #include <linux/threads.h> #include <asm/cachectl.h> @@ -80,11 +81,10 @@ extern unsigned int vced_count, vcei_count; #endif -/* - * One page above the stack is used for branch delay slot "emulation". - * See dsemul.c for details. - */ -#define STACK_TOP ((TASK_SIZE & PAGE_MASK) - PAGE_SIZE) +#define VDSO_RANDOMIZE_SIZE (TASK_IS_32BIT_ADDR ? SZ_1M : SZ_256M) + +extern unsigned long mips_stack_top(void); +#define STACK_TOP mips_stack_top() /* * This decides where the kernel will search for a free chunk of vm diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index 8fc69891e117..d4f7fd4550e1 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -32,6 +32,7 @@ #include <linux/nmi.h> #include <linux/cpu.h> +#include <asm/abi.h> #include <asm/asm.h> #include <asm/bootinfo.h> #include <asm/cpu.h> @@ -39,6 +40,7 @@ #include <asm/dsp.h> #include <asm/fpu.h> #include <asm/irq.h> +#include <asm/mips-cps.h> #include <asm/msa.h> #include <asm/pgtable.h> #include <asm/mipsregs.h> @@ -645,6 +647,29 @@ out: return pc; } +unsigned long mips_stack_top(void) +{ + unsigned long top = TASK_SIZE & PAGE_MASK; + + /* One page for branch delay slot "emulation" */ + top -= PAGE_SIZE; + + /* Space for the VDSO, data page & GIC user page */ + top -= PAGE_ALIGN(current->thread.abi->vdso->size); + top -= PAGE_SIZE; + top -= mips_gic_present() ? PAGE_SIZE : 0; + + /* Space for cache colour alignment */ + if (cpu_has_dc_aliases) + top -= shm_align_mask + 1; + + /* Space to randomize the VDSO base */ + if (current->flags & PF_RANDOMIZE) + top -= VDSO_RANDOMIZE_SIZE; + + return top; +} + /* * Don't forget that the stack pointer must be aligned on a 8 bytes * boundary for 32-bits ABI and 16 bytes for 64-bits ABI. diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index c71d1eb7da59..8aaaa42f91ed 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -846,6 +846,34 @@ static void __init arch_mem_init(char **cmdline_p) struct memblock_region *reg; extern void plat_mem_setup(void); + /* + * Initialize boot_command_line to an innocuous but non-empty string in + * order to prevent early_init_dt_scan_chosen() from copying + * CONFIG_CMDLINE into it without our knowledge. We handle + * CONFIG_CMDLINE ourselves below & don't want to duplicate its + * content because repeating arguments can be problematic. + */ + strlcpy(boot_command_line, " ", COMMAND_LINE_SIZE); + + /* call board setup routine */ + plat_mem_setup(); + + /* + * Make sure all kernel memory is in the maps. The "UP" and + * "DOWN" are opposite for initdata since if it crosses over + * into another memory section you don't want that to be + * freed when the initdata is freed. + */ + arch_mem_addpart(PFN_DOWN(__pa_symbol(&_text)) << PAGE_SHIFT, + PFN_UP(__pa_symbol(&_edata)) << PAGE_SHIFT, + BOOT_MEM_RAM); + arch_mem_addpart(PFN_UP(__pa_symbol(&__init_begin)) << PAGE_SHIFT, + PFN_DOWN(__pa_symbol(&__init_end)) << PAGE_SHIFT, + BOOT_MEM_INIT_RAM); + + pr_info("Determined physical RAM map:\n"); + print_memory_map(); + #if defined(CONFIG_CMDLINE_BOOL) && defined(CONFIG_CMDLINE_OVERRIDE) strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE); #else @@ -873,26 +901,6 @@ static void __init arch_mem_init(char **cmdline_p) } #endif #endif - - /* call board setup routine */ - plat_mem_setup(); - - /* - * Make sure all kernel memory is in the maps. The "UP" and - * "DOWN" are opposite for initdata since if it crosses over - * into another memory section you don't want that to be - * freed when the initdata is freed. - */ - arch_mem_addpart(PFN_DOWN(__pa_symbol(&_text)) << PAGE_SHIFT, - PFN_UP(__pa_symbol(&_edata)) << PAGE_SHIFT, - BOOT_MEM_RAM); - arch_mem_addpart(PFN_UP(__pa_symbol(&__init_begin)) << PAGE_SHIFT, - PFN_DOWN(__pa_symbol(&__init_end)) << PAGE_SHIFT, - BOOT_MEM_INIT_RAM); - - pr_info("Determined physical RAM map:\n"); - print_memory_map(); - strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); *cmdline_p = command_line; diff --git a/arch/mips/kernel/vdso.c b/arch/mips/kernel/vdso.c index 8f845f6e5f42..48a9c6b90e07 100644 --- a/arch/mips/kernel/vdso.c +++ b/arch/mips/kernel/vdso.c @@ -15,6 +15,7 @@ #include <linux/ioport.h> #include <linux/kernel.h> #include <linux/mm.h> +#include <linux/random.h> #include <linux/sched.h> #include <linux/slab.h> #include <linux/timekeeper_internal.h> @@ -97,6 +98,21 @@ void update_vsyscall_tz(void) } } +static unsigned long vdso_base(void) +{ + unsigned long base; + + /* Skip the delay slot emulation page */ + base = STACK_TOP + PAGE_SIZE; + + if (current->flags & PF_RANDOMIZE) { + base += get_random_int() & (VDSO_RANDOMIZE_SIZE - 1); + base = PAGE_ALIGN(base); + } + + return base; +} + int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) { struct mips_vdso_image *image = current->thread.abi->vdso; @@ -137,7 +153,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) if (cpu_has_dc_aliases) size += shm_align_mask + 1; - base = get_unmapped_area(NULL, 0, size, 0, 0); + base = get_unmapped_area(NULL, vdso_base(), size, 0, 0); if (IS_ERR_VALUE(base)) { ret = base; goto out; diff --git a/arch/mips/lib/memset.S b/arch/mips/lib/memset.S index 3a6f34ef5ffc..069acec3df9f 100644 --- a/arch/mips/lib/memset.S +++ b/arch/mips/lib/memset.S @@ -280,9 +280,11 @@ * unset_bytes = end_addr - current_addr + 1 * a2 = t1 - a0 + 1 */ + .set reorder PTR_SUBU a2, t1, a0 + PTR_ADDIU a2, 1 jr ra - PTR_ADDIU a2, 1 + .set noreorder .endm diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h index 1a951b00465d..1fffbba8d6a5 100644 --- a/arch/powerpc/include/asm/setup.h +++ b/arch/powerpc/include/asm/setup.h @@ -9,6 +9,7 @@ extern void ppc_printk_progress(char *s, unsigned short hex); extern unsigned int rtas_data; extern unsigned long long memory_limit; +extern bool init_mem_is_free; extern unsigned long klimit; extern void *zalloc_maybe_bootmem(size_t size, gfp_t mask); diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index ea04dfb8c092..2d8fc8c9da7a 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1314,9 +1314,7 @@ EXC_REAL_BEGIN(denorm_exception_hv, 0x1500, 0x100) #ifdef CONFIG_PPC_DENORMALISATION mfspr r10,SPRN_HSRR1 - mfspr r11,SPRN_HSRR0 /* save HSRR0 */ andis. r10,r10,(HSRR1_DENORM)@h /* denorm? */ - addi r11,r11,-4 /* HSRR0 is next instruction */ bne+ denorm_assist #endif @@ -1382,6 +1380,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) */ XVCPSGNDP32(32) denorm_done: + mfspr r11,SPRN_HSRR0 + subi r11,r11,4 mtspr SPRN_HSRR0,r11 mtcrf 0x80,r9 ld r9,PACA_EXGEN+EX_R9(r13) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 913c5725cdb2..bb6ac471a784 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1306,6 +1306,16 @@ void show_user_instructions(struct pt_regs *regs) pc = regs->nip - (instructions_to_print * 3 / 4 * sizeof(int)); + /* + * Make sure the NIP points at userspace, not kernel text/data or + * elsewhere. + */ + if (!__access_ok(pc, instructions_to_print * sizeof(int), USER_DS)) { + pr_info("%s[%d]: Bad NIP, not dumping instructions.\n", + current->comm, current->pid); + return; + } + pr_info("%s[%d]: code: ", current->comm, current->pid); for (i = 0; i < instructions_to_print; i++) { diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S index 6bffbc5affe7..7716374786bd 100644 --- a/arch/powerpc/kernel/tm.S +++ b/arch/powerpc/kernel/tm.S @@ -176,13 +176,27 @@ _GLOBAL(tm_reclaim) std r1, PACATMSCRATCH(r13) ld r1, PACAR1(r13) - /* Store the PPR in r11 and reset to decent value */ std r11, GPR11(r1) /* Temporary stash */ + /* + * Move the saved user r1 to the kernel stack in case PACATMSCRATCH is + * clobbered by an exception once we turn on MSR_RI below. + */ + ld r11, PACATMSCRATCH(r13) + std r11, GPR1(r1) + + /* + * Store r13 away so we can free up the scratch SPR for the SLB fault + * handler (needed once we start accessing the thread_struct). + */ + GET_SCRATCH0(r11) + std r11, GPR13(r1) + /* Reset MSR RI so we can take SLB faults again */ li r11, MSR_RI mtmsrd r11, 1 + /* Store the PPR in r11 and reset to decent value */ mfspr r11, SPRN_PPR HMT_MEDIUM @@ -207,11 +221,11 @@ _GLOBAL(tm_reclaim) SAVE_GPR(8, r7) /* user r8 */ SAVE_GPR(9, r7) /* user r9 */ SAVE_GPR(10, r7) /* user r10 */ - ld r3, PACATMSCRATCH(r13) /* user r1 */ + ld r3, GPR1(r1) /* user r1 */ ld r4, GPR7(r1) /* user r7 */ ld r5, GPR11(r1) /* user r11 */ ld r6, GPR12(r1) /* user r12 */ - GET_SCRATCH0(8) /* user r13 */ + ld r8, GPR13(r1) /* user r13 */ std r3, GPR1(r7) std r4, GPR7(r7) std r5, GPR11(r7) diff --git a/arch/powerpc/lib/checksum_64.S b/arch/powerpc/lib/checksum_64.S index 886ed94b9c13..d05c8af4ac51 100644 --- a/arch/powerpc/lib/checksum_64.S +++ b/arch/powerpc/lib/checksum_64.S @@ -443,6 +443,9 @@ _GLOBAL(csum_ipv6_magic) addc r0, r8, r9 ld r10, 0(r4) ld r11, 8(r4) +#ifdef CONFIG_CPU_LITTLE_ENDIAN + rotldi r5, r5, 8 +#endif adde r0, r0, r10 add r5, r5, r7 adde r0, r0, r11 diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index 850f3b8f4da5..5ffee298745f 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -142,7 +142,7 @@ static inline int unmap_patch_area(unsigned long addr) return 0; } -int patch_instruction(unsigned int *addr, unsigned int instr) +static int do_patch_instruction(unsigned int *addr, unsigned int instr) { int err; unsigned int *patch_addr = NULL; @@ -182,12 +182,22 @@ out: } #else /* !CONFIG_STRICT_KERNEL_RWX */ -int patch_instruction(unsigned int *addr, unsigned int instr) +static int do_patch_instruction(unsigned int *addr, unsigned int instr) { return raw_patch_instruction(addr, instr); } #endif /* CONFIG_STRICT_KERNEL_RWX */ + +int patch_instruction(unsigned int *addr, unsigned int instr) +{ + /* Make sure we aren't patching a freed init section */ + if (init_mem_is_free && init_section_contains(addr, 4)) { + pr_debug("Skipping init section patching addr: 0x%px\n", addr); + return 0; + } + return do_patch_instruction(addr, instr); +} NOKPROBE_SYMBOL(patch_instruction); int patch_branch(unsigned int *addr, unsigned long target, int flags) diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 5c8530d0c611..04ccb274a620 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -63,6 +63,7 @@ #endif unsigned long long memory_limit; +bool init_mem_is_free; #ifdef CONFIG_HIGHMEM pte_t *kmap_pte; @@ -396,6 +397,7 @@ void free_initmem(void) { ppc_md.progress = ppc_printk_progress; mark_initmem_nx(); + init_mem_is_free = true; free_initmem_default(POISON_FREE_INITMEM); } diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 35ac5422903a..055b211b7126 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -1204,7 +1204,9 @@ int find_and_online_cpu_nid(int cpu) int new_nid; /* Use associativity from first thread for all siblings */ - vphn_get_associativity(cpu, associativity); + if (vphn_get_associativity(cpu, associativity)) + return cpu_to_node(cpu); + new_nid = associativity_to_nid(associativity); if (new_nid < 0 || !node_possible(new_nid)) new_nid = first_online_node; @@ -1215,9 +1217,10 @@ int find_and_online_cpu_nid(int cpu) * Need to ensure that NODE_DATA is initialized for a node from * available memory (see memblock_alloc_try_nid). If unable to * init the node, then default to nearest node that has memory - * installed. + * installed. Skip onlining a node if the subsystems are not + * yet initialized. */ - if (try_online_node(new_nid)) + if (!topology_inited || try_online_node(new_nid)) new_nid = first_online_node; #else /* @@ -1452,7 +1455,8 @@ static struct timer_list topology_timer; static void reset_topology_timer(void) { - mod_timer(&topology_timer, jiffies + topology_timer_secs * HZ); + if (vphn_enabled) + mod_timer(&topology_timer, jiffies + topology_timer_secs * HZ); } #ifdef CONFIG_SMP diff --git a/arch/powerpc/mm/pkeys.c b/arch/powerpc/mm/pkeys.c index 333b1f80c435..b271b283c785 100644 --- a/arch/powerpc/mm/pkeys.c +++ b/arch/powerpc/mm/pkeys.c @@ -45,7 +45,7 @@ static void scan_pkey_feature(void) * Since any pkey can be used for data or execute, we will just treat * all keys as equal and track them as one entity. */ - pkeys_total = be32_to_cpu(vals[0]); + pkeys_total = vals[0]; pkeys_devtree_defined = true; } diff --git a/arch/powerpc/platforms/powernv/pci-ioda-tce.c b/arch/powerpc/platforms/powernv/pci-ioda-tce.c index 6c5db1acbe8d..fe9691040f54 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda-tce.c +++ b/arch/powerpc/platforms/powernv/pci-ioda-tce.c @@ -276,7 +276,7 @@ long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset, level_shift = entries_shift + 3; level_shift = max_t(unsigned int, level_shift, PAGE_SHIFT); - if ((level_shift - 3) * levels + page_shift >= 60) + if ((level_shift - 3) * levels + page_shift >= 55) return -EINVAL; /* Allocate TCE table */ diff --git a/arch/riscv/include/asm/asm-prototypes.h b/arch/riscv/include/asm/asm-prototypes.h new file mode 100644 index 000000000000..c9fecd120d18 --- /dev/null +++ b/arch/riscv/include/asm/asm-prototypes.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_RISCV_PROTOTYPES_H + +#include <linux/ftrace.h> +#include <asm-generic/asm-prototypes.h> + +#endif /* _ASM_RISCV_PROTOTYPES_H */ diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index aee603123030..b2d26d9d8489 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -186,7 +186,7 @@ static void __init setup_bootmem(void) BUG_ON(mem_size == 0); set_max_mapnr(PFN_DOWN(mem_size)); - max_low_pfn = pfn_base + PFN_DOWN(mem_size); + max_low_pfn = memblock_end_of_DRAM(); #ifdef CONFIG_BLK_DEV_INITRD setup_initrd(); diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index 3cae9168f63c..e44a8d7959f5 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -108,7 +108,8 @@ int sclp_early_get_core_info(struct sclp_core_info *info); void sclp_early_get_ipl_info(struct sclp_ipl_info *info); void sclp_early_detect(void); void sclp_early_printk(const char *s); -void __sclp_early_printk(const char *s, unsigned int len); +void sclp_early_printk_force(const char *s); +void __sclp_early_printk(const char *s, unsigned int len, unsigned int force); int _sclp_get_core_info(struct sclp_core_info *info); int sclp_core_configure(u8 core); diff --git a/arch/s390/kernel/early_printk.c b/arch/s390/kernel/early_printk.c index 9431784d7796..40c1dfec944e 100644 --- a/arch/s390/kernel/early_printk.c +++ b/arch/s390/kernel/early_printk.c @@ -10,7 +10,7 @@ static void sclp_early_write(struct console *con, const char *s, unsigned int len) { - __sclp_early_printk(s, len); + __sclp_early_printk(s, len, 0); } static struct console sclp_early_console = { diff --git a/arch/s390/kernel/swsusp.S b/arch/s390/kernel/swsusp.S index a049a7b9d6e8..c1a080b11ae9 100644 --- a/arch/s390/kernel/swsusp.S +++ b/arch/s390/kernel/swsusp.S @@ -198,12 +198,10 @@ pgm_check_entry: /* Suspend CPU not available -> panic */ larl %r15,init_thread_union - ahi %r15,1<<(PAGE_SHIFT+THREAD_SIZE_ORDER) + aghi %r15,1<<(PAGE_SHIFT+THREAD_SIZE_ORDER) + aghi %r15,-STACK_FRAME_OVERHEAD larl %r2,.Lpanic_string - lghi %r1,0 - sam31 - sigp %r1,%r0,SIGP_SET_ARCHITECTURE - brasl %r14,sclp_early_printk + brasl %r14,sclp_early_printk_force larl %r3,.Ldisabled_wait_31 lpsw 0(%r3) 4: diff --git a/arch/sparc/kernel/auxio_64.c b/arch/sparc/kernel/auxio_64.c index 4e8f56c3793c..cc42225c20f3 100644 --- a/arch/sparc/kernel/auxio_64.c +++ b/arch/sparc/kernel/auxio_64.c @@ -115,8 +115,8 @@ static int auxio_probe(struct platform_device *dev) auxio_devtype = AUXIO_TYPE_SBUS; size = 1; } else { - printk("auxio: Unknown parent bus type [%s]\n", - dp->parent->name); + printk("auxio: Unknown parent bus type [%pOFn]\n", + dp->parent); return -ENODEV; } auxio_register = of_ioremap(&dev->resource[0], 0, size, "auxio"); diff --git a/arch/sparc/kernel/kgdb_32.c b/arch/sparc/kernel/kgdb_32.c index 5868fc333ea8..639c8e54530a 100644 --- a/arch/sparc/kernel/kgdb_32.c +++ b/arch/sparc/kernel/kgdb_32.c @@ -122,7 +122,7 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code, linux_regs->pc = addr; linux_regs->npc = addr + 4; } - /* fallthru */ + /* fall through */ case 'D': case 'k': diff --git a/arch/sparc/kernel/kgdb_64.c b/arch/sparc/kernel/kgdb_64.c index d5f7dc6323d5..a68bbddbdba4 100644 --- a/arch/sparc/kernel/kgdb_64.c +++ b/arch/sparc/kernel/kgdb_64.c @@ -148,7 +148,7 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code, linux_regs->tpc = addr; linux_regs->tnpc = addr + 4; } - /* fallthru */ + /* fall through */ case 'D': case 'k': diff --git a/arch/sparc/kernel/power.c b/arch/sparc/kernel/power.c index 92627abce311..d941875dd718 100644 --- a/arch/sparc/kernel/power.c +++ b/arch/sparc/kernel/power.c @@ -41,8 +41,8 @@ static int power_probe(struct platform_device *op) power_reg = of_ioremap(res, 0, 0x4, "power"); - printk(KERN_INFO "%s: Control reg at %llx\n", - op->dev.of_node->name, res->start); + printk(KERN_INFO "%pOFn: Control reg at %llx\n", + op->dev.of_node, res->start); if (has_button_interrupt(irq, op->dev.of_node)) { if (request_irq(irq, diff --git a/arch/sparc/kernel/prom_32.c b/arch/sparc/kernel/prom_32.c index b51cbb9e87dc..17c87d29ff20 100644 --- a/arch/sparc/kernel/prom_32.c +++ b/arch/sparc/kernel/prom_32.c @@ -68,8 +68,8 @@ static void __init sparc32_path_component(struct device_node *dp, char *tmp_buf) return; regs = rprop->value; - sprintf(tmp_buf, "%s@%x,%x", - dp->name, + sprintf(tmp_buf, "%pOFn@%x,%x", + dp, regs->which_io, regs->phys_addr); } @@ -84,8 +84,8 @@ static void __init sbus_path_component(struct device_node *dp, char *tmp_buf) return; regs = prop->value; - sprintf(tmp_buf, "%s@%x,%x", - dp->name, + sprintf(tmp_buf, "%pOFn@%x,%x", + dp, regs->which_io, regs->phys_addr); } @@ -104,13 +104,13 @@ static void __init pci_path_component(struct device_node *dp, char *tmp_buf) regs = prop->value; devfn = (regs->phys_hi >> 8) & 0xff; if (devfn & 0x07) { - sprintf(tmp_buf, "%s@%x,%x", - dp->name, + sprintf(tmp_buf, "%pOFn@%x,%x", + dp, devfn >> 3, devfn & 0x07); } else { - sprintf(tmp_buf, "%s@%x", - dp->name, + sprintf(tmp_buf, "%pOFn@%x", + dp, devfn >> 3); } } @@ -127,8 +127,8 @@ static void __init ebus_path_component(struct device_node *dp, char *tmp_buf) regs = prop->value; - sprintf(tmp_buf, "%s@%x,%x", - dp->name, + sprintf(tmp_buf, "%pOFn@%x,%x", + dp, regs->which_io, regs->phys_addr); } @@ -167,8 +167,8 @@ static void __init ambapp_path_component(struct device_node *dp, char *tmp_buf) return; device = prop->value; - sprintf(tmp_buf, "%s:%d:%d@%x,%x", - dp->name, *vendor, *device, + sprintf(tmp_buf, "%pOFn:%d:%d@%x,%x", + dp, *vendor, *device, *intr, reg0); } @@ -201,7 +201,7 @@ char * __init build_path_component(struct device_node *dp) tmp_buf[0] = '\0'; __build_path_component(dp, tmp_buf); if (tmp_buf[0] == '\0') - strcpy(tmp_buf, dp->name); + snprintf(tmp_buf, sizeof(tmp_buf), "%pOFn", dp); n = prom_early_alloc(strlen(tmp_buf) + 1); strcpy(n, tmp_buf); diff --git a/arch/sparc/kernel/prom_64.c b/arch/sparc/kernel/prom_64.c index baeaeed64993..6220411ce8fc 100644 --- a/arch/sparc/kernel/prom_64.c +++ b/arch/sparc/kernel/prom_64.c @@ -82,8 +82,8 @@ static void __init sun4v_path_component(struct device_node *dp, char *tmp_buf) regs = rprop->value; if (!of_node_is_root(dp->parent)) { - sprintf(tmp_buf, "%s@%x,%x", - dp->name, + sprintf(tmp_buf, "%pOFn@%x,%x", + dp, (unsigned int) (regs->phys_addr >> 32UL), (unsigned int) (regs->phys_addr & 0xffffffffUL)); return; @@ -97,17 +97,17 @@ static void __init sun4v_path_component(struct device_node *dp, char *tmp_buf) const char *prefix = (type == 0) ? "m" : "i"; if (low_bits) - sprintf(tmp_buf, "%s@%s%x,%x", - dp->name, prefix, + sprintf(tmp_buf, "%pOFn@%s%x,%x", + dp, prefix, high_bits, low_bits); else - sprintf(tmp_buf, "%s@%s%x", - dp->name, + sprintf(tmp_buf, "%pOFn@%s%x", + dp, prefix, high_bits); } else if (type == 12) { - sprintf(tmp_buf, "%s@%x", - dp->name, high_bits); + sprintf(tmp_buf, "%pOFn@%x", + dp, high_bits); } } @@ -122,8 +122,8 @@ static void __init sun4u_path_component(struct device_node *dp, char *tmp_buf) regs = prop->value; if (!of_node_is_root(dp->parent)) { - sprintf(tmp_buf, "%s@%x,%x", - dp->name, + sprintf(tmp_buf, "%pOFn@%x,%x", + dp, (unsigned int) (regs->phys_addr >> 32UL), (unsigned int) (regs->phys_addr & 0xffffffffUL)); return; @@ -138,8 +138,8 @@ static void __init sun4u_path_component(struct device_node *dp, char *tmp_buf) if (tlb_type >= cheetah) mask = 0x7fffff; - sprintf(tmp_buf, "%s@%x,%x", - dp->name, + sprintf(tmp_buf, "%pOFn@%x,%x", + dp, *(u32 *)prop->value, (unsigned int) (regs->phys_addr & mask)); } @@ -156,8 +156,8 @@ static void __init sbus_path_component(struct device_node *dp, char *tmp_buf) return; regs = prop->value; - sprintf(tmp_buf, "%s@%x,%x", - dp->name, + sprintf(tmp_buf, "%pOFn@%x,%x", + dp, regs->which_io, regs->phys_addr); } @@ -176,13 +176,13 @@ static void __init pci_path_component(struct device_node *dp, char *tmp_buf) regs = prop->value; devfn = (regs->phys_hi >> 8) & 0xff; if (devfn & 0x07) { - sprintf(tmp_buf, "%s@%x,%x", - dp->name, + sprintf(tmp_buf, "%pOFn@%x,%x", + dp, devfn >> 3, devfn & 0x07); } else { - sprintf(tmp_buf, "%s@%x", - dp->name, + sprintf(tmp_buf, "%pOFn@%x", + dp, devfn >> 3); } } @@ -203,8 +203,8 @@ static void __init upa_path_component(struct device_node *dp, char *tmp_buf) if (!prop) return; - sprintf(tmp_buf, "%s@%x,%x", - dp->name, + sprintf(tmp_buf, "%pOFn@%x,%x", + dp, *(u32 *) prop->value, (unsigned int) (regs->phys_addr & 0xffffffffUL)); } @@ -221,7 +221,7 @@ static void __init vdev_path_component(struct device_node *dp, char *tmp_buf) regs = prop->value; - sprintf(tmp_buf, "%s@%x", dp->name, *regs); + sprintf(tmp_buf, "%pOFn@%x", dp, *regs); } /* "name@addrhi,addrlo" */ @@ -236,8 +236,8 @@ static void __init ebus_path_component(struct device_node *dp, char *tmp_buf) regs = prop->value; - sprintf(tmp_buf, "%s@%x,%x", - dp->name, + sprintf(tmp_buf, "%pOFn@%x,%x", + dp, (unsigned int) (regs->phys_addr >> 32UL), (unsigned int) (regs->phys_addr & 0xffffffffUL)); } @@ -257,8 +257,8 @@ static void __init i2c_path_component(struct device_node *dp, char *tmp_buf) /* This actually isn't right... should look at the #address-cells * property of the i2c bus node etc. etc. */ - sprintf(tmp_buf, "%s@%x,%x", - dp->name, regs[0], regs[1]); + sprintf(tmp_buf, "%pOFn@%x,%x", + dp, regs[0], regs[1]); } /* "name@reg0[,reg1]" */ @@ -274,11 +274,11 @@ static void __init usb_path_component(struct device_node *dp, char *tmp_buf) regs = prop->value; if (prop->length == sizeof(u32) || regs[1] == 1) { - sprintf(tmp_buf, "%s@%x", - dp->name, regs[0]); + sprintf(tmp_buf, "%pOFn@%x", + dp, regs[0]); } else { - sprintf(tmp_buf, "%s@%x,%x", - dp->name, regs[0], regs[1]); + sprintf(tmp_buf, "%pOFn@%x,%x", + dp, regs[0], regs[1]); } } @@ -295,11 +295,11 @@ static void __init ieee1394_path_component(struct device_node *dp, char *tmp_buf regs = prop->value; if (regs[2] || regs[3]) { - sprintf(tmp_buf, "%s@%08x%08x,%04x%08x", - dp->name, regs[0], regs[1], regs[2], regs[3]); + sprintf(tmp_buf, "%pOFn@%08x%08x,%04x%08x", + dp, regs[0], regs[1], regs[2], regs[3]); } else { - sprintf(tmp_buf, "%s@%08x%08x", - dp->name, regs[0], regs[1]); + sprintf(tmp_buf, "%pOFn@%08x%08x", + dp, regs[0], regs[1]); } } @@ -361,7 +361,7 @@ char * __init build_path_component(struct device_node *dp) tmp_buf[0] = '\0'; __build_path_component(dp, tmp_buf); if (tmp_buf[0] == '\0') - strcpy(tmp_buf, dp->name); + snprintf(tmp_buf, sizeof(tmp_buf), "%pOFn", dp); n = prom_early_alloc(strlen(tmp_buf) + 1); strcpy(n, tmp_buf); diff --git a/arch/sparc/kernel/viohs.c b/arch/sparc/kernel/viohs.c index 635d67ffc9a3..7db5aabe9708 100644 --- a/arch/sparc/kernel/viohs.c +++ b/arch/sparc/kernel/viohs.c @@ -180,11 +180,17 @@ static int send_dreg(struct vio_driver_state *vio) struct vio_dring_register pkt; char all[sizeof(struct vio_dring_register) + (sizeof(struct ldc_trans_cookie) * - dr->ncookies)]; + VIO_MAX_RING_COOKIES)]; } u; + size_t bytes = sizeof(struct vio_dring_register) + + (sizeof(struct ldc_trans_cookie) * + dr->ncookies); int i; - memset(&u, 0, sizeof(u)); + if (WARN_ON(bytes > sizeof(u))) + return -EINVAL; + + memset(&u, 0, bytes); init_tag(&u.pkt.tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO, VIO_DRING_REG); u.pkt.dring_ident = 0; u.pkt.num_descr = dr->num_entries; @@ -206,7 +212,7 @@ static int send_dreg(struct vio_driver_state *vio) (unsigned long long) u.pkt.cookies[i].cookie_size); } - return send_ctrl(vio, &u.pkt.tag, sizeof(u)); + return send_ctrl(vio, &u.pkt.tag, bytes); } static int send_rdx(struct vio_driver_state *vio) diff --git a/arch/sparc/vdso/Makefile b/arch/sparc/vdso/Makefile index dd0b5a92ffd0..dc85570d8839 100644 --- a/arch/sparc/vdso/Makefile +++ b/arch/sparc/vdso/Makefile @@ -31,23 +31,21 @@ obj-y += $(vdso_img_objs) targets += $(vdso_img_cfiles) targets += $(vdso_img_sodbg) $(vdso_img-y:%=vdso%.so) -export CPPFLAGS_vdso.lds += -P -C +CPPFLAGS_vdso.lds += -P -C VDSO_LDFLAGS_vdso.lds = -m64 -Wl,-soname=linux-vdso.so.1 \ -Wl,--no-undefined \ -Wl,-z,max-page-size=8192 -Wl,-z,common-page-size=8192 \ $(DISABLE_LTO) -$(obj)/vdso64.so.dbg: $(src)/vdso.lds $(vobjs) FORCE +$(obj)/vdso64.so.dbg: $(obj)/vdso.lds $(vobjs) FORCE $(call if_changed,vdso) HOST_EXTRACFLAGS += -I$(srctree)/tools/include hostprogs-y += vdso2c quiet_cmd_vdso2c = VDSO2C $@ -define cmd_vdso2c - $(obj)/vdso2c $< $(<:%.dbg=%) $@ -endef + cmd_vdso2c = $(obj)/vdso2c $< $(<:%.dbg=%) $@ $(obj)/vdso-image-%.c: $(obj)/vdso%.so.dbg $(obj)/vdso%.so $(obj)/vdso2c FORCE $(call if_changed,vdso2c) diff --git a/arch/x86/boot/compressed/mem_encrypt.S b/arch/x86/boot/compressed/mem_encrypt.S index eaa843a52907..a480356e0ed8 100644 --- a/arch/x86/boot/compressed/mem_encrypt.S +++ b/arch/x86/boot/compressed/mem_encrypt.S @@ -25,20 +25,6 @@ ENTRY(get_sev_encryption_bit) push %ebx push %ecx push %edx - push %edi - - /* - * RIP-relative addressing is needed to access the encryption bit - * variable. Since we are running in 32-bit mode we need this call/pop - * sequence to get the proper relative addressing. - */ - call 1f -1: popl %edi - subl $1b, %edi - - movl enc_bit(%edi), %eax - cmpl $0, %eax - jge .Lsev_exit /* Check if running under a hypervisor */ movl $1, %eax @@ -69,15 +55,12 @@ ENTRY(get_sev_encryption_bit) movl %ebx, %eax andl $0x3f, %eax /* Return the encryption bit location */ - movl %eax, enc_bit(%edi) jmp .Lsev_exit .Lno_sev: xor %eax, %eax - movl %eax, enc_bit(%edi) .Lsev_exit: - pop %edi pop %edx pop %ecx pop %ebx @@ -113,8 +96,6 @@ ENTRY(set_sev_encryption_mask) ENDPROC(set_sev_encryption_mask) .data -enc_bit: - .int 0xffffffff #ifdef CONFIG_AMD_MEM_ENCRYPT .balign 8 diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index fa3f439f0a92..141d415a8c80 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -68,7 +68,13 @@ $(obj)/vdso-image-%.c: $(obj)/vdso%.so.dbg $(obj)/vdso%.so $(obj)/vdso2c FORCE CFL := $(PROFILING) -mcmodel=small -fPIC -O2 -fasynchronous-unwind-tables -m64 \ $(filter -g%,$(KBUILD_CFLAGS)) $(call cc-option, -fno-stack-protector) \ -fno-omit-frame-pointer -foptimize-sibling-calls \ - -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO $(RETPOLINE_VDSO_CFLAGS) + -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO + +ifdef CONFIG_RETPOLINE +ifneq ($(RETPOLINE_VDSO_CFLAGS),) + CFL += $(RETPOLINE_VDSO_CFLAGS) +endif +endif $(vobjs): KBUILD_CFLAGS := $(filter-out $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS),$(KBUILD_CFLAGS)) $(CFL) @@ -138,7 +144,13 @@ KBUILD_CFLAGS_32 += $(call cc-option, -fno-stack-protector) KBUILD_CFLAGS_32 += $(call cc-option, -foptimize-sibling-calls) KBUILD_CFLAGS_32 += -fno-omit-frame-pointer KBUILD_CFLAGS_32 += -DDISABLE_BRANCH_PROFILING -KBUILD_CFLAGS_32 += $(RETPOLINE_VDSO_CFLAGS) + +ifdef CONFIG_RETPOLINE +ifneq ($(RETPOLINE_VDSO_CFLAGS),) + KBUILD_CFLAGS_32 += $(RETPOLINE_VDSO_CFLAGS) +endif +endif + $(obj)/vdso32.so.dbg: KBUILD_CFLAGS = $(KBUILD_CFLAGS_32) $(obj)/vdso32.so.dbg: FORCE \ diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c index f19856d95c60..e48ca3afa091 100644 --- a/arch/x86/entry/vdso/vclock_gettime.c +++ b/arch/x86/entry/vdso/vclock_gettime.c @@ -43,8 +43,9 @@ extern u8 hvclock_page notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) { long ret; - asm("syscall" : "=a" (ret) : - "0" (__NR_clock_gettime), "D" (clock), "S" (ts) : "memory"); + asm ("syscall" : "=a" (ret), "=m" (*ts) : + "0" (__NR_clock_gettime), "D" (clock), "S" (ts) : + "memory", "rcx", "r11"); return ret; } @@ -52,8 +53,9 @@ notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz) { long ret; - asm("syscall" : "=a" (ret) : - "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory"); + asm ("syscall" : "=a" (ret), "=m" (*tv), "=m" (*tz) : + "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : + "memory", "rcx", "r11"); return ret; } @@ -64,13 +66,13 @@ notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) { long ret; - asm( + asm ( "mov %%ebx, %%edx \n" - "mov %2, %%ebx \n" + "mov %[clock], %%ebx \n" "call __kernel_vsyscall \n" "mov %%edx, %%ebx \n" - : "=a" (ret) - : "0" (__NR_clock_gettime), "g" (clock), "c" (ts) + : "=a" (ret), "=m" (*ts) + : "0" (__NR_clock_gettime), [clock] "g" (clock), "c" (ts) : "memory", "edx"); return ret; } @@ -79,13 +81,13 @@ notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz) { long ret; - asm( + asm ( "mov %%ebx, %%edx \n" - "mov %2, %%ebx \n" + "mov %[tv], %%ebx \n" "call __kernel_vsyscall \n" "mov %%edx, %%ebx \n" - : "=a" (ret) - : "0" (__NR_gettimeofday), "g" (tv), "c" (tz) + : "=a" (ret), "=m" (*tv), "=m" (*tz) + : "0" (__NR_gettimeofday), [tv] "g" (tv), "c" (tz) : "memory", "edx"); return ret; } diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c index 981ba5e8241b..8671de126eac 100644 --- a/arch/x86/events/amd/uncore.c +++ b/arch/x86/events/amd/uncore.c @@ -36,6 +36,7 @@ static int num_counters_llc; static int num_counters_nb; +static bool l3_mask; static HLIST_HEAD(uncore_unused_list); @@ -209,6 +210,13 @@ static int amd_uncore_event_init(struct perf_event *event) hwc->config = event->attr.config & AMD64_RAW_EVENT_MASK_NB; hwc->idx = -1; + /* + * SliceMask and ThreadMask need to be set for certain L3 events in + * Family 17h. For other events, the two fields do not affect the count. + */ + if (l3_mask) + hwc->config |= (AMD64_L3_SLICE_MASK | AMD64_L3_THREAD_MASK); + if (event->cpu < 0) return -EINVAL; @@ -525,6 +533,7 @@ static int __init amd_uncore_init(void) amd_llc_pmu.name = "amd_l3"; format_attr_event_df.show = &event_show_df; format_attr_event_l3.show = &event_show_l3; + l3_mask = true; } else { num_counters_nb = NUM_COUNTERS_NB; num_counters_llc = NUM_COUNTERS_L2; @@ -532,6 +541,7 @@ static int __init amd_uncore_init(void) amd_llc_pmu.name = "amd_l2"; format_attr_event_df = format_attr_event; format_attr_event_l3 = format_attr_event; + l3_mask = false; } amd_nb_pmu.attr_groups = amd_uncore_attr_groups_df; diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 51d7c117e3c7..c07bee31abe8 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -3061,7 +3061,7 @@ static struct event_constraint bdx_uncore_pcu_constraints[] = { void bdx_uncore_cpu_init(void) { - int pkg = topology_phys_to_logical_pkg(0); + int pkg = topology_phys_to_logical_pkg(boot_cpu_data.phys_proc_id); if (bdx_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) bdx_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; @@ -3931,16 +3931,16 @@ static const struct pci_device_id skx_uncore_pci_ids[] = { .driver_data = UNCORE_PCI_DEV_FULL_DATA(21, 5, SKX_PCI_UNCORE_M2PCIE, 3), }, { /* M3UPI0 Link 0 */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204C), - .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 0, SKX_PCI_UNCORE_M3UPI, 0), + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204D), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 1, SKX_PCI_UNCORE_M3UPI, 0), }, { /* M3UPI0 Link 1 */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204D), - .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 1, SKX_PCI_UNCORE_M3UPI, 1), + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204E), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 2, SKX_PCI_UNCORE_M3UPI, 1), }, { /* M3UPI1 Link 2 */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204C), - .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 4, SKX_PCI_UNCORE_M3UPI, 2), + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204D), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 5, SKX_PCI_UNCORE_M3UPI, 2), }, { /* end: all zeroes */ } }; diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h index e203169931c7..6390bd8c141b 100644 --- a/arch/x86/include/asm/fixmap.h +++ b/arch/x86/include/asm/fixmap.h @@ -14,6 +14,16 @@ #ifndef _ASM_X86_FIXMAP_H #define _ASM_X86_FIXMAP_H +/* + * Exposed to assembly code for setting up initial page tables. Cannot be + * calculated in assembly code (fixmap entries are an enum), but is sanity + * checked in the actual fixmap C code to make sure that the fixmap is + * covered fully. + */ +#define FIXMAP_PMD_NUM 2 +/* fixmap starts downwards from the 507th entry in level2_fixmap_pgt */ +#define FIXMAP_PMD_TOP 507 + #ifndef __ASSEMBLY__ #include <linux/kernel.h> #include <asm/acpi.h> diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h index c0643831706e..616f8e637bc3 100644 --- a/arch/x86/include/asm/mem_encrypt.h +++ b/arch/x86/include/asm/mem_encrypt.h @@ -48,10 +48,13 @@ int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size); /* Architecture __weak replacement functions */ void __init mem_encrypt_init(void); +void __init mem_encrypt_free_decrypted_mem(void); bool sme_active(void); bool sev_active(void); +#define __bss_decrypted __attribute__((__section__(".bss..decrypted"))) + #else /* !CONFIG_AMD_MEM_ENCRYPT */ #define sme_me_mask 0ULL @@ -77,6 +80,8 @@ early_set_memory_decrypted(unsigned long vaddr, unsigned long size) { return 0; static inline int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size) { return 0; } +#define __bss_decrypted + #endif /* CONFIG_AMD_MEM_ENCRYPT */ /* @@ -88,6 +93,8 @@ early_set_memory_encrypted(unsigned long vaddr, unsigned long size) { return 0; #define __sme_pa(x) (__pa(x) | sme_me_mask) #define __sme_pa_nodebug(x) (__pa_nodebug(x) | sme_me_mask) +extern char __start_bss_decrypted[], __end_bss_decrypted[], __start_bss_decrypted_unused[]; + #endif /* __ASSEMBLY__ */ #endif /* __X86_MEM_ENCRYPT_H__ */ diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 12f54082f4c8..78241b736f2a 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -46,6 +46,14 @@ #define INTEL_ARCH_EVENT_MASK \ (ARCH_PERFMON_EVENTSEL_UMASK | ARCH_PERFMON_EVENTSEL_EVENT) +#define AMD64_L3_SLICE_SHIFT 48 +#define AMD64_L3_SLICE_MASK \ + ((0xFULL) << AMD64_L3_SLICE_SHIFT) + +#define AMD64_L3_THREAD_SHIFT 56 +#define AMD64_L3_THREAD_MASK \ + ((0xFFULL) << AMD64_L3_THREAD_SHIFT) + #define X86_RAW_EVENT_MASK \ (ARCH_PERFMON_EVENTSEL_EVENT | \ ARCH_PERFMON_EVENTSEL_UMASK | \ diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index ce2b59047cb8..9c85b54bf03c 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -14,6 +14,7 @@ #include <asm/processor.h> #include <linux/bitops.h> #include <linux/threads.h> +#include <asm/fixmap.h> extern p4d_t level4_kernel_pgt[512]; extern p4d_t level4_ident_pgt[512]; @@ -22,7 +23,7 @@ extern pud_t level3_ident_pgt[512]; extern pmd_t level2_kernel_pgt[512]; extern pmd_t level2_fixmap_pgt[512]; extern pmd_t level2_ident_pgt[512]; -extern pte_t level1_fixmap_pgt[512]; +extern pte_t level1_fixmap_pgt[512 * FIXMAP_PMD_NUM]; extern pgd_t init_top_pgt[]; #define swapper_pg_dir init_top_pgt diff --git a/arch/x86/include/asm/uv/uv.h b/arch/x86/include/asm/uv/uv.h index a80c0673798f..e60c45fd3679 100644 --- a/arch/x86/include/asm/uv/uv.h +++ b/arch/x86/include/asm/uv/uv.h @@ -10,8 +10,13 @@ struct cpumask; struct mm_struct; #ifdef CONFIG_X86_UV +#include <linux/efi.h> extern enum uv_system_type get_uv_system_type(void); +static inline bool is_early_uv_system(void) +{ + return !((efi.uv_systab == EFI_INVALID_TABLE_ADDR) || !efi.uv_systab); +} extern int is_uv_system(void); extern int is_uv_hubless(void); extern void uv_cpu_init(void); @@ -23,6 +28,7 @@ extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, #else /* X86_UV */ static inline enum uv_system_type get_uv_system_type(void) { return UV_NONE; } +static inline bool is_early_uv_system(void) { return 0; } static inline int is_uv_system(void) { return 0; } static inline int is_uv_hubless(void) { return 0; } static inline void uv_cpu_init(void) { } diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 22ab408177b2..eeea634bee0a 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -922,7 +922,7 @@ static void init_amd(struct cpuinfo_x86 *c) static unsigned int amd_size_cache(struct cpuinfo_x86 *c, unsigned int size) { /* AMD errata T13 (order #21922) */ - if ((c->x86 == 6)) { + if (c->x86 == 6) { /* Duron Rev A0 */ if (c->x86_model == 3 && c->x86_stepping == 0) size = 64; diff --git a/arch/x86/kernel/cpu/intel_rdt.h b/arch/x86/kernel/cpu/intel_rdt.h index 4e588f36228f..3736f6dc9545 100644 --- a/arch/x86/kernel/cpu/intel_rdt.h +++ b/arch/x86/kernel/cpu/intel_rdt.h @@ -382,6 +382,11 @@ static inline bool is_mbm_event(int e) e <= QOS_L3_MBM_LOCAL_EVENT_ID); } +struct rdt_parse_data { + struct rdtgroup *rdtgrp; + char *buf; +}; + /** * struct rdt_resource - attributes of an RDT resource * @rid: The index of the resource @@ -423,16 +428,19 @@ struct rdt_resource { struct rdt_cache cache; struct rdt_membw membw; const char *format_str; - int (*parse_ctrlval) (void *data, struct rdt_resource *r, - struct rdt_domain *d); + int (*parse_ctrlval)(struct rdt_parse_data *data, + struct rdt_resource *r, + struct rdt_domain *d); struct list_head evt_list; int num_rmid; unsigned int mon_scale; unsigned long fflags; }; -int parse_cbm(void *_data, struct rdt_resource *r, struct rdt_domain *d); -int parse_bw(void *_buf, struct rdt_resource *r, struct rdt_domain *d); +int parse_cbm(struct rdt_parse_data *data, struct rdt_resource *r, + struct rdt_domain *d); +int parse_bw(struct rdt_parse_data *data, struct rdt_resource *r, + struct rdt_domain *d); extern struct mutex rdtgroup_mutex; @@ -521,14 +529,14 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of, int rdtgroup_schemata_show(struct kernfs_open_file *of, struct seq_file *s, void *v); bool rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d, - u32 _cbm, int closid, bool exclusive); + unsigned long cbm, int closid, bool exclusive); unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r, struct rdt_domain *d, - u32 cbm); + unsigned long cbm); enum rdtgrp_mode rdtgroup_mode_by_closid(int closid); int rdtgroup_tasks_assigned(struct rdtgroup *r); int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp); int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp); -bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, u32 _cbm); +bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, unsigned long cbm); bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_domain *d); int rdt_pseudo_lock_init(void); void rdt_pseudo_lock_release(void); @@ -536,6 +544,7 @@ int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp); void rdtgroup_pseudo_lock_remove(struct rdtgroup *rdtgrp); struct rdt_domain *get_domain_from_cpu(int cpu, struct rdt_resource *r); int update_domains(struct rdt_resource *r, int closid); +int closids_supported(void); void closid_free(int closid); int alloc_rmid(void); void free_rmid(u32 rmid); diff --git a/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c b/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c index af358ca05160..0f53049719cd 100644 --- a/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c +++ b/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c @@ -64,19 +64,19 @@ static bool bw_validate(char *buf, unsigned long *data, struct rdt_resource *r) return true; } -int parse_bw(void *_buf, struct rdt_resource *r, struct rdt_domain *d) +int parse_bw(struct rdt_parse_data *data, struct rdt_resource *r, + struct rdt_domain *d) { - unsigned long data; - char *buf = _buf; + unsigned long bw_val; if (d->have_new_ctrl) { rdt_last_cmd_printf("duplicate domain %d\n", d->id); return -EINVAL; } - if (!bw_validate(buf, &data, r)) + if (!bw_validate(data->buf, &bw_val, r)) return -EINVAL; - d->new_ctrl = data; + d->new_ctrl = bw_val; d->have_new_ctrl = true; return 0; @@ -123,18 +123,13 @@ static bool cbm_validate(char *buf, u32 *data, struct rdt_resource *r) return true; } -struct rdt_cbm_parse_data { - struct rdtgroup *rdtgrp; - char *buf; -}; - /* * Read one cache bit mask (hex). Check that it is valid for the current * resource type. */ -int parse_cbm(void *_data, struct rdt_resource *r, struct rdt_domain *d) +int parse_cbm(struct rdt_parse_data *data, struct rdt_resource *r, + struct rdt_domain *d) { - struct rdt_cbm_parse_data *data = _data; struct rdtgroup *rdtgrp = data->rdtgrp; u32 cbm_val; @@ -195,11 +190,17 @@ int parse_cbm(void *_data, struct rdt_resource *r, struct rdt_domain *d) static int parse_line(char *line, struct rdt_resource *r, struct rdtgroup *rdtgrp) { - struct rdt_cbm_parse_data data; + struct rdt_parse_data data; char *dom = NULL, *id; struct rdt_domain *d; unsigned long dom_id; + if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP && + r->rid == RDT_RESOURCE_MBA) { + rdt_last_cmd_puts("Cannot pseudo-lock MBA resource\n"); + return -EINVAL; + } + next: if (!line || line[0] == '\0') return 0; diff --git a/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c b/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c index 40f3903ae5d9..f8c260d522ca 100644 --- a/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c +++ b/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c @@ -797,25 +797,27 @@ int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp) /** * rdtgroup_cbm_overlaps_pseudo_locked - Test if CBM or portion is pseudo-locked * @d: RDT domain - * @_cbm: CBM to test + * @cbm: CBM to test * - * @d represents a cache instance and @_cbm a capacity bitmask that is - * considered for it. Determine if @_cbm overlaps with any existing + * @d represents a cache instance and @cbm a capacity bitmask that is + * considered for it. Determine if @cbm overlaps with any existing * pseudo-locked region on @d. * - * Return: true if @_cbm overlaps with pseudo-locked region on @d, false + * @cbm is unsigned long, even if only 32 bits are used, to make the + * bitmap functions work correctly. + * + * Return: true if @cbm overlaps with pseudo-locked region on @d, false * otherwise. */ -bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, u32 _cbm) +bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, unsigned long cbm) { - unsigned long *cbm = (unsigned long *)&_cbm; - unsigned long *cbm_b; unsigned int cbm_len; + unsigned long cbm_b; if (d->plr) { cbm_len = d->plr->r->cache.cbm_len; - cbm_b = (unsigned long *)&d->plr->cbm; - if (bitmap_intersects(cbm, cbm_b, cbm_len)) + cbm_b = d->plr->cbm; + if (bitmap_intersects(&cbm, &cbm_b, cbm_len)) return true; } return false; diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c index b799c00bef09..b140c68bc14b 100644 --- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c +++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c @@ -97,6 +97,12 @@ void rdt_last_cmd_printf(const char *fmt, ...) * limited as the number of resources grows. */ static int closid_free_map; +static int closid_free_map_len; + +int closids_supported(void) +{ + return closid_free_map_len; +} static void closid_init(void) { @@ -111,6 +117,7 @@ static void closid_init(void) /* CLOSID 0 is always reserved for the default group */ closid_free_map &= ~1; + closid_free_map_len = rdt_min_closid; } static int closid_alloc(void) @@ -802,7 +809,7 @@ static int rdt_bit_usage_show(struct kernfs_open_file *of, sw_shareable = 0; exclusive = 0; seq_printf(seq, "%d=", dom->id); - for (i = 0; i < r->num_closid; i++, ctrl++) { + for (i = 0; i < closids_supported(); i++, ctrl++) { if (!closid_allocated(i)) continue; mode = rdtgroup_mode_by_closid(i); @@ -968,33 +975,34 @@ static int rdtgroup_mode_show(struct kernfs_open_file *of, * is false then overlaps with any resource group or hardware entities * will be considered. * + * @cbm is unsigned long, even if only 32 bits are used, to make the + * bitmap functions work correctly. + * * Return: false if CBM does not overlap, true if it does. */ bool rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d, - u32 _cbm, int closid, bool exclusive) + unsigned long cbm, int closid, bool exclusive) { - unsigned long *cbm = (unsigned long *)&_cbm; - unsigned long *ctrl_b; enum rdtgrp_mode mode; + unsigned long ctrl_b; u32 *ctrl; int i; /* Check for any overlap with regions used by hardware directly */ if (!exclusive) { - if (bitmap_intersects(cbm, - (unsigned long *)&r->cache.shareable_bits, - r->cache.cbm_len)) + ctrl_b = r->cache.shareable_bits; + if (bitmap_intersects(&cbm, &ctrl_b, r->cache.cbm_len)) return true; } /* Check for overlap with other resource groups */ ctrl = d->ctrl_val; - for (i = 0; i < r->num_closid; i++, ctrl++) { - ctrl_b = (unsigned long *)ctrl; + for (i = 0; i < closids_supported(); i++, ctrl++) { + ctrl_b = *ctrl; mode = rdtgroup_mode_by_closid(i); if (closid_allocated(i) && i != closid && mode != RDT_MODE_PSEUDO_LOCKSETUP) { - if (bitmap_intersects(cbm, ctrl_b, r->cache.cbm_len)) { + if (bitmap_intersects(&cbm, &ctrl_b, r->cache.cbm_len)) { if (exclusive) { if (mode == RDT_MODE_EXCLUSIVE) return true; @@ -1024,16 +1032,27 @@ static bool rdtgroup_mode_test_exclusive(struct rdtgroup *rdtgrp) { int closid = rdtgrp->closid; struct rdt_resource *r; + bool has_cache = false; struct rdt_domain *d; for_each_alloc_enabled_rdt_resource(r) { + if (r->rid == RDT_RESOURCE_MBA) + continue; + has_cache = true; list_for_each_entry(d, &r->domains, list) { if (rdtgroup_cbm_overlaps(r, d, d->ctrl_val[closid], - rdtgrp->closid, false)) + rdtgrp->closid, false)) { + rdt_last_cmd_puts("schemata overlaps\n"); return false; + } } } + if (!has_cache) { + rdt_last_cmd_puts("cannot be exclusive without CAT/CDP\n"); + return false; + } + return true; } @@ -1085,7 +1104,6 @@ static ssize_t rdtgroup_mode_write(struct kernfs_open_file *of, rdtgrp->mode = RDT_MODE_SHAREABLE; } else if (!strcmp(buf, "exclusive")) { if (!rdtgroup_mode_test_exclusive(rdtgrp)) { - rdt_last_cmd_printf("schemata overlaps\n"); ret = -EINVAL; goto out; } @@ -1121,15 +1139,18 @@ out: * computed by first dividing the total cache size by the CBM length to * determine how many bytes each bit in the bitmask represents. The result * is multiplied with the number of bits set in the bitmask. + * + * @cbm is unsigned long, even if only 32 bits are used to make the + * bitmap functions work correctly. */ unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r, - struct rdt_domain *d, u32 cbm) + struct rdt_domain *d, unsigned long cbm) { struct cpu_cacheinfo *ci; unsigned int size = 0; int num_b, i; - num_b = bitmap_weight((unsigned long *)&cbm, r->cache.cbm_len); + num_b = bitmap_weight(&cbm, r->cache.cbm_len); ci = get_cpu_cacheinfo(cpumask_any(&d->cpu_mask)); for (i = 0; i < ci->num_leaves; i++) { if (ci->info_list[i].level == r->cache_level) { @@ -1155,8 +1176,8 @@ static int rdtgroup_size_show(struct kernfs_open_file *of, struct rdt_resource *r; struct rdt_domain *d; unsigned int size; - bool sep = false; - u32 cbm; + bool sep; + u32 ctrl; rdtgrp = rdtgroup_kn_lock_live(of->kn); if (!rdtgrp) { @@ -1174,6 +1195,7 @@ static int rdtgroup_size_show(struct kernfs_open_file *of, } for_each_alloc_enabled_rdt_resource(r) { + sep = false; seq_printf(s, "%*s:", max_name_width, r->name); list_for_each_entry(d, &r->domains, list) { if (sep) @@ -1181,8 +1203,13 @@ static int rdtgroup_size_show(struct kernfs_open_file *of, if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { size = 0; } else { - cbm = d->ctrl_val[rdtgrp->closid]; - size = rdtgroup_cbm_to_size(r, d, cbm); + ctrl = (!is_mba_sc(r) ? + d->ctrl_val[rdtgrp->closid] : + d->mbps_val[rdtgrp->closid]); + if (r->rid == RDT_RESOURCE_MBA) + size = ctrl; + else + size = rdtgroup_cbm_to_size(r, d, ctrl); } seq_printf(s, "%d=%u", d->id, size); sep = true; @@ -2330,18 +2357,25 @@ static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp) u32 used_b = 0, unused_b = 0; u32 closid = rdtgrp->closid; struct rdt_resource *r; + unsigned long tmp_cbm; enum rdtgrp_mode mode; struct rdt_domain *d; int i, ret; u32 *ctrl; for_each_alloc_enabled_rdt_resource(r) { + /* + * Only initialize default allocations for CBM cache + * resources + */ + if (r->rid == RDT_RESOURCE_MBA) + continue; list_for_each_entry(d, &r->domains, list) { d->have_new_ctrl = false; d->new_ctrl = r->cache.shareable_bits; used_b = r->cache.shareable_bits; ctrl = d->ctrl_val; - for (i = 0; i < r->num_closid; i++, ctrl++) { + for (i = 0; i < closids_supported(); i++, ctrl++) { if (closid_allocated(i) && i != closid) { mode = rdtgroup_mode_by_closid(i); if (mode == RDT_MODE_PSEUDO_LOCKSETUP) @@ -2361,9 +2395,14 @@ static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp) * modify the CBM based on system availability. */ cbm_ensure_valid(&d->new_ctrl, r); - if (bitmap_weight((unsigned long *) &d->new_ctrl, - r->cache.cbm_len) < - r->cache.min_cbm_bits) { + /* + * Assign the u32 CBM to an unsigned long to ensure + * that bitmap_weight() does not access out-of-bound + * memory. + */ + tmp_cbm = d->new_ctrl; + if (bitmap_weight(&tmp_cbm, r->cache.cbm_len) < + r->cache.min_cbm_bits) { rdt_last_cmd_printf("no space on %s:%d\n", r->name, d->id); return -ENOSPC; @@ -2373,6 +2412,12 @@ static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp) } for_each_alloc_enabled_rdt_resource(r) { + /* + * Only initialize default allocations for CBM cache + * resources + */ + if (r->rid == RDT_RESOURCE_MBA) + continue; ret = update_domains(r, rdtgrp->closid); if (ret < 0) { rdt_last_cmd_puts("failed to initialize allocations\n"); diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 8047379e575a..ddee1f0870c4 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -35,6 +35,7 @@ #include <asm/bootparam_utils.h> #include <asm/microcode.h> #include <asm/kasan.h> +#include <asm/fixmap.h> /* * Manage page tables very early on. @@ -112,6 +113,7 @@ static bool __head check_la57_support(unsigned long physaddr) unsigned long __head __startup_64(unsigned long physaddr, struct boot_params *bp) { + unsigned long vaddr, vaddr_end; unsigned long load_delta, *p; unsigned long pgtable_flags; pgdval_t *pgd; @@ -165,7 +167,8 @@ unsigned long __head __startup_64(unsigned long physaddr, pud[511] += load_delta; pmd = fixup_pointer(level2_fixmap_pgt, physaddr); - pmd[506] += load_delta; + for (i = FIXMAP_PMD_TOP; i > FIXMAP_PMD_TOP - FIXMAP_PMD_NUM; i--) + pmd[i] += load_delta; /* * Set up the identity mapping for the switchover. These @@ -235,6 +238,21 @@ unsigned long __head __startup_64(unsigned long physaddr, sme_encrypt_kernel(bp); /* + * Clear the memory encryption mask from the .bss..decrypted section. + * The bss section will be memset to zero later in the initialization so + * there is no need to zero it after changing the memory encryption + * attribute. + */ + if (mem_encrypt_active()) { + vaddr = (unsigned long)__start_bss_decrypted; + vaddr_end = (unsigned long)__end_bss_decrypted; + for (; vaddr < vaddr_end; vaddr += PMD_SIZE) { + i = pmd_index(vaddr); + pmd[i] -= sme_get_me_mask(); + } + } + + /* * Return the SME encryption mask (if SME is active) to be used as a * modifier for the initial pgdir entry programmed into CR3. */ diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 15ebc2fc166e..a3618cf04cf6 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -24,6 +24,7 @@ #include "../entry/calling.h" #include <asm/export.h> #include <asm/nospec-branch.h> +#include <asm/fixmap.h> #ifdef CONFIG_PARAVIRT #include <asm/asm-offsets.h> @@ -445,13 +446,20 @@ NEXT_PAGE(level2_kernel_pgt) KERNEL_IMAGE_SIZE/PMD_SIZE) NEXT_PAGE(level2_fixmap_pgt) - .fill 506,8,0 - .quad level1_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC - /* 8MB reserved for vsyscalls + a 2MB hole = 4 + 1 entries */ - .fill 5,8,0 + .fill (512 - 4 - FIXMAP_PMD_NUM),8,0 + pgtno = 0 + .rept (FIXMAP_PMD_NUM) + .quad level1_fixmap_pgt + (pgtno << PAGE_SHIFT) - __START_KERNEL_map \ + + _PAGE_TABLE_NOENC; + pgtno = pgtno + 1 + .endr + /* 6 MB reserved space + a 2MB hole */ + .fill 4,8,0 NEXT_PAGE(level1_fixmap_pgt) + .rept (FIXMAP_PMD_NUM) .fill 512,8,0 + .endr #undef PMDS diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 1e6764648af3..013fe3d21dbb 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -28,6 +28,7 @@ #include <linux/sched/clock.h> #include <linux/mm.h> #include <linux/slab.h> +#include <linux/set_memory.h> #include <asm/hypervisor.h> #include <asm/mem_encrypt.h> @@ -61,9 +62,10 @@ early_param("no-kvmclock-vsyscall", parse_no_kvmclock_vsyscall); (PAGE_SIZE / sizeof(struct pvclock_vsyscall_time_info)) static struct pvclock_vsyscall_time_info - hv_clock_boot[HVC_BOOT_ARRAY_SIZE] __aligned(PAGE_SIZE); -static struct pvclock_wall_clock wall_clock; + hv_clock_boot[HVC_BOOT_ARRAY_SIZE] __bss_decrypted __aligned(PAGE_SIZE); +static struct pvclock_wall_clock wall_clock __bss_decrypted; static DEFINE_PER_CPU(struct pvclock_vsyscall_time_info *, hv_clock_per_cpu); +static struct pvclock_vsyscall_time_info *hvclock_mem; static inline struct pvclock_vcpu_time_info *this_cpu_pvti(void) { @@ -236,6 +238,45 @@ static void kvm_shutdown(void) native_machine_shutdown(); } +static void __init kvmclock_init_mem(void) +{ + unsigned long ncpus; + unsigned int order; + struct page *p; + int r; + + if (HVC_BOOT_ARRAY_SIZE >= num_possible_cpus()) + return; + + ncpus = num_possible_cpus() - HVC_BOOT_ARRAY_SIZE; + order = get_order(ncpus * sizeof(*hvclock_mem)); + + p = alloc_pages(GFP_KERNEL, order); + if (!p) { + pr_warn("%s: failed to alloc %d pages", __func__, (1U << order)); + return; + } + + hvclock_mem = page_address(p); + + /* + * hvclock is shared between the guest and the hypervisor, must + * be mapped decrypted. + */ + if (sev_active()) { + r = set_memory_decrypted((unsigned long) hvclock_mem, + 1UL << order); + if (r) { + __free_pages(p, order); + hvclock_mem = NULL; + pr_warn("kvmclock: set_memory_decrypted() failed. Disabling\n"); + return; + } + } + + memset(hvclock_mem, 0, PAGE_SIZE << order); +} + static int __init kvm_setup_vsyscall_timeinfo(void) { #ifdef CONFIG_X86_64 @@ -250,6 +291,9 @@ static int __init kvm_setup_vsyscall_timeinfo(void) kvm_clock.archdata.vclock_mode = VCLOCK_PVCLOCK; #endif + + kvmclock_init_mem(); + return 0; } early_initcall(kvm_setup_vsyscall_timeinfo); @@ -269,8 +313,10 @@ static int kvmclock_setup_percpu(unsigned int cpu) /* Use the static page for the first CPUs, allocate otherwise */ if (cpu < HVC_BOOT_ARRAY_SIZE) p = &hv_clock_boot[cpu]; + else if (hvclock_mem) + p = hvclock_mem + cpu - HVC_BOOT_ARRAY_SIZE; else - p = kzalloc(sizeof(*p), GFP_KERNEL); + return -ENOMEM; per_cpu(hv_clock_per_cpu, cpu) = p; return p ? 0 : -ENOMEM; diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index afdb303285f8..8dc69d82567e 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -91,7 +91,7 @@ unsigned paravirt_patch_call(void *insnbuf, if (len < 5) { #ifdef CONFIG_RETPOLINE - WARN_ONCE("Failing to patch indirect CALL in %ps\n", (void *)addr); + WARN_ONCE(1, "Failing to patch indirect CALL in %ps\n", (void *)addr); #endif return len; /* call too long for patch site */ } @@ -111,7 +111,7 @@ unsigned paravirt_patch_jmp(void *insnbuf, const void *target, if (len < 5) { #ifdef CONFIG_RETPOLINE - WARN_ONCE("Failing to patch indirect JMP in %ps\n", (void *)addr); + WARN_ONCE(1, "Failing to patch indirect JMP in %ps\n", (void *)addr); #endif return len; /* call too long for patch site */ } diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 6490f618e096..b52bd2b6cdb4 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -26,6 +26,7 @@ #include <asm/apic.h> #include <asm/intel-family.h> #include <asm/i8259.h> +#include <asm/uv/uv.h> unsigned int __read_mostly cpu_khz; /* TSC clocks / usec, not used here */ EXPORT_SYMBOL(cpu_khz); @@ -1433,6 +1434,9 @@ void __init tsc_early_init(void) { if (!boot_cpu_has(X86_FEATURE_TSC)) return; + /* Don't change UV TSC multi-chassis synchronization */ + if (is_early_uv_system()) + return; if (!determine_cpu_tsc_frequencies(true)) return; loops_per_jiffy = get_loops_per_jiffy(); diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 8bde0a419f86..5dd3317d761f 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -65,6 +65,23 @@ jiffies_64 = jiffies; #define ALIGN_ENTRY_TEXT_BEGIN . = ALIGN(PMD_SIZE); #define ALIGN_ENTRY_TEXT_END . = ALIGN(PMD_SIZE); +/* + * This section contains data which will be mapped as decrypted. Memory + * encryption operates on a page basis. Make this section PMD-aligned + * to avoid splitting the pages while mapping the section early. + * + * Note: We use a separate section so that only this section gets + * decrypted to avoid exposing more than we wish. + */ +#define BSS_DECRYPTED \ + . = ALIGN(PMD_SIZE); \ + __start_bss_decrypted = .; \ + *(.bss..decrypted); \ + . = ALIGN(PAGE_SIZE); \ + __start_bss_decrypted_unused = .; \ + . = ALIGN(PMD_SIZE); \ + __end_bss_decrypted = .; \ + #else #define X86_ALIGN_RODATA_BEGIN @@ -74,6 +91,7 @@ jiffies_64 = jiffies; #define ALIGN_ENTRY_TEXT_BEGIN #define ALIGN_ENTRY_TEXT_END +#define BSS_DECRYPTED #endif @@ -355,6 +373,7 @@ SECTIONS __bss_start = .; *(.bss..page_aligned) *(.bss) + BSS_DECRYPTED . = ALIGN(PAGE_SIZE); __bss_stop = .; } diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 7a8fc26c1115..faca978ebf9d 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -815,10 +815,14 @@ void free_kernel_image_pages(void *begin, void *end) set_memory_np_noalias(begin_ul, len_pages); } +void __weak mem_encrypt_free_decrypted_mem(void) { } + void __ref free_initmem(void) { e820__reallocate_tables(); + mem_encrypt_free_decrypted_mem(); + free_kernel_image_pages(&__init_begin, &__init_end); } diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c index b2de398d1fd3..006f373f54ab 100644 --- a/arch/x86/mm/mem_encrypt.c +++ b/arch/x86/mm/mem_encrypt.c @@ -348,6 +348,30 @@ bool sev_active(void) EXPORT_SYMBOL(sev_active); /* Architecture __weak replacement functions */ +void __init mem_encrypt_free_decrypted_mem(void) +{ + unsigned long vaddr, vaddr_end, npages; + int r; + + vaddr = (unsigned long)__start_bss_decrypted_unused; + vaddr_end = (unsigned long)__end_bss_decrypted; + npages = (vaddr_end - vaddr) >> PAGE_SHIFT; + + /* + * The unused memory range was mapped decrypted, change the encryption + * attribute from decrypted to encrypted before freeing it. + */ + if (mem_encrypt_active()) { + r = set_memory_encrypted(vaddr, npages); + if (r) { + pr_warn("failed to free unused decrypted pages\n"); + return; + } + } + + free_init_pages("unused decrypted", vaddr, vaddr_end); +} + void __init mem_encrypt_init(void) { if (!sme_me_mask) diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index ae394552fb94..59274e2c1ac4 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -115,6 +115,8 @@ static inline void pgd_list_del(pgd_t *pgd) #define UNSHARED_PTRS_PER_PGD \ (SHARED_KERNEL_PMD ? KERNEL_PGD_BOUNDARY : PTRS_PER_PGD) +#define MAX_UNSHARED_PTRS_PER_PGD \ + max_t(size_t, KERNEL_PGD_BOUNDARY, PTRS_PER_PGD) static void pgd_set_mm(pgd_t *pgd, struct mm_struct *mm) @@ -181,6 +183,7 @@ static void pgd_dtor(pgd_t *pgd) * and initialize the kernel pmds here. */ #define PREALLOCATED_PMDS UNSHARED_PTRS_PER_PGD +#define MAX_PREALLOCATED_PMDS MAX_UNSHARED_PTRS_PER_PGD /* * We allocate separate PMDs for the kernel part of the user page-table @@ -189,6 +192,7 @@ static void pgd_dtor(pgd_t *pgd) */ #define PREALLOCATED_USER_PMDS (static_cpu_has(X86_FEATURE_PTI) ? \ KERNEL_PGD_PTRS : 0) +#define MAX_PREALLOCATED_USER_PMDS KERNEL_PGD_PTRS void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd) { @@ -210,7 +214,9 @@ void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd) /* No need to prepopulate any pagetable entries in non-PAE modes. */ #define PREALLOCATED_PMDS 0 +#define MAX_PREALLOCATED_PMDS 0 #define PREALLOCATED_USER_PMDS 0 +#define MAX_PREALLOCATED_USER_PMDS 0 #endif /* CONFIG_X86_PAE */ static void free_pmds(struct mm_struct *mm, pmd_t *pmds[], int count) @@ -428,8 +434,8 @@ static inline void _pgd_free(pgd_t *pgd) pgd_t *pgd_alloc(struct mm_struct *mm) { pgd_t *pgd; - pmd_t *u_pmds[PREALLOCATED_USER_PMDS]; - pmd_t *pmds[PREALLOCATED_PMDS]; + pmd_t *u_pmds[MAX_PREALLOCATED_USER_PMDS]; + pmd_t *pmds[MAX_PREALLOCATED_PMDS]; pgd = _pgd_alloc(); @@ -637,6 +643,15 @@ void __native_set_fixmap(enum fixed_addresses idx, pte_t pte) { unsigned long address = __fix_to_virt(idx); +#ifdef CONFIG_X86_64 + /* + * Ensure that the static initial page tables are covering the + * fixmap completely. + */ + BUILD_BUG_ON(__end_of_permanent_fixed_addresses > + (FIXMAP_PMD_NUM * PTRS_PER_PTE)); +#endif + if (idx >= __end_of_fixed_addresses) { BUG(); return; diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index 2fe5c9b1816b..dd461c0167ef 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -1907,7 +1907,7 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) /* L3_k[511] -> level2_fixmap_pgt */ convert_pfn_mfn(level3_kernel_pgt); - /* L3_k[511][506] -> level1_fixmap_pgt */ + /* L3_k[511][508-FIXMAP_PMD_NUM ... 507] -> level1_fixmap_pgt */ convert_pfn_mfn(level2_fixmap_pgt); /* We get [511][511] and have Xen's version of level2_kernel_pgt */ @@ -1952,7 +1952,11 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) set_page_prot(level2_ident_pgt, PAGE_KERNEL_RO); set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO); - set_page_prot(level1_fixmap_pgt, PAGE_KERNEL_RO); + + for (i = 0; i < FIXMAP_PMD_NUM; i++) { + set_page_prot(level1_fixmap_pgt + i * PTRS_PER_PTE, + PAGE_KERNEL_RO); + } /* Pin down new L4 */ pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE, diff --git a/arch/x86/xen/pmu.c b/arch/x86/xen/pmu.c index 7d00d4ad44d4..95997e6c0696 100644 --- a/arch/x86/xen/pmu.c +++ b/arch/x86/xen/pmu.c @@ -478,7 +478,7 @@ static void xen_convert_regs(const struct xen_pmu_regs *xen_regs, irqreturn_t xen_pmu_irq_handler(int irq, void *dev_id) { int err, ret = IRQ_NONE; - struct pt_regs regs; + struct pt_regs regs = {0}; const struct xen_pmu_data *xenpmu_data = get_xenpmu_data(); uint8_t xenpmu_flags = get_xenpmu_flags(); |