diff options
Diffstat (limited to 'arch')
135 files changed, 2487 insertions, 1166 deletions
diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h index 976e89b116e5..de6c4df61082 100644 --- a/arch/alpha/include/uapi/asm/socket.h +++ b/arch/alpha/include/uapi/asm/socket.h @@ -122,6 +122,8 @@ #define SO_RCVTIMEO_NEW 66 #define SO_SNDTIMEO_NEW 67 +#define SO_DETACH_REUSEPORT_BPF 68 + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl index 9e7704e44f6d..1db9bbcfb84e 100644 --- a/arch/alpha/kernel/syscalls/syscall.tbl +++ b/arch/alpha/kernel/syscalls/syscall.tbl @@ -473,3 +473,4 @@ 541 common fsconfig sys_fsconfig 542 common fsmount sys_fsmount 543 common fspick sys_fspick +544 common pidfd_open sys_pidfd_open diff --git a/arch/arc/plat-eznps/Kconfig b/arch/arc/plat-eznps/Kconfig index 2eaecfb063a7..a376a50d3fea 100644 --- a/arch/arc/plat-eznps/Kconfig +++ b/arch/arc/plat-eznps/Kconfig @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 # # For a description of the syntax of this configuration file, -# see Documentation/kbuild/kconfig-language.txt. +# see Documentation/kbuild/kconfig-language.rst. # menuconfig ARC_PLAT_EZNPS diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index c87cc9a6fb3c..d850feb5cc0a 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -4,6 +4,7 @@ config ARM default y select ARCH_32BIT_OFF_T select ARCH_CLOCKSOURCE_DATA + select ARCH_HAS_BINFMT_FLAT select ARCH_HAS_DEBUG_VIRTUAL if MMU select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_HAS_ELF_RANDOMIZE @@ -30,6 +31,7 @@ config ARM select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_CMPXCHG_LOCKREF select ARCH_WANT_IPC_PARSE_VERSION + select BINFMT_FLAT_ARGVP_ENVP_ON_STACK select BUILDTIME_EXTABLE_SORT if MMU select CLONE_BACKWARDS select CPU_PM if SUSPEND || CPU_IDLE @@ -1293,7 +1295,7 @@ config SMP uniprocessor machines. On a uniprocessor machine, the kernel will run faster if you say N here. - See also <file:Documentation/x86/i386/IO-APIC.txt>, + See also <file:Documentation/x86/i386/IO-APIC.rst>, <file:Documentation/lockup-watchdogs.txt> and the SMP-HOWTO available at <http://tldp.org/HOWTO/SMP-HOWTO.html>. @@ -2040,7 +2042,7 @@ config CRASH_DUMP kdump/kexec. The crash dump kernel must be compiled to a memory address not used by the main kernel - For more details see Documentation/kdump/kdump.txt + For more details see Documentation/kdump/kdump.rst config AUTO_ZRELADDR bool "Auto calculation of the decompressed kernel image address" diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild index a8f149ab45b8..6b2dc15b6dff 100644 --- a/arch/arm/include/asm/Kbuild +++ b/arch/arm/include/asm/Kbuild @@ -5,6 +5,7 @@ generic-y += early_ioremap.h generic-y += emergency-restart.h generic-y += exec.h generic-y += extable.h +generic-y += flat.h generic-y += irq_regs.h generic-y += kdebug.h generic-y += local.h diff --git a/arch/arm/include/asm/flat.h b/arch/arm/include/asm/flat.h deleted file mode 100644 index f0c75ddeea23..000000000000 --- a/arch/arm/include/asm/flat.h +++ /dev/null @@ -1,37 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/arm/include/asm/flat.h -- uClinux flat-format executables - */ - -#ifndef __ARM_FLAT_H__ -#define __ARM_FLAT_H__ - -#include <linux/uaccess.h> - -#define flat_argvp_envp_on_stack() 1 -#define flat_old_ram_flag(flags) (flags) -#define flat_reloc_valid(reloc, size) ((reloc) <= (size)) - -static inline int flat_get_addr_from_rp(u32 __user *rp, u32 relval, u32 flags, - u32 *addr, u32 *persistent) -{ -#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS - return copy_from_user(addr, rp, 4) ? -EFAULT : 0; -#else - return get_user(*addr, rp); -#endif -} - -static inline int flat_put_addr_at_rp(u32 __user *rp, u32 addr, u32 rel) -{ -#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS - return copy_to_user(rp, &addr, 4) ? -EFAULT : 0; -#else - return put_user(addr, rp); -#endif -} - -#define flat_get_relocate_addr(rel) (rel) -#define flat_set_persistent(relval, p) 0 - -#endif /* __ARM_FLAT_H__ */ diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h index 9fb00973c608..3676e82cf95c 100644 --- a/arch/arm/include/asm/unistd.h +++ b/arch/arm/include/asm/unistd.h @@ -37,6 +37,7 @@ #define __ARCH_WANT_SYS_FORK #define __ARCH_WANT_SYS_VFORK #define __ARCH_WANT_SYS_CLONE +#define __ARCH_WANT_SYS_CLONE3 /* * Unimplemented (or alternatively implemented) syscalls diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index adff54c312bf..97dc386e3cb8 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -733,7 +733,8 @@ static inline void emit_a32_alu_r64(const bool is64, const s8 dst[], /* ALU operation */ emit_alu_r(rd[1], rs, true, false, op, ctx); - emit_a32_mov_i(rd[0], 0, ctx); + if (!ctx->prog->aux->verifier_zext) + emit_a32_mov_i(rd[0], 0, ctx); } arm_bpf_put_reg64(dst, rd, ctx); @@ -755,8 +756,9 @@ static inline void emit_a32_mov_r64(const bool is64, const s8 dst[], struct jit_ctx *ctx) { if (!is64) { emit_a32_mov_r(dst_lo, src_lo, ctx); - /* Zero out high 4 bytes */ - emit_a32_mov_i(dst_hi, 0, ctx); + if (!ctx->prog->aux->verifier_zext) + /* Zero out high 4 bytes */ + emit_a32_mov_i(dst_hi, 0, ctx); } else if (__LINUX_ARM_ARCH__ < 6 && ctx->cpu_architecture < CPU_ARCH_ARMv5TE) { /* complete 8 byte move */ @@ -1057,17 +1059,20 @@ static inline void emit_ldx_r(const s8 dst[], const s8 src, case BPF_B: /* Load a Byte */ emit(ARM_LDRB_I(rd[1], rm, off), ctx); - emit_a32_mov_i(rd[0], 0, ctx); + if (!ctx->prog->aux->verifier_zext) + emit_a32_mov_i(rd[0], 0, ctx); break; case BPF_H: /* Load a HalfWord */ emit(ARM_LDRH_I(rd[1], rm, off), ctx); - emit_a32_mov_i(rd[0], 0, ctx); + if (!ctx->prog->aux->verifier_zext) + emit_a32_mov_i(rd[0], 0, ctx); break; case BPF_W: /* Load a Word */ emit(ARM_LDR_I(rd[1], rm, off), ctx); - emit_a32_mov_i(rd[0], 0, ctx); + if (!ctx->prog->aux->verifier_zext) + emit_a32_mov_i(rd[0], 0, ctx); break; case BPF_DW: /* Load a Double Word */ @@ -1356,6 +1361,11 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) case BPF_ALU64 | BPF_MOV | BPF_X: switch (BPF_SRC(code)) { case BPF_X: + if (imm == 1) { + /* Special mov32 for zext */ + emit_a32_mov_i(dst_hi, 0, ctx); + break; + } emit_a32_mov_r64(is64, dst, src, ctx); break; case BPF_K: @@ -1435,7 +1445,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) } emit_udivmod(rd_lo, rd_lo, rt, ctx, BPF_OP(code)); arm_bpf_put_reg32(dst_lo, rd_lo, ctx); - emit_a32_mov_i(dst_hi, 0, ctx); + if (!ctx->prog->aux->verifier_zext) + emit_a32_mov_i(dst_hi, 0, ctx); break; case BPF_ALU64 | BPF_DIV | BPF_K: case BPF_ALU64 | BPF_DIV | BPF_X: @@ -1450,7 +1461,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) return -EINVAL; if (imm) emit_a32_alu_i(dst_lo, imm, ctx, BPF_OP(code)); - emit_a32_mov_i(dst_hi, 0, ctx); + if (!ctx->prog->aux->verifier_zext) + emit_a32_mov_i(dst_hi, 0, ctx); break; /* dst = dst << imm */ case BPF_ALU64 | BPF_LSH | BPF_K: @@ -1485,7 +1497,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) /* dst = ~dst */ case BPF_ALU | BPF_NEG: emit_a32_alu_i(dst_lo, 0, ctx, BPF_OP(code)); - emit_a32_mov_i(dst_hi, 0, ctx); + if (!ctx->prog->aux->verifier_zext) + emit_a32_mov_i(dst_hi, 0, ctx); break; /* dst = ~dst (64 bit) */ case BPF_ALU64 | BPF_NEG: @@ -1541,11 +1554,13 @@ emit_bswap_uxt: #else /* ARMv6+ */ emit(ARM_UXTH(rd[1], rd[1]), ctx); #endif - emit(ARM_EOR_R(rd[0], rd[0], rd[0]), ctx); + if (!ctx->prog->aux->verifier_zext) + emit(ARM_EOR_R(rd[0], rd[0], rd[0]), ctx); break; case 32: /* zero-extend 32 bits into 64 bits */ - emit(ARM_EOR_R(rd[0], rd[0], rd[0]), ctx); + if (!ctx->prog->aux->verifier_zext) + emit(ARM_EOR_R(rd[0], rd[0], rd[0]), ctx); break; case 64: /* nop */ @@ -1835,6 +1850,11 @@ void bpf_jit_compile(struct bpf_prog *prog) /* Nothing to do here. We support Internal BPF. */ } +bool bpf_jit_needs_zext(void) +{ + return true; +} + struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) { struct bpf_prog *tmp, *orig_prog = prog; diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl index aaf479a9e92d..6da7dc4d79cc 100644 --- a/arch/arm/tools/syscall.tbl +++ b/arch/arm/tools/syscall.tbl @@ -447,3 +447,5 @@ 431 common fsconfig sys_fsconfig 432 common fsmount sys_fsmount 433 common fspick sys_fspick +434 common pidfd_open sys_pidfd_open +435 common clone3 sys_clone3 diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index c1734e444fb8..c085aec9459b 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -998,7 +998,7 @@ config CRASH_DUMP reserved region and then later executed after a crash by kdump/kexec. - For more details see Documentation/kdump/kdump.txt + For more details see Documentation/kdump/kdump.rst config XEN_DOM0 def_bool y diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi index bf7f845447ed..22a1c74dddf3 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi @@ -431,6 +431,12 @@ compatible = "fsl,enetc"; reg = <0x000100 0 0 0 0>; }; + ethernet@0,4 { + compatible = "fsl,enetc-ptp"; + reg = <0x000400 0 0 0 0>; + clocks = <&clockgen 4 0>; + little-endian; + }; }; }; }; diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi index 661137ffa319..dacd8cf03a7f 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi @@ -609,6 +609,14 @@ <GIC_SPI 209 IRQ_TYPE_LEVEL_HIGH>; }; + ptp-timer@8b95000 { + compatible = "fsl,dpaa2-ptp"; + reg = <0x0 0x8b95000 0x0 0x100>; + clocks = <&clockgen 4 0>; + little-endian; + fsl,extts-fifo; + }; + cluster1_core0_watchdog: wdt@c000000 { compatible = "arm,sp805-wdt", "arm,primecell"; reg = <0x0 0xc000000 0x0 0x1000>; diff --git a/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi index d7e78dcd153d..3ace91945b72 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi @@ -321,6 +321,14 @@ }; }; + ptp-timer@8b95000 { + compatible = "fsl,dpaa2-ptp"; + reg = <0x0 0x8b95000 0x0 0x100>; + clocks = <&clockgen 4 1>; + little-endian; + fsl,extts-fifo; + }; + fsl_mc: fsl-mc@80c000000 { compatible = "fsl,qoriq-mc"; reg = <0x00000008 0x0c000000 0 0x40>, /* MC portal base */ diff --git a/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi b/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi index 125a8cc2c5b3..e6fdba39453c 100644 --- a/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi @@ -848,6 +848,14 @@ dma-coherent; }; + ptp-timer@8b95000 { + compatible = "fsl,dpaa2-ptp"; + reg = <0x0 0x8b95000 0x0 0x100>; + clocks = <&clockgen 4 1>; + little-endian; + fsl,extts-fifo; + }; + fsl_mc: fsl-mc@80c000000 { compatible = "fsl,qoriq-mc"; reg = <0x00000008 0x0c000000 0 0x40>, diff --git a/arch/arm64/boot/dts/mediatek/mt7622.dtsi b/arch/arm64/boot/dts/mediatek/mt7622.dtsi index 4b1f5ae710eb..d1e13d340e26 100644 --- a/arch/arm64/boot/dts/mediatek/mt7622.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt7622.dtsi @@ -929,7 +929,8 @@ sgmiisys: sgmiisys@1b128000 { compatible = "mediatek,mt7622-sgmiisys", "syscon"; - reg = <0 0x1b128000 0 0x1000>; + reg = <0 0x1b128000 0 0x3000>; #clock-cells = <1>; + mediatek,physpeed = "2500"; }; }; diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index c9e9a6978e73..8e79ce9c3f5c 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -83,7 +83,7 @@ static inline unsigned long efi_get_max_fdt_addr(unsigned long dram_base) * guaranteed to cover the kernel Image. * * Since the EFI stub is part of the kernel Image, we can relax the - * usual requirements in Documentation/arm64/booting.txt, which still + * usual requirements in Documentation/arm64/booting.rst, which still * apply to other bootloaders, and are required for some kernel * configurations. */ diff --git a/arch/arm64/include/asm/image.h b/arch/arm64/include/asm/image.h index e2c27a2278e9..c2b13213c720 100644 --- a/arch/arm64/include/asm/image.h +++ b/arch/arm64/include/asm/image.h @@ -27,7 +27,7 @@ /* * struct arm64_image_header - arm64 kernel image header - * See Documentation/arm64/booting.txt for details + * See Documentation/arm64/booting.rst for details * * @code0: Executable code, or * @mz_header alternatively used for part of MZ header diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h index 2a23614198f1..2629a68b8724 100644 --- a/arch/arm64/include/asm/unistd.h +++ b/arch/arm64/include/asm/unistd.h @@ -38,10 +38,11 @@ #define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5) #define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800) -#define __NR_compat_syscalls 434 +#define __NR_compat_syscalls 436 #endif #define __ARCH_WANT_SYS_CLONE +#define __ARCH_WANT_SYS_CLONE3 #ifndef __COMPAT_SYSCALL_NR #include <uapi/asm/unistd.h> diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index aa995920bd34..94ab29cf4f00 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -875,6 +875,10 @@ __SYSCALL(__NR_fsconfig, sys_fsconfig) __SYSCALL(__NR_fsmount, sys_fsmount) #define __NR_fspick 433 __SYSCALL(__NR_fspick, sys_fspick) +#define __NR_pidfd_open 434 +__SYSCALL(__NR_pidfd_open, sys_pidfd_open) +#define __NR_clone3 435 +__SYSCALL(__NR_clone3, sys_clone3) /* * Please add new compat syscalls above this comment and update diff --git a/arch/arm64/include/uapi/asm/sigcontext.h b/arch/arm64/include/uapi/asm/sigcontext.h index 3d448a0bb225..8b0ebce92427 100644 --- a/arch/arm64/include/uapi/asm/sigcontext.h +++ b/arch/arm64/include/uapi/asm/sigcontext.h @@ -146,7 +146,7 @@ struct sve_context { * vector length beyond its initial architectural limit of 2048 bits * (16 quadwords). * - * See linux/Documentation/arm64/sve.txt for a description of the VL/VQ + * See linux/Documentation/arm64/sve.rst for a description of the VL/VQ * terminology. */ #define SVE_VQ_BYTES __SVE_VQ_BYTES /* bytes per quadword */ diff --git a/arch/arm64/kernel/kexec_image.c b/arch/arm64/kernel/kexec_image.c index 07bf740bea91..2514fd6f12cb 100644 --- a/arch/arm64/kernel/kexec_image.c +++ b/arch/arm64/kernel/kexec_image.c @@ -53,7 +53,7 @@ static void *image_load(struct kimage *image, /* * We require a kernel with an unambiguous Image header. Per - * Documentation/booting.txt, this is the case when image_size + * Documentation/arm64/booting.rst, this is the case when image_size * is non-zero (practically speaking, since v3.17). */ h = (struct arm64_image_header *)kernel; diff --git a/arch/c6x/Kconfig b/arch/c6x/Kconfig index eeb0471268a0..b4fb61c83494 100644 --- a/arch/c6x/Kconfig +++ b/arch/c6x/Kconfig @@ -1,12 +1,13 @@ # SPDX-License-Identifier: GPL-2.0 # # For a description of the syntax of this configuration file, -# see Documentation/kbuild/kconfig-language.txt. +# see Documentation/kbuild/kconfig-language.rst. # config C6X def_bool y select ARCH_32BIT_OFF_T + select ARCH_HAS_BINFMT_FLAT select ARCH_HAS_SYNC_DMA_FOR_CPU select ARCH_HAS_SYNC_DMA_FOR_DEVICE select CLKDEV_LOOKUP diff --git a/arch/c6x/include/asm/flat.h b/arch/c6x/include/asm/flat.h index 76fd0bb962a3..9e6544b51386 100644 --- a/arch/c6x/include/asm/flat.h +++ b/arch/c6x/include/asm/flat.h @@ -4,11 +4,8 @@ #include <asm/unaligned.h> -#define flat_argvp_envp_on_stack() 0 -#define flat_old_ram_flag(flags) (flags) -#define flat_reloc_valid(reloc, size) ((reloc) <= (size)) static inline int flat_get_addr_from_rp(u32 __user *rp, u32 relval, u32 flags, - u32 *addr, u32 *persistent) + u32 *addr) { *addr = get_unaligned((__force u32 *)rp); return 0; @@ -18,7 +15,5 @@ static inline int flat_put_addr_at_rp(u32 __user *rp, u32 addr, u32 rel) put_unaligned(addr, (__force u32 *)rp); return 0; } -#define flat_get_relocate_addr(rel) (rel) -#define flat_set_persistent(relval, p) 0 #endif /* __ASM_C6X_FLAT_H */ diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig index ecfc4b4b6373..ec800e9d5aad 100644 --- a/arch/h8300/Kconfig +++ b/arch/h8300/Kconfig @@ -2,6 +2,9 @@ config H8300 def_bool y select ARCH_32BIT_OFF_T + select ARCH_HAS_BINFMT_FLAT + select BINFMT_FLAT_ARGVP_ENVP_ON_STACK + select BINFMT_FLAT_OLD_ALWAYS_RAM select GENERIC_ATOMIC64 select HAVE_UID16 select VIRT_TO_BUS diff --git a/arch/h8300/include/asm/flat.h b/arch/h8300/include/asm/flat.h index f4cdfcbdd2ba..78070f924177 100644 --- a/arch/h8300/include/asm/flat.h +++ b/arch/h8300/include/asm/flat.h @@ -8,11 +8,6 @@ #include <asm/unaligned.h> -#define flat_argvp_envp_on_stack() 1 -#define flat_old_ram_flag(flags) 1 -#define flat_reloc_valid(reloc, size) ((reloc) <= (size)) -#define flat_set_persistent(relval, p) 0 - /* * on the H8 a couple of the relocations have an instruction in the * top byte. As there can only be 24bits of address space, we just @@ -22,7 +17,7 @@ #define flat_get_relocate_addr(rel) (rel & ~0x00000001) static inline int flat_get_addr_from_rp(u32 __user *rp, u32 relval, u32 flags, - u32 *addr, u32 *persistent) + u32 *addr) { u32 val = get_unaligned((__force u32 *)rp); if (!(flags & FLAT_FLAG_GOTPIC)) diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl index e01df3f2f80d..ecc44926737b 100644 --- a/arch/ia64/kernel/syscalls/syscall.tbl +++ b/arch/ia64/kernel/syscalls/syscall.tbl @@ -354,3 +354,4 @@ 431 common fsconfig sys_fsconfig 432 common fsmount sys_fsmount 433 common fspick sys_fspick +434 common pidfd_open sys_pidfd_open diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig index 00f5c98a5e05..c518d695c376 100644 --- a/arch/m68k/Kconfig +++ b/arch/m68k/Kconfig @@ -3,12 +3,14 @@ config M68K bool default y select ARCH_32BIT_OFF_T + select ARCH_HAS_BINFMT_FLAT select ARCH_HAS_DMA_MMAP_PGPROT if MMU && !COLDFIRE - select ARCH_HAS_DMA_PREP_COHERENT + select ARCH_HAS_DMA_PREP_COHERENT if HAS_DMA && MMU && !COLDFIRE select ARCH_HAS_SYNC_DMA_FOR_DEVICE if HAS_DMA select ARCH_MIGHT_HAVE_PC_PARPORT if ISA select ARCH_NO_COHERENT_DMA_MMAP if !MMU select ARCH_NO_PREEMPT if !COLDFIRE + select BINFMT_FLAT_ARGVP_ENVP_ON_STACK select DMA_DIRECT_REMAP if HAS_DMA && MMU && !COLDFIRE select HAVE_IDE select HAVE_AOUT if MMU diff --git a/arch/m68k/include/asm/flat.h b/arch/m68k/include/asm/flat.h index 4f1d1e373420..46379e08cdd6 100644 --- a/arch/m68k/include/asm/flat.h +++ b/arch/m68k/include/asm/flat.h @@ -6,35 +6,7 @@ #ifndef __M68KNOMMU_FLAT_H__ #define __M68KNOMMU_FLAT_H__ -#include <linux/uaccess.h> - -#define flat_argvp_envp_on_stack() 1 -#define flat_old_ram_flag(flags) (flags) -#define flat_reloc_valid(reloc, size) ((reloc) <= (size)) -static inline int flat_get_addr_from_rp(u32 __user *rp, u32 relval, u32 flags, - u32 *addr, u32 *persistent) -{ -#ifdef CONFIG_CPU_HAS_NO_UNALIGNED - return copy_from_user(addr, rp, 4) ? -EFAULT : 0; -#else - return get_user(*addr, rp); -#endif -} - -static inline int flat_put_addr_at_rp(u32 __user *rp, u32 addr, u32 rel) -{ -#ifdef CONFIG_CPU_HAS_NO_UNALIGNED - return copy_to_user(rp, &addr, 4) ? -EFAULT : 0; -#else - return put_user(addr, rp); -#endif -} -#define flat_get_relocate_addr(rel) (rel) - -static inline int flat_set_persistent(u32 relval, u32 *persistent) -{ - return 0; -} +#include <asm-generic/flat.h> #define FLAT_PLAT_INIT(regs) \ do { \ diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl index 7e3d0734b2f3..9a3eb2558568 100644 --- a/arch/m68k/kernel/syscalls/syscall.tbl +++ b/arch/m68k/kernel/syscalls/syscall.tbl @@ -433,3 +433,4 @@ 431 common fsconfig sys_fsconfig 432 common fsmount sys_fsmount 433 common fspick sys_fspick +434 common pidfd_open sys_pidfd_open diff --git a/arch/m68k/q40/README b/arch/m68k/q40/README index 93f4c4cd3c45..a4991d2d8af6 100644 --- a/arch/m68k/q40/README +++ b/arch/m68k/q40/README @@ -31,7 +31,7 @@ drivers used by the Q40, apart from the very obvious (console etc.): char/joystick/* # most of this should work, not # in default config.in block/q40ide.c # startup for ide - ide* # see Documentation/ide/ide.txt + ide* # see Documentation/ide/ide.rst floppy.c # normal PC driver, DMA emu in asm/floppy.h # and arch/m68k/kernel/entry.S # see drivers/block/README.fd diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig index f11433daab4a..d411de05b628 100644 --- a/arch/microblaze/Kconfig +++ b/arch/microblaze/Kconfig @@ -3,6 +3,7 @@ config MICROBLAZE def_bool y select ARCH_32BIT_OFF_T select ARCH_NO_SWAP + select ARCH_HAS_BINFMT_FLAT if !MMU select ARCH_HAS_DMA_COHERENT_TO_PFN if MMU select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_HAS_SYNC_DMA_FOR_CPU diff --git a/arch/microblaze/Kconfig.debug b/arch/microblaze/Kconfig.debug index 3a343188d86c..865527ac332a 100644 --- a/arch/microblaze/Kconfig.debug +++ b/arch/microblaze/Kconfig.debug @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only # For a description of the syntax of this configuration file, -# see Documentation/kbuild/kconfig-language.txt. +# see Documentation/kbuild/kconfig-language.rst. config TRACE_IRQFLAGS_SUPPORT def_bool y diff --git a/arch/microblaze/Kconfig.platform b/arch/microblaze/Kconfig.platform index 5bf54c1d4f60..7795f90dad86 100644 --- a/arch/microblaze/Kconfig.platform +++ b/arch/microblaze/Kconfig.platform @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only # For a description of the syntax of this configuration file, -# see Documentation/kbuild/kconfig-language.txt. +# see Documentation/kbuild/kconfig-language.rst. # # Platform selection Kconfig menu for MicroBlaze targets # diff --git a/arch/microblaze/include/asm/flat.h b/arch/microblaze/include/asm/flat.h index 3d2747d4c967..1ab86770eaee 100644 --- a/arch/microblaze/include/asm/flat.h +++ b/arch/microblaze/include/asm/flat.h @@ -13,11 +13,6 @@ #include <asm/unaligned.h> -#define flat_argvp_envp_on_stack() 0 -#define flat_old_ram_flag(flags) (flags) -#define flat_reloc_valid(reloc, size) ((reloc) <= (size)) -#define flat_set_persistent(relval, p) 0 - /* * Microblaze works a little differently from other arches, because * of the MICROBLAZE_64 reloc type. Here, a 32 bit address is split @@ -33,7 +28,7 @@ */ static inline int flat_get_addr_from_rp(u32 __user *rp, u32 relval, u32 flags, - u32 *addr, u32 *persistent) + u32 *addr) { u32 *p = (__force u32 *)rp; diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl index 26339e417695..09b0cd7dab0a 100644 --- a/arch/microblaze/kernel/syscalls/syscall.tbl +++ b/arch/microblaze/kernel/syscalls/syscall.tbl @@ -439,3 +439,5 @@ 431 common fsconfig sys_fsconfig 432 common fsmount sys_fsmount 433 common fspick sys_fspick +434 common pidfd_open sys_pidfd_open +435 common clone3 sys_clone3 diff --git a/arch/mips/boot/dts/mscc/ocelot.dtsi b/arch/mips/boot/dts/mscc/ocelot.dtsi index 90c60d42f571..33ae74aaa1bb 100644 --- a/arch/mips/boot/dts/mscc/ocelot.dtsi +++ b/arch/mips/boot/dts/mscc/ocelot.dtsi @@ -132,11 +132,12 @@ <0x1270000 0x100>, <0x1280000 0x100>, <0x1800000 0x80000>, - <0x1880000 0x10000>; + <0x1880000 0x10000>, + <0x1060000 0x10000>; reg-names = "sys", "rew", "qs", "port0", "port1", "port2", "port3", "port4", "port5", "port6", "port7", "port8", "port9", "port10", "qsys", - "ana"; + "ana", "s2"; interrupts = <21 22>; interrupt-names = "xtr", "inj"; diff --git a/arch/mips/boot/dts/qca/ar9331.dtsi b/arch/mips/boot/dts/qca/ar9331.dtsi index 2bae201aa365..63a9f33aa43e 100644 --- a/arch/mips/boot/dts/qca/ar9331.dtsi +++ b/arch/mips/boot/dts/qca/ar9331.dtsi @@ -116,6 +116,32 @@ }; }; + eth0: ethernet@19000000 { + compatible = "qca,ar9330-eth"; + reg = <0x19000000 0x200>; + interrupts = <4>; + + resets = <&rst 9>, <&rst 22>; + reset-names = "mac", "mdio"; + clocks = <&pll ATH79_CLK_AHB>, <&pll ATH79_CLK_AHB>; + clock-names = "eth", "mdio"; + + status = "disabled"; + }; + + eth1: ethernet@1a000000 { + compatible = "qca,ar9330-eth"; + reg = <0x1a000000 0x200>; + interrupts = <5>; + + resets = <&rst 13>, <&rst 23>; + reset-names = "mac", "mdio"; + clocks = <&pll ATH79_CLK_AHB>, <&pll ATH79_CLK_AHB>; + clock-names = "eth", "mdio"; + + status = "disabled"; + }; + usb: usb@1b000100 { compatible = "chipidea,usb2"; reg = <0x1b000000 0x200>; diff --git a/arch/mips/boot/dts/qca/ar9331_dpt_module.dts b/arch/mips/boot/dts/qca/ar9331_dpt_module.dts index e7af2cf5f4c1..77bab823eb3b 100644 --- a/arch/mips/boot/dts/qca/ar9331_dpt_module.dts +++ b/arch/mips/boot/dts/qca/ar9331_dpt_module.dts @@ -76,3 +76,11 @@ reg = <0>; }; }; + +ð0 { + status = "okay"; +}; + +ð1 { + status = "okay"; +}; diff --git a/arch/mips/configs/malta_defconfig b/arch/mips/configs/malta_defconfig index 0ee5e677662e..0de92ac1ca64 100644 --- a/arch/mips/configs/malta_defconfig +++ b/arch/mips/configs/malta_defconfig @@ -210,7 +210,6 @@ CONFIG_NET_ACT_NAT=m CONFIG_NET_ACT_PEDIT=m CONFIG_NET_ACT_SIMP=m CONFIG_NET_ACT_SKBEDIT=m -CONFIG_NET_CLS_IND=y CONFIG_CFG80211=m CONFIG_MAC80211=m CONFIG_MAC80211_MESH=y diff --git a/arch/mips/configs/malta_kvm_defconfig b/arch/mips/configs/malta_kvm_defconfig index 041bffac043b..efc3abace048 100644 --- a/arch/mips/configs/malta_kvm_defconfig +++ b/arch/mips/configs/malta_kvm_defconfig @@ -215,7 +215,6 @@ CONFIG_NET_ACT_NAT=m CONFIG_NET_ACT_PEDIT=m CONFIG_NET_ACT_SIMP=m CONFIG_NET_ACT_SKBEDIT=m -CONFIG_NET_CLS_IND=y CONFIG_CFG80211=m CONFIG_MAC80211=m CONFIG_MAC80211_MESH=y diff --git a/arch/mips/configs/malta_kvm_guest_defconfig b/arch/mips/configs/malta_kvm_guest_defconfig index 511065e62182..c6ceeca4394d 100644 --- a/arch/mips/configs/malta_kvm_guest_defconfig +++ b/arch/mips/configs/malta_kvm_guest_defconfig @@ -212,7 +212,6 @@ CONFIG_NET_ACT_NAT=m CONFIG_NET_ACT_PEDIT=m CONFIG_NET_ACT_SIMP=m CONFIG_NET_ACT_SKBEDIT=m -CONFIG_NET_CLS_IND=y CONFIG_CFG80211=m CONFIG_MAC80211=m CONFIG_MAC80211_MESH=y diff --git a/arch/mips/configs/malta_qemu_32r6_defconfig b/arch/mips/configs/malta_qemu_32r6_defconfig index 299088043164..e6c600dc1814 100644 --- a/arch/mips/configs/malta_qemu_32r6_defconfig +++ b/arch/mips/configs/malta_qemu_32r6_defconfig @@ -74,7 +74,6 @@ CONFIG_NET_CLS_RSVP=m CONFIG_NET_CLS_RSVP6=m CONFIG_NET_CLS_ACT=y CONFIG_NET_ACT_POLICE=y -CONFIG_NET_CLS_IND=y # CONFIG_WIRELESS is not set CONFIG_DEVTMPFS=y CONFIG_BLK_DEV_LOOP=y diff --git a/arch/mips/configs/maltaaprp_defconfig b/arch/mips/configs/maltaaprp_defconfig index 2b4b3a24f637..82b44b774553 100644 --- a/arch/mips/configs/maltaaprp_defconfig +++ b/arch/mips/configs/maltaaprp_defconfig @@ -76,7 +76,6 @@ CONFIG_NET_CLS_RSVP=m CONFIG_NET_CLS_RSVP6=m CONFIG_NET_CLS_ACT=y CONFIG_NET_ACT_POLICE=y -CONFIG_NET_CLS_IND=y # CONFIG_WIRELESS is not set CONFIG_DEVTMPFS=y CONFIG_BLK_DEV_LOOP=y diff --git a/arch/mips/configs/maltasmvp_defconfig b/arch/mips/configs/maltasmvp_defconfig index 425ddfd7cd78..4190fc6189a0 100644 --- a/arch/mips/configs/maltasmvp_defconfig +++ b/arch/mips/configs/maltasmvp_defconfig @@ -77,7 +77,6 @@ CONFIG_NET_CLS_RSVP=m CONFIG_NET_CLS_RSVP6=m CONFIG_NET_CLS_ACT=y CONFIG_NET_ACT_POLICE=y -CONFIG_NET_CLS_IND=y # CONFIG_WIRELESS is not set CONFIG_DEVTMPFS=y CONFIG_BLK_DEV_LOOP=y diff --git a/arch/mips/configs/maltasmvp_eva_defconfig b/arch/mips/configs/maltasmvp_eva_defconfig index 8beaa7ba1e52..a13c10e910ec 100644 --- a/arch/mips/configs/maltasmvp_eva_defconfig +++ b/arch/mips/configs/maltasmvp_eva_defconfig @@ -78,7 +78,6 @@ CONFIG_NET_CLS_RSVP=m CONFIG_NET_CLS_RSVP6=m CONFIG_NET_CLS_ACT=y CONFIG_NET_ACT_POLICE=y -CONFIG_NET_CLS_IND=y # CONFIG_WIRELESS is not set CONFIG_DEVTMPFS=y CONFIG_BLK_DEV_LOOP=y diff --git a/arch/mips/configs/maltaup_defconfig b/arch/mips/configs/maltaup_defconfig index 6e8b95ceb54a..b35f1fc690fb 100644 --- a/arch/mips/configs/maltaup_defconfig +++ b/arch/mips/configs/maltaup_defconfig @@ -75,7 +75,6 @@ CONFIG_NET_CLS_RSVP=m CONFIG_NET_CLS_RSVP6=m CONFIG_NET_CLS_ACT=y CONFIG_NET_ACT_POLICE=y -CONFIG_NET_CLS_IND=y # CONFIG_WIRELESS is not set CONFIG_DEVTMPFS=y CONFIG_BLK_DEV_LOOP=y diff --git a/arch/mips/configs/maltaup_xpa_defconfig b/arch/mips/configs/maltaup_xpa_defconfig index 6c026db96ff9..56861aef2756 100644 --- a/arch/mips/configs/maltaup_xpa_defconfig +++ b/arch/mips/configs/maltaup_xpa_defconfig @@ -212,7 +212,6 @@ CONFIG_NET_ACT_NAT=m CONFIG_NET_ACT_PEDIT=m CONFIG_NET_ACT_SIMP=m CONFIG_NET_ACT_SKBEDIT=m -CONFIG_NET_CLS_IND=y CONFIG_CFG80211=m CONFIG_MAC80211=m CONFIG_MAC80211_MESH=y diff --git a/arch/mips/configs/rb532_defconfig b/arch/mips/configs/rb532_defconfig index 50632a3103dd..864c70fbe668 100644 --- a/arch/mips/configs/rb532_defconfig +++ b/arch/mips/configs/rb532_defconfig @@ -103,7 +103,6 @@ CONFIG_GACT_PROB=y CONFIG_NET_ACT_MIRRED=m CONFIG_NET_ACT_IPT=m CONFIG_NET_ACT_PEDIT=m -CONFIG_NET_CLS_IND=y CONFIG_HAMRADIO=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_MTD=y diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h index d41765cfbc6e..d0a9ed2ca2d6 100644 --- a/arch/mips/include/uapi/asm/socket.h +++ b/arch/mips/include/uapi/asm/socket.h @@ -133,6 +133,8 @@ #define SO_RCVTIMEO_NEW 66 #define SO_SNDTIMEO_NEW 67 +#define SO_DETACH_REUSEPORT_BPF 68 + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl index 0e2dd68ade57..97035e19ad03 100644 --- a/arch/mips/kernel/syscalls/syscall_n32.tbl +++ b/arch/mips/kernel/syscalls/syscall_n32.tbl @@ -372,3 +372,4 @@ 431 n32 fsconfig sys_fsconfig 432 n32 fsmount sys_fsmount 433 n32 fspick sys_fspick +434 n32 pidfd_open sys_pidfd_open diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl index 5eebfa0d155c..d7292722d3b0 100644 --- a/arch/mips/kernel/syscalls/syscall_n64.tbl +++ b/arch/mips/kernel/syscalls/syscall_n64.tbl @@ -348,3 +348,4 @@ 431 n64 fsconfig sys_fsconfig 432 n64 fsmount sys_fsmount 433 n64 fspick sys_fspick +434 n64 pidfd_open sys_pidfd_open diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl index 3cc1374e02d0..dba084c92f14 100644 --- a/arch/mips/kernel/syscalls/syscall_o32.tbl +++ b/arch/mips/kernel/syscalls/syscall_o32.tbl @@ -421,3 +421,4 @@ 431 o32 fsconfig sys_fsconfig 432 o32 fsmount sys_fsmount 433 o32 fspick sys_fspick +434 o32 pidfd_open sys_pidfd_open diff --git a/arch/nds32/Kconfig b/arch/nds32/Kconfig index 3299e287a477..fd0d0639454f 100644 --- a/arch/nds32/Kconfig +++ b/arch/nds32/Kconfig @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only # # For a description of the syntax of this configuration file, -# see Documentation/kbuild/kconfig-language.txt. +# see Documentation/kbuild/kconfig-language.rst. # config NDS32 diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig index 7cfb20555b10..bf326f0edd2f 100644 --- a/arch/openrisc/Kconfig +++ b/arch/openrisc/Kconfig @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 # # For a description of the syntax of this configuration file, -# see Documentation/kbuild/kconfig-language.txt. +# see Documentation/kbuild/kconfig-language.rst. # config OPENRISC diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 4860efa91d7b..42875ff15671 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -59,6 +59,8 @@ config PARISC select HAVE_ARCH_KGDB select HAVE_KPROBES select HAVE_KRETPROBES + select HAVE_DYNAMIC_FTRACE if $(cc-option,-fpatchable-function-entry=1,1) + select HAVE_FTRACE_MCOUNT_RECORD if HAVE_DYNAMIC_FTRACE help The PA-RISC microprocessor is designed by Hewlett-Packard and used diff --git a/arch/parisc/Makefile b/arch/parisc/Makefile index c19af26febe6..58d46665cad9 100644 --- a/arch/parisc/Makefile +++ b/arch/parisc/Makefile @@ -47,6 +47,24 @@ ifneq ($(SUBARCH),$(UTS_MACHINE)) endif endif +ifdef CONFIG_DYNAMIC_FTRACE +ifdef CONFIG_64BIT +NOP_COUNT := 8 +else +NOP_COUNT := 5 +endif + +export CC_USING_RECORD_MCOUNT:=1 +export CC_USING_PATCHABLE_FUNCTION_ENTRY:=1 + +KBUILD_AFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY=1 +KBUILD_CFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY=1 \ + -DFTRACE_PATCHABLE_FUNCTION_SIZE=$(NOP_COUNT) + +CC_FLAGS_FTRACE := -fpatchable-function-entry=$(NOP_COUNT),$(shell echo $$(($(NOP_COUNT)-1))) +KBUILD_LDFLAGS_MODULE += -T $(srctree)/arch/parisc/kernel/module.lds +endif + OBJCOPY_FLAGS =-O binary -R .note -R .comment -S cflags-y := -pipe diff --git a/arch/parisc/include/asm/ftrace.h b/arch/parisc/include/asm/ftrace.h index 42b2c75a1645..958c0aa5dbb2 100644 --- a/arch/parisc/include/asm/ftrace.h +++ b/arch/parisc/include/asm/ftrace.h @@ -5,12 +5,23 @@ #ifndef __ASSEMBLY__ extern void mcount(void); -#define MCOUNT_INSN_SIZE 4 - +#define MCOUNT_ADDR ((unsigned long)mcount) +#define MCOUNT_INSN_SIZE 4 +#define CC_USING_NOP_MCOUNT extern unsigned long sys_call_table[]; extern unsigned long return_address(unsigned int); +#ifdef CONFIG_DYNAMIC_FTRACE +extern void ftrace_caller(void); + +struct dyn_arch_ftrace { +}; + +unsigned long ftrace_call_adjust(unsigned long addr); + +#endif + #define ftrace_return_address(n) return_address(n) #endif /* __ASSEMBLY__ */ diff --git a/arch/parisc/include/asm/patch.h b/arch/parisc/include/asm/patch.h index 685b58a13968..400d84c6e504 100644 --- a/arch/parisc/include/asm/patch.h +++ b/arch/parisc/include/asm/patch.h @@ -4,8 +4,10 @@ /* stop machine and patch kernel text */ void patch_text(void *addr, unsigned int insn); +void patch_text_multiple(void *addr, u32 *insn, unsigned int len); /* patch kernel text with machine already stopped (e.g. in kgdb) */ -void __patch_text(void *addr, unsigned int insn); +void __patch_text(void *addr, u32 insn); +void __patch_text_multiple(void *addr, u32 *insn, unsigned int len); #endif diff --git a/arch/parisc/include/asm/psw.h b/arch/parisc/include/asm/psw.h index 76c301146c31..46921ffcc407 100644 --- a/arch/parisc/include/asm/psw.h +++ b/arch/parisc/include/asm/psw.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _PARISC_PSW_H - +#define _PARISC_PSW_H #define PSW_I 0x00000001 #define PSW_D 0x00000002 diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h index 66c5dd245ac7..10173c32195e 100644 --- a/arch/parisc/include/uapi/asm/socket.h +++ b/arch/parisc/include/uapi/asm/socket.h @@ -114,6 +114,8 @@ #define SO_RCVTIMEO_NEW 0x4040 #define SO_SNDTIMEO_NEW 0x4041 +#define SO_DETACH_REUSEPORT_BPF 0x4042 + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 diff --git a/arch/parisc/kernel/Makefile b/arch/parisc/kernel/Makefile index fc0df5c44468..c232266b517c 100644 --- a/arch/parisc/kernel/Makefile +++ b/arch/parisc/kernel/Makefile @@ -14,10 +14,11 @@ obj-y := cache.o pacache.o setup.o pdt.o traps.o time.o irq.o \ ifdef CONFIG_FUNCTION_TRACER # Do not profile debug and lowlevel utilities -CFLAGS_REMOVE_ftrace.o = -pg -CFLAGS_REMOVE_cache.o = -pg -CFLAGS_REMOVE_perf.o = -pg -CFLAGS_REMOVE_unwind.o = -pg +CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_cache.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_perf.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_unwind.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_patch.o = $(CC_FLAGS_FTRACE) endif obj-$(CONFIG_SMP) += smp.o diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S index 89c801c2b5d1..3e430590c1e1 100644 --- a/arch/parisc/kernel/entry.S +++ b/arch/parisc/kernel/entry.S @@ -2012,6 +2012,70 @@ ftrace_stub: #endif ENDPROC_CFI(mcount) +#ifdef CONFIG_DYNAMIC_FTRACE + +#ifdef CONFIG_64BIT +#define FTRACE_FRAME_SIZE (2*FRAME_SIZE) +#else +#define FTRACE_FRAME_SIZE FRAME_SIZE +#endif +ENTRY_CFI(ftrace_caller, caller,frame=FTRACE_FRAME_SIZE,CALLS,SAVE_RP,SAVE_SP) +ftrace_caller: + .global ftrace_caller + + STREG %r3, -FTRACE_FRAME_SIZE+1*REG_SZ(%sp) + ldo -FTRACE_FRAME_SIZE(%sp), %r3 + STREG %rp, -RP_OFFSET(%r3) + + /* Offset 0 is already allocated for %r1 */ + STREG %r23, 2*REG_SZ(%r3) + STREG %r24, 3*REG_SZ(%r3) + STREG %r25, 4*REG_SZ(%r3) + STREG %r26, 5*REG_SZ(%r3) + STREG %r28, 6*REG_SZ(%r3) + STREG %r29, 7*REG_SZ(%r3) +#ifdef CONFIG_64BIT + STREG %r19, 8*REG_SZ(%r3) + STREG %r20, 9*REG_SZ(%r3) + STREG %r21, 10*REG_SZ(%r3) + STREG %r22, 11*REG_SZ(%r3) + STREG %r27, 12*REG_SZ(%r3) + STREG %r31, 13*REG_SZ(%r3) + loadgp + ldo -16(%sp),%r29 +#endif + LDREG 0(%r3), %r25 + copy %rp, %r26 + ldo -8(%r25), %r25 + b,l ftrace_function_trampoline, %rp + copy %r3, %r24 + + LDREG -RP_OFFSET(%r3), %rp + LDREG 2*REG_SZ(%r3), %r23 + LDREG 3*REG_SZ(%r3), %r24 + LDREG 4*REG_SZ(%r3), %r25 + LDREG 5*REG_SZ(%r3), %r26 + LDREG 6*REG_SZ(%r3), %r28 + LDREG 7*REG_SZ(%r3), %r29 +#ifdef CONFIG_64BIT + LDREG 8*REG_SZ(%r3), %r19 + LDREG 9*REG_SZ(%r3), %r20 + LDREG 10*REG_SZ(%r3), %r21 + LDREG 11*REG_SZ(%r3), %r22 + LDREG 12*REG_SZ(%r3), %r27 + LDREG 13*REG_SZ(%r3), %r31 +#endif + LDREG 1*REG_SZ(%r3), %r3 + + LDREGM -FTRACE_FRAME_SIZE(%sp), %r1 + /* Adjust return point to jump back to beginning of traced function */ + ldo -4(%r1), %r1 + bv,n (%r1) + +ENDPROC_CFI(ftrace_caller) + +#endif + #ifdef CONFIG_FUNCTION_GRAPH_TRACER .align 8 ENTRY_CFI(return_to_handler, caller,frame=FRAME_SIZE) diff --git a/arch/parisc/kernel/ftrace.c b/arch/parisc/kernel/ftrace.c index a28f915993b1..d784ccdd8fef 100644 --- a/arch/parisc/kernel/ftrace.c +++ b/arch/parisc/kernel/ftrace.c @@ -7,17 +7,17 @@ * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com> * * future possible enhancements: - * - add CONFIG_DYNAMIC_FTRACE * - add CONFIG_STACK_TRACER */ #include <linux/init.h> #include <linux/ftrace.h> +#include <linux/uaccess.h> #include <asm/assembly.h> #include <asm/sections.h> #include <asm/ftrace.h> - +#include <asm/patch.h> #define __hot __attribute__ ((__section__ (".text.hot"))) @@ -50,13 +50,11 @@ void notrace __hot ftrace_function_trampoline(unsigned long parent, unsigned long self_addr, unsigned long org_sp_gr3) { - extern ftrace_func_t ftrace_trace_function; /* depends on CONFIG_DYNAMIC_FTRACE */ - - if (ftrace_trace_function != ftrace_stub) { - /* struct ftrace_ops *op, struct pt_regs *regs); */ - ftrace_trace_function(parent, self_addr, NULL, NULL); - return; - } +#ifndef CONFIG_DYNAMIC_FTRACE + extern ftrace_func_t ftrace_trace_function; +#endif + if (ftrace_trace_function != ftrace_stub) + ftrace_trace_function(self_addr, parent, NULL, NULL); #ifdef CONFIG_FUNCTION_GRAPH_TRACER if (ftrace_graph_return != (trace_func_graph_ret_t) ftrace_stub || @@ -75,3 +73,116 @@ void notrace __hot ftrace_function_trampoline(unsigned long parent, #endif } +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +int ftrace_enable_ftrace_graph_caller(void) +{ + return 0; +} + +int ftrace_disable_ftrace_graph_caller(void) +{ + return 0; +} +#endif + +#ifdef CONFIG_DYNAMIC_FTRACE + +int __init ftrace_dyn_arch_init(void) +{ + return 0; +} +int ftrace_update_ftrace_func(ftrace_func_t func) +{ + return 0; +} + +unsigned long ftrace_call_adjust(unsigned long addr) +{ + return addr+(FTRACE_PATCHABLE_FUNCTION_SIZE-1)*4; +} + +int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) +{ + u32 insn[FTRACE_PATCHABLE_FUNCTION_SIZE]; + u32 *tramp; + int size, ret, i; + void *ip; + +#ifdef CONFIG_64BIT + unsigned long addr2 = + (unsigned long)dereference_function_descriptor((void *)addr); + + u32 ftrace_trampoline[] = { + 0x73c10208, /* std,ma r1,100(sp) */ + 0x0c2110c1, /* ldd -10(r1),r1 */ + 0xe820d002, /* bve,n (r1) */ + addr2 >> 32, + addr2 & 0xffffffff, + 0xe83f1fd7, /* b,l,n .-14,r1 */ + }; + + u32 ftrace_trampoline_unaligned[] = { + addr2 >> 32, + addr2 & 0xffffffff, + 0x37de0200, /* ldo 100(sp),sp */ + 0x73c13e01, /* std r1,-100(sp) */ + 0x34213ff9, /* ldo -4(r1),r1 */ + 0x50213fc1, /* ldd -20(r1),r1 */ + 0xe820d002, /* bve,n (r1) */ + 0xe83f1fcf, /* b,l,n .-20,r1 */ + }; + + BUILD_BUG_ON(ARRAY_SIZE(ftrace_trampoline_unaligned) > + FTRACE_PATCHABLE_FUNCTION_SIZE); +#else + u32 ftrace_trampoline[] = { + (u32)addr, + 0x6fc10080, /* stw,ma r1,40(sp) */ + 0x48213fd1, /* ldw -18(r1),r1 */ + 0xe820c002, /* bv,n r0(r1) */ + 0xe83f1fdf, /* b,l,n .-c,r1 */ + }; +#endif + + BUILD_BUG_ON(ARRAY_SIZE(ftrace_trampoline) > + FTRACE_PATCHABLE_FUNCTION_SIZE); + + size = sizeof(ftrace_trampoline); + tramp = ftrace_trampoline; + +#ifdef CONFIG_64BIT + if (rec->ip & 0x4) { + size = sizeof(ftrace_trampoline_unaligned); + tramp = ftrace_trampoline_unaligned; + } +#endif + + ip = (void *)(rec->ip + 4 - size); + + ret = probe_kernel_read(insn, ip, size); + if (ret) + return ret; + + for (i = 0; i < size / 4; i++) { + if (insn[i] != INSN_NOP) + return -EINVAL; + } + + __patch_text_multiple(ip, tramp, size); + return 0; +} + +int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, + unsigned long addr) +{ + u32 insn[FTRACE_PATCHABLE_FUNCTION_SIZE]; + int i; + + for (i = 0; i < ARRAY_SIZE(insn); i++) + insn[i] = INSN_NOP; + + __patch_text_multiple((void *)rec->ip + 4 - sizeof(insn), + insn, sizeof(insn)); + return 0; +} +#endif diff --git a/arch/parisc/kernel/module.c b/arch/parisc/kernel/module.c index 1f0f29a289d3..ac5f34993b53 100644 --- a/arch/parisc/kernel/module.c +++ b/arch/parisc/kernel/module.c @@ -33,9 +33,9 @@ * However, SEGREL32 is used only for PARISC unwind entries, and we want * those entries to have an absolute address, and not just an offset. * - * The unwind table mechanism has the ability to specify an offset for + * The unwind table mechanism has the ability to specify an offset for * the unwind table; however, because we split off the init functions into - * a different piece of memory, it is not possible to do this using a + * a different piece of memory, it is not possible to do this using a * single offset. Instead, we use the above hack for now. */ @@ -53,12 +53,6 @@ #include <asm/unwind.h> #include <asm/sections.h> -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(fmt...) -#endif - #define RELOC_REACHABLE(val, bits) \ (( ( !((val) & (1<<((bits)-1))) && ((val)>>(bits)) != 0 ) || \ ( ((val) & (1<<((bits)-1))) && ((val)>>(bits)) != (((__typeof__(val))(~0))>>((bits)+2)))) ? \ @@ -300,7 +294,7 @@ unsigned int arch_mod_section_prepend(struct module *mod, * sizeof(struct stub_entry); } -#define CONST +#define CONST int module_frob_arch_sections(CONST Elf_Ehdr *hdr, CONST Elf_Shdr *sechdrs, CONST char *secstrings, @@ -386,7 +380,7 @@ static Elf64_Word get_got(struct module *me, unsigned long value, long addend) got[i].addr = value; out: - DEBUGP("GOT ENTRY %d[%x] val %lx\n", i, i*sizeof(struct got_entry), + pr_debug("GOT ENTRY %d[%lx] val %lx\n", i, i*sizeof(struct got_entry), value); return i * sizeof(struct got_entry); } @@ -539,7 +533,7 @@ int apply_relocate_add(Elf_Shdr *sechdrs, //unsigned long dp = (unsigned long)$global$; register unsigned long dp asm ("r27"); - DEBUGP("Applying relocate section %u to %u\n", relsec, + pr_debug("Applying relocate section %u to %u\n", relsec, targetsec); for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) { /* This is where to make the change */ @@ -563,7 +557,7 @@ int apply_relocate_add(Elf_Shdr *sechdrs, #if 0 #define r(t) ELF32_R_TYPE(rel[i].r_info)==t ? #t : - DEBUGP("Symbol %s loc 0x%x val 0x%x addend 0x%x: %s\n", + pr_debug("Symbol %s loc 0x%x val 0x%x addend 0x%x: %s\n", strtab + sym->st_name, (uint32_t)loc, val, addend, r(R_PARISC_PLABEL32) @@ -604,7 +598,7 @@ int apply_relocate_add(Elf_Shdr *sechdrs, /* See note about special handling of SEGREL32 at * the beginning of this file. */ - *loc = fsel(val, addend); + *loc = fsel(val, addend); break; case R_PARISC_SECREL32: /* 32-bit section relative address. */ @@ -683,7 +677,7 @@ int apply_relocate_add(Elf_Shdr *sechdrs, Elf_Addr loc0; unsigned int targetsec = sechdrs[relsec].sh_info; - DEBUGP("Applying relocate section %u to %u\n", relsec, + pr_debug("Applying relocate section %u to %u\n", relsec, targetsec); for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) { /* This is where to make the change */ @@ -725,7 +719,7 @@ int apply_relocate_add(Elf_Shdr *sechdrs, case R_PARISC_LTOFF21L: /* LT-relative; left 21 bits */ val = get_got(me, val, addend); - DEBUGP("LTOFF21L Symbol %s loc %p val %lx\n", + pr_debug("LTOFF21L Symbol %s loc %p val %llx\n", strtab + sym->st_name, loc, val); val = lrsel(val, 0); @@ -736,14 +730,14 @@ int apply_relocate_add(Elf_Shdr *sechdrs, /* LT-relative; right 14 bits */ val = get_got(me, val, addend); val = rrsel(val, 0); - DEBUGP("LTOFF14R Symbol %s loc %p val %lx\n", + pr_debug("LTOFF14R Symbol %s loc %p val %llx\n", strtab + sym->st_name, loc, val); *loc = mask(*loc, 14) | reassemble_14(val); break; case R_PARISC_PCREL22F: /* PC-relative; 22 bits */ - DEBUGP("PCREL22F Symbol %s loc %p val %lx\n", + pr_debug("PCREL22F Symbol %s loc %p val %llx\n", strtab + sym->st_name, loc, val); val += addend; @@ -775,7 +769,7 @@ int apply_relocate_add(Elf_Shdr *sechdrs, val = get_stub(me, val, addend, ELF_STUB_GOT, loc0, targetsec); } - DEBUGP("STUB FOR %s loc %lx, val %lx+%lx at %lx\n", + pr_debug("STUB FOR %s loc %px, val %llx+%llx at %llx\n", strtab + sym->st_name, loc, sym->st_value, addend, val); val = (val - dot - 8)/4; @@ -799,7 +793,7 @@ int apply_relocate_add(Elf_Shdr *sechdrs, /* See note about special handling of SEGREL32 at * the beginning of this file. */ - *loc = fsel(val, addend); + *loc = fsel(val, addend); break; case R_PARISC_SECREL32: /* 32-bit section relative address. */ @@ -809,14 +803,14 @@ int apply_relocate_add(Elf_Shdr *sechdrs, /* 64-bit function address */ if(in_local(me, (void *)(val + addend))) { *loc64 = get_fdesc(me, val+addend); - DEBUGP("FDESC for %s at %p points to %lx\n", + pr_debug("FDESC for %s at %llx points to %llx\n", strtab + sym->st_name, *loc64, ((Elf_Fdesc *)*loc64)->addr); } else { /* if the symbol is not local to this * module then val+addend is a pointer * to the function descriptor */ - DEBUGP("Non local FPTR64 Symbol %s loc %p val %lx\n", + pr_debug("Non local FPTR64 Symbol %s loc %p val %llx\n", strtab + sym->st_name, loc, val); *loc64 = val + addend; @@ -847,7 +841,7 @@ register_unwind_table(struct module *me, end = table + sechdrs[me->arch.unwind_section].sh_size; gp = (Elf_Addr)me->core_layout.base + me->arch.got_offset; - DEBUGP("register_unwind_table(), sect = %d at 0x%p - 0x%p (gp=0x%lx)\n", + pr_debug("register_unwind_table(), sect = %d at 0x%p - 0x%p (gp=0x%lx)\n", me->arch.unwind_section, table, end, gp); me->arch.unwind = unwind_table_add(me->name, 0, gp, table, end); } @@ -868,6 +862,7 @@ int module_finalize(const Elf_Ehdr *hdr, const char *strtab = NULL; const Elf_Shdr *s; char *secstrings; + int err, symindex = -1; Elf_Sym *newptr, *oldptr; Elf_Shdr *symhdr = NULL; #ifdef DEBUG @@ -894,6 +889,7 @@ int module_finalize(const Elf_Ehdr *hdr, if(sechdrs[i].sh_type == SHT_SYMTAB && (sechdrs[i].sh_flags & SHF_ALLOC)) { int strindex = sechdrs[i].sh_link; + symindex = i; /* FIXME: AWFUL HACK * The cast is to drop the const from * the sechdrs pointer */ @@ -903,7 +899,7 @@ int module_finalize(const Elf_Ehdr *hdr, } } - DEBUGP("module %s: strtab %p, symhdr %p\n", + pr_debug("module %s: strtab %p, symhdr %p\n", me->name, strtab, symhdr); if(me->arch.got_count > MAX_GOTS) { @@ -922,7 +918,7 @@ int module_finalize(const Elf_Ehdr *hdr, oldptr = (void *)symhdr->sh_addr; newptr = oldptr + 1; /* we start counting at 1 */ nsyms = symhdr->sh_size / sizeof(Elf_Sym); - DEBUGP("OLD num_symtab %lu\n", nsyms); + pr_debug("OLD num_symtab %lu\n", nsyms); for (i = 1; i < nsyms; i++) { oldptr++; /* note, count starts at 1 so preincrement */ @@ -937,7 +933,7 @@ int module_finalize(const Elf_Ehdr *hdr, } nsyms = newptr - (Elf_Sym *)symhdr->sh_addr; - DEBUGP("NEW num_symtab %lu\n", nsyms); + pr_debug("NEW num_symtab %lu\n", nsyms); symhdr->sh_size = nsyms * sizeof(Elf_Sym); /* find .altinstructions section */ @@ -949,8 +945,24 @@ int module_finalize(const Elf_Ehdr *hdr, if (!strcmp(".altinstructions", secname)) /* patch .altinstructions */ apply_alternatives(aseg, aseg + s->sh_size, me->name); - } + /* For 32 bit kernels we're compiling modules with + * -ffunction-sections so we must relocate the addresses in the + *__mcount_loc section. + */ + if (symindex != -1 && !strcmp(secname, "__mcount_loc")) { + if (s->sh_type == SHT_REL) + err = apply_relocate((Elf_Shdr *)sechdrs, + strtab, symindex, + s - sechdrs, me); + else if (s->sh_type == SHT_RELA) + err = apply_relocate_add((Elf_Shdr *)sechdrs, + strtab, symindex, + s - sechdrs, me); + if (err) + return err; + } + } return 0; } diff --git a/arch/parisc/kernel/module.lds b/arch/parisc/kernel/module.lds new file mode 100644 index 000000000000..1a9a92aca5c8 --- /dev/null +++ b/arch/parisc/kernel/module.lds @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +SECTIONS { + __mcount_loc : { + *(__patchable_function_entries) + } +} diff --git a/arch/parisc/kernel/patch.c b/arch/parisc/kernel/patch.c index cdcd981278b3..80a0ab372802 100644 --- a/arch/parisc/kernel/patch.c +++ b/arch/parisc/kernel/patch.c @@ -17,15 +17,20 @@ struct patch { void *addr; - unsigned int insn; + u32 *insn; + unsigned int len; }; -static void __kprobes *patch_map(void *addr, int fixmap) +static DEFINE_RAW_SPINLOCK(patch_lock); + +static void __kprobes *patch_map(void *addr, int fixmap, unsigned long *flags, + int *need_unmap) { unsigned long uintaddr = (uintptr_t) addr; bool module = !core_kernel_text(uintaddr); struct page *page; + *need_unmap = 0; if (module && IS_ENABLED(CONFIG_STRICT_MODULE_RWX)) page = vmalloc_to_page(addr); else if (!module && IS_ENABLED(CONFIG_STRICT_KERNEL_RWX)) @@ -33,36 +38,74 @@ static void __kprobes *patch_map(void *addr, int fixmap) else return addr; + *need_unmap = 1; set_fixmap(fixmap, page_to_phys(page)); + if (flags) + raw_spin_lock_irqsave(&patch_lock, *flags); + else + __acquire(&patch_lock); return (void *) (__fix_to_virt(fixmap) + (uintaddr & ~PAGE_MASK)); } -static void __kprobes patch_unmap(int fixmap) +static void __kprobes patch_unmap(int fixmap, unsigned long *flags) { clear_fixmap(fixmap); + + if (flags) + raw_spin_unlock_irqrestore(&patch_lock, *flags); + else + __release(&patch_lock); +} + +void __kprobes __patch_text_multiple(void *addr, u32 *insn, unsigned int len) +{ + unsigned long start = (unsigned long)addr; + unsigned long end = (unsigned long)addr + len; + unsigned long flags; + u32 *p, *fixmap; + int mapped; + + /* Make sure we don't have any aliases in cache */ + flush_kernel_vmap_range(addr, len); + flush_icache_range(start, end); + + p = fixmap = patch_map(addr, FIX_TEXT_POKE0, &flags, &mapped); + + while (len >= 4) { + *p++ = *insn++; + addr += sizeof(u32); + len -= sizeof(u32); + if (len && offset_in_page(addr) == 0) { + /* + * We're crossing a page boundary, so + * need to remap + */ + flush_kernel_vmap_range((void *)fixmap, + (p-fixmap) * sizeof(*p)); + if (mapped) + patch_unmap(FIX_TEXT_POKE0, &flags); + p = fixmap = patch_map(addr, FIX_TEXT_POKE0, &flags, + &mapped); + } + } + + flush_kernel_vmap_range((void *)fixmap, (p-fixmap) * sizeof(*p)); + if (mapped) + patch_unmap(FIX_TEXT_POKE0, &flags); + flush_icache_range(start, end); } -void __kprobes __patch_text(void *addr, unsigned int insn) +void __kprobes __patch_text(void *addr, u32 insn) { - void *waddr = addr; - int size; - - waddr = patch_map(addr, FIX_TEXT_POKE0); - *(u32 *)waddr = insn; - size = sizeof(u32); - flush_kernel_vmap_range(waddr, size); - patch_unmap(FIX_TEXT_POKE0); - flush_icache_range((uintptr_t)(addr), - (uintptr_t)(addr) + size); + __patch_text_multiple(addr, &insn, sizeof(insn)); } static int __kprobes patch_text_stop_machine(void *data) { struct patch *patch = data; - __patch_text(patch->addr, patch->insn); - + __patch_text_multiple(patch->addr, patch->insn, patch->len); return 0; } @@ -70,7 +113,20 @@ void __kprobes patch_text(void *addr, unsigned int insn) { struct patch patch = { .addr = addr, + .insn = &insn, + .len = sizeof(insn), + }; + + stop_machine_cpuslocked(patch_text_stop_machine, &patch, NULL); +} + +void __kprobes patch_text_multiple(void *addr, u32 *insn, unsigned int len) +{ + + struct patch patch = { + .addr = addr, .insn = insn, + .len = len }; stop_machine_cpuslocked(patch_text_stop_machine, &patch, NULL); diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl index c9e377d59232..5022b9e179c2 100644 --- a/arch/parisc/kernel/syscalls/syscall.tbl +++ b/arch/parisc/kernel/syscalls/syscall.tbl @@ -430,3 +430,4 @@ 431 common fsconfig sys_fsconfig 432 common fsmount sys_fsmount 433 common fspick sys_fspick +434 common pidfd_open sys_pidfd_open diff --git a/arch/parisc/kernel/vmlinux.lds.S b/arch/parisc/kernel/vmlinux.lds.S index cd33b4feacb1..99cd24f2ea01 100644 --- a/arch/parisc/kernel/vmlinux.lds.S +++ b/arch/parisc/kernel/vmlinux.lds.S @@ -18,6 +18,8 @@ *(.data..vm0.pgd) \ *(.data..vm0.pte) +#define CC_USING_PATCHABLE_FUNCTION_ENTRY + #include <asm-generic/vmlinux.lds.h> /* needed for the processor specific cache alignment size */ diff --git a/arch/parisc/mm/fixmap.c b/arch/parisc/mm/fixmap.c index c8d41b54fb19..474cd241c150 100644 --- a/arch/parisc/mm/fixmap.c +++ b/arch/parisc/mm/fixmap.c @@ -10,7 +10,7 @@ #include <asm/cacheflush.h> #include <asm/fixmap.h> -void set_fixmap(enum fixed_addresses idx, phys_addr_t phys) +void notrace set_fixmap(enum fixed_addresses idx, phys_addr_t phys) { unsigned long vaddr = __fix_to_virt(idx); pgd_t *pgd = pgd_offset_k(vaddr); @@ -28,13 +28,16 @@ void set_fixmap(enum fixed_addresses idx, phys_addr_t phys) flush_tlb_kernel_range(vaddr, vaddr + PAGE_SIZE); } -void clear_fixmap(enum fixed_addresses idx) +void notrace clear_fixmap(enum fixed_addresses idx) { unsigned long vaddr = __fix_to_virt(idx); pgd_t *pgd = pgd_offset_k(vaddr); pmd_t *pmd = pmd_offset(pgd, vaddr); pte_t *pte = pte_offset_kernel(pmd, vaddr); + if (WARN_ON(pte_none(*pte))) + return; + pte_clear(&init_mm, vaddr, pte); flush_tlb_kernel_range(vaddr, vaddr + PAGE_SIZE); diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 8c1c636308c8..3b795a0cab62 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -898,7 +898,7 @@ config PPC_MEM_KEYS page-based protections, but without requiring modification of the page tables when an application changes protection domains. - For details, see Documentation/vm/protection-keys.rst + For details, see Documentation/core-api/protection-keys.rst If unsure, say y. diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig index 7c6baf6df139..aa51b9b66fa2 100644 --- a/arch/powerpc/configs/ppc6xx_defconfig +++ b/arch/powerpc/configs/ppc6xx_defconfig @@ -301,7 +301,6 @@ CONFIG_NET_ACT_NAT=m CONFIG_NET_ACT_PEDIT=m CONFIG_NET_ACT_SIMP=m CONFIG_NET_ACT_SKBEDIT=m -CONFIG_NET_CLS_IND=y CONFIG_IRDA=m CONFIG_IRLAN=m CONFIG_IRNET=m diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index 103655d84b4b..f2c3bda2d39f 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -515,3 +515,4 @@ 431 common fsconfig sys_fsconfig 432 common fsmount sys_fsmount 433 common fspick sys_fspick +434 common pidfd_open sys_pidfd_open diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index c2ee6041f02c..02a59946a78a 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -500,6 +500,9 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, case BPF_ALU | BPF_LSH | BPF_X: /* (u32) dst <<= (u32) src */ /* slw clears top 32 bits */ PPC_SLW(dst_reg, dst_reg, src_reg); + /* skip zero extension move, but set address map. */ + if (insn_is_zext(&insn[i + 1])) + addrs[++i] = ctx->idx * 4; break; case BPF_ALU64 | BPF_LSH | BPF_X: /* dst <<= src; */ PPC_SLD(dst_reg, dst_reg, src_reg); @@ -507,6 +510,8 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, case BPF_ALU | BPF_LSH | BPF_K: /* (u32) dst <<== (u32) imm */ /* with imm 0, we still need to clear top 32 bits */ PPC_SLWI(dst_reg, dst_reg, imm); + if (insn_is_zext(&insn[i + 1])) + addrs[++i] = ctx->idx * 4; break; case BPF_ALU64 | BPF_LSH | BPF_K: /* dst <<== imm */ if (imm != 0) @@ -514,12 +519,16 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, break; case BPF_ALU | BPF_RSH | BPF_X: /* (u32) dst >>= (u32) src */ PPC_SRW(dst_reg, dst_reg, src_reg); + if (insn_is_zext(&insn[i + 1])) + addrs[++i] = ctx->idx * 4; break; case BPF_ALU64 | BPF_RSH | BPF_X: /* dst >>= src */ PPC_SRD(dst_reg, dst_reg, src_reg); break; case BPF_ALU | BPF_RSH | BPF_K: /* (u32) dst >>= (u32) imm */ PPC_SRWI(dst_reg, dst_reg, imm); + if (insn_is_zext(&insn[i + 1])) + addrs[++i] = ctx->idx * 4; break; case BPF_ALU64 | BPF_RSH | BPF_K: /* dst >>= imm */ if (imm != 0) @@ -544,6 +553,11 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, */ case BPF_ALU | BPF_MOV | BPF_X: /* (u32) dst = src */ case BPF_ALU64 | BPF_MOV | BPF_X: /* dst = src */ + if (imm == 1) { + /* special mov32 for zext */ + PPC_RLWINM(dst_reg, dst_reg, 0, 0, 31); + break; + } PPC_MR(dst_reg, src_reg); goto bpf_alu32_trunc; case BPF_ALU | BPF_MOV | BPF_K: /* (u32) dst = imm */ @@ -551,11 +565,13 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, PPC_LI32(dst_reg, imm); if (imm < 0) goto bpf_alu32_trunc; + else if (insn_is_zext(&insn[i + 1])) + addrs[++i] = ctx->idx * 4; break; bpf_alu32_trunc: /* Truncate to 32-bits */ - if (BPF_CLASS(code) == BPF_ALU) + if (BPF_CLASS(code) == BPF_ALU && !fp->aux->verifier_zext) PPC_RLWINM(dst_reg, dst_reg, 0, 0, 31); break; @@ -614,10 +630,13 @@ emit_clear: case 16: /* zero-extend 16 bits into 64 bits */ PPC_RLDICL(dst_reg, dst_reg, 0, 48); + if (insn_is_zext(&insn[i + 1])) + addrs[++i] = ctx->idx * 4; break; case 32: - /* zero-extend 32 bits into 64 bits */ - PPC_RLDICL(dst_reg, dst_reg, 0, 32); + if (!fp->aux->verifier_zext) + /* zero-extend 32 bits into 64 bits */ + PPC_RLDICL(dst_reg, dst_reg, 0, 32); break; case 64: /* nop */ @@ -694,14 +713,20 @@ emit_clear: /* dst = *(u8 *)(ul) (src + off) */ case BPF_LDX | BPF_MEM | BPF_B: PPC_LBZ(dst_reg, src_reg, off); + if (insn_is_zext(&insn[i + 1])) + addrs[++i] = ctx->idx * 4; break; /* dst = *(u16 *)(ul) (src + off) */ case BPF_LDX | BPF_MEM | BPF_H: PPC_LHZ(dst_reg, src_reg, off); + if (insn_is_zext(&insn[i + 1])) + addrs[++i] = ctx->idx * 4; break; /* dst = *(u32 *)(ul) (src + off) */ case BPF_LDX | BPF_MEM | BPF_W: PPC_LWZ(dst_reg, src_reg, off); + if (insn_is_zext(&insn[i + 1])) + addrs[++i] = ctx->idx * 4; break; /* dst = *(u64 *)(ul) (src + off) */ case BPF_LDX | BPF_MEM | BPF_DW: @@ -1042,6 +1067,11 @@ struct powerpc64_jit_data { struct codegen_context ctx; }; +bool bpf_jit_needs_zext(void) +{ + return true; +} + struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) { u32 proglen; diff --git a/arch/powerpc/sysdev/Kconfig b/arch/powerpc/sysdev/Kconfig index e0dbec780fe9..d23288c4abf6 100644 --- a/arch/powerpc/sysdev/Kconfig +++ b/arch/powerpc/sysdev/Kconfig @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 # For a description of the syntax of this configuration file, -# see Documentation/kbuild/kconfig-language.txt. +# see Documentation/kbuild/kconfig-language.rst. # config PPC4xx_PCI_EXPRESS diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 4961deaa3b1d..13a1c0d04e9e 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only # # For a description of the syntax of this configuration file, -# see Documentation/kbuild/kconfig-language.txt. +# see Documentation/kbuild/kconfig-language.rst. # config 64BIT @@ -17,6 +17,7 @@ config RISCV select OF select OF_EARLY_FLATTREE select OF_IRQ + select ARCH_HAS_BINFMT_FLAT select ARCH_WANT_FRAME_POINTERS select CLONE_BACKWARDS select COMMON_CLK diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild index 5ee646619cc3..1efaeddf1e4b 100644 --- a/arch/riscv/include/asm/Kbuild +++ b/arch/riscv/include/asm/Kbuild @@ -5,6 +5,7 @@ generic-y += compat.h generic-y += device.h generic-y += div64.h generic-y += extable.h +generic-y += flat.h generic-y += dma.h generic-y += dma-contiguous.h generic-y += dma-mapping.h diff --git a/arch/riscv/net/bpf_jit_comp.c b/arch/riscv/net/bpf_jit_comp.c index 426d5c33ea90..5451ef3845f2 100644 --- a/arch/riscv/net/bpf_jit_comp.c +++ b/arch/riscv/net/bpf_jit_comp.c @@ -731,6 +731,7 @@ static int emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, { bool is64 = BPF_CLASS(insn->code) == BPF_ALU64 || BPF_CLASS(insn->code) == BPF_JMP; + struct bpf_prog_aux *aux = ctx->prog->aux; int rvoff, i = insn - ctx->prog->insnsi; u8 rd = -1, rs = -1, code = insn->code; s16 off = insn->off; @@ -742,8 +743,13 @@ static int emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, /* dst = src */ case BPF_ALU | BPF_MOV | BPF_X: case BPF_ALU64 | BPF_MOV | BPF_X: + if (imm == 1) { + /* Special mov32 for zext */ + emit_zext_32(rd, ctx); + break; + } emit(is64 ? rv_addi(rd, rs, 0) : rv_addiw(rd, rs, 0), ctx); - if (!is64) + if (!is64 && !aux->verifier_zext) emit_zext_32(rd, ctx); break; @@ -751,49 +757,49 @@ static int emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, case BPF_ALU | BPF_ADD | BPF_X: case BPF_ALU64 | BPF_ADD | BPF_X: emit(is64 ? rv_add(rd, rd, rs) : rv_addw(rd, rd, rs), ctx); - if (!is64) + if (!is64 && !aux->verifier_zext) emit_zext_32(rd, ctx); break; case BPF_ALU | BPF_SUB | BPF_X: case BPF_ALU64 | BPF_SUB | BPF_X: emit(is64 ? rv_sub(rd, rd, rs) : rv_subw(rd, rd, rs), ctx); - if (!is64) + if (!is64 && !aux->verifier_zext) emit_zext_32(rd, ctx); break; case BPF_ALU | BPF_AND | BPF_X: case BPF_ALU64 | BPF_AND | BPF_X: emit(rv_and(rd, rd, rs), ctx); - if (!is64) + if (!is64 && !aux->verifier_zext) emit_zext_32(rd, ctx); break; case BPF_ALU | BPF_OR | BPF_X: case BPF_ALU64 | BPF_OR | BPF_X: emit(rv_or(rd, rd, rs), ctx); - if (!is64) + if (!is64 && !aux->verifier_zext) emit_zext_32(rd, ctx); break; case BPF_ALU | BPF_XOR | BPF_X: case BPF_ALU64 | BPF_XOR | BPF_X: emit(rv_xor(rd, rd, rs), ctx); - if (!is64) + if (!is64 && !aux->verifier_zext) emit_zext_32(rd, ctx); break; case BPF_ALU | BPF_MUL | BPF_X: case BPF_ALU64 | BPF_MUL | BPF_X: emit(is64 ? rv_mul(rd, rd, rs) : rv_mulw(rd, rd, rs), ctx); - if (!is64) + if (!is64 && !aux->verifier_zext) emit_zext_32(rd, ctx); break; case BPF_ALU | BPF_DIV | BPF_X: case BPF_ALU64 | BPF_DIV | BPF_X: emit(is64 ? rv_divu(rd, rd, rs) : rv_divuw(rd, rd, rs), ctx); - if (!is64) + if (!is64 && !aux->verifier_zext) emit_zext_32(rd, ctx); break; case BPF_ALU | BPF_MOD | BPF_X: case BPF_ALU64 | BPF_MOD | BPF_X: emit(is64 ? rv_remu(rd, rd, rs) : rv_remuw(rd, rd, rs), ctx); - if (!is64) + if (!is64 && !aux->verifier_zext) emit_zext_32(rd, ctx); break; case BPF_ALU | BPF_LSH | BPF_X: @@ -805,13 +811,13 @@ static int emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, case BPF_ALU | BPF_RSH | BPF_X: case BPF_ALU64 | BPF_RSH | BPF_X: emit(is64 ? rv_srl(rd, rd, rs) : rv_srlw(rd, rd, rs), ctx); - if (!is64) + if (!is64 && !aux->verifier_zext) emit_zext_32(rd, ctx); break; case BPF_ALU | BPF_ARSH | BPF_X: case BPF_ALU64 | BPF_ARSH | BPF_X: emit(is64 ? rv_sra(rd, rd, rs) : rv_sraw(rd, rd, rs), ctx); - if (!is64) + if (!is64 && !aux->verifier_zext) emit_zext_32(rd, ctx); break; @@ -820,7 +826,7 @@ static int emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, case BPF_ALU64 | BPF_NEG: emit(is64 ? rv_sub(rd, RV_REG_ZERO, rd) : rv_subw(rd, RV_REG_ZERO, rd), ctx); - if (!is64) + if (!is64 && !aux->verifier_zext) emit_zext_32(rd, ctx); break; @@ -885,7 +891,7 @@ out_be: case BPF_ALU | BPF_MOV | BPF_K: case BPF_ALU64 | BPF_MOV | BPF_K: emit_imm(rd, imm, ctx); - if (!is64) + if (!is64 && !aux->verifier_zext) emit_zext_32(rd, ctx); break; @@ -900,7 +906,7 @@ out_be: emit(is64 ? rv_add(rd, rd, RV_REG_T1) : rv_addw(rd, rd, RV_REG_T1), ctx); } - if (!is64) + if (!is64 && !aux->verifier_zext) emit_zext_32(rd, ctx); break; case BPF_ALU | BPF_SUB | BPF_K: @@ -913,7 +919,7 @@ out_be: emit(is64 ? rv_sub(rd, rd, RV_REG_T1) : rv_subw(rd, rd, RV_REG_T1), ctx); } - if (!is64) + if (!is64 && !aux->verifier_zext) emit_zext_32(rd, ctx); break; case BPF_ALU | BPF_AND | BPF_K: @@ -924,7 +930,7 @@ out_be: emit_imm(RV_REG_T1, imm, ctx); emit(rv_and(rd, rd, RV_REG_T1), ctx); } - if (!is64) + if (!is64 && !aux->verifier_zext) emit_zext_32(rd, ctx); break; case BPF_ALU | BPF_OR | BPF_K: @@ -935,7 +941,7 @@ out_be: emit_imm(RV_REG_T1, imm, ctx); emit(rv_or(rd, rd, RV_REG_T1), ctx); } - if (!is64) + if (!is64 && !aux->verifier_zext) emit_zext_32(rd, ctx); break; case BPF_ALU | BPF_XOR | BPF_K: @@ -946,7 +952,7 @@ out_be: emit_imm(RV_REG_T1, imm, ctx); emit(rv_xor(rd, rd, RV_REG_T1), ctx); } - if (!is64) + if (!is64 && !aux->verifier_zext) emit_zext_32(rd, ctx); break; case BPF_ALU | BPF_MUL | BPF_K: @@ -954,7 +960,7 @@ out_be: emit_imm(RV_REG_T1, imm, ctx); emit(is64 ? rv_mul(rd, rd, RV_REG_T1) : rv_mulw(rd, rd, RV_REG_T1), ctx); - if (!is64) + if (!is64 && !aux->verifier_zext) emit_zext_32(rd, ctx); break; case BPF_ALU | BPF_DIV | BPF_K: @@ -962,7 +968,7 @@ out_be: emit_imm(RV_REG_T1, imm, ctx); emit(is64 ? rv_divu(rd, rd, RV_REG_T1) : rv_divuw(rd, rd, RV_REG_T1), ctx); - if (!is64) + if (!is64 && !aux->verifier_zext) emit_zext_32(rd, ctx); break; case BPF_ALU | BPF_MOD | BPF_K: @@ -970,7 +976,7 @@ out_be: emit_imm(RV_REG_T1, imm, ctx); emit(is64 ? rv_remu(rd, rd, RV_REG_T1) : rv_remuw(rd, rd, RV_REG_T1), ctx); - if (!is64) + if (!is64 && !aux->verifier_zext) emit_zext_32(rd, ctx); break; case BPF_ALU | BPF_LSH | BPF_K: @@ -1263,6 +1269,8 @@ out_be: emit_imm(RV_REG_T1, off, ctx); emit(rv_add(RV_REG_T1, RV_REG_T1, rs), ctx); emit(rv_lbu(rd, 0, RV_REG_T1), ctx); + if (insn_is_zext(&insn[1])) + return 1; break; case BPF_LDX | BPF_MEM | BPF_H: if (is_12b_int(off)) { @@ -1273,6 +1281,8 @@ out_be: emit_imm(RV_REG_T1, off, ctx); emit(rv_add(RV_REG_T1, RV_REG_T1, rs), ctx); emit(rv_lhu(rd, 0, RV_REG_T1), ctx); + if (insn_is_zext(&insn[1])) + return 1; break; case BPF_LDX | BPF_MEM | BPF_W: if (is_12b_int(off)) { @@ -1283,6 +1293,8 @@ out_be: emit_imm(RV_REG_T1, off, ctx); emit(rv_add(RV_REG_T1, RV_REG_T1, rs), ctx); emit(rv_lwu(rd, 0, RV_REG_T1), ctx); + if (insn_is_zext(&insn[1])) + return 1; break; case BPF_LDX | BPF_MEM | BPF_DW: if (is_12b_int(off)) { @@ -1527,6 +1539,11 @@ static void bpf_flush_icache(void *start, void *end) flush_icache_range((unsigned long)start, (unsigned long)end); } +bool bpf_jit_needs_zext(void) +{ + return true; +} + struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) { bool tmp_blinded = false, extra_pass = false; diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index e822b2964a83..6ebacfeaf853 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -436,3 +436,4 @@ 431 common fsconfig sys_fsconfig sys_fsconfig 432 common fsmount sys_fsmount sys_fsmount 433 common fspick sys_fspick sys_fspick +434 common pidfd_open sys_pidfd_open sys_pidfd_open diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 5e7c63033159..e636728ab452 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -299,9 +299,11 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1) #define EMIT_ZERO(b1) \ ({ \ - /* llgfr %dst,%dst (zero extend to 64 bit) */ \ - EMIT4(0xb9160000, b1, b1); \ - REG_SET_SEEN(b1); \ + if (!fp->aux->verifier_zext) { \ + /* llgfr %dst,%dst (zero extend to 64 bit) */ \ + EMIT4(0xb9160000, b1, b1); \ + REG_SET_SEEN(b1); \ + } \ }) /* @@ -520,6 +522,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i case BPF_ALU | BPF_MOV | BPF_X: /* dst = (u32) src */ /* llgfr %dst,%src */ EMIT4(0xb9160000, dst_reg, src_reg); + if (insn_is_zext(&insn[1])) + insn_count = 2; break; case BPF_ALU64 | BPF_MOV | BPF_X: /* dst = src */ /* lgr %dst,%src */ @@ -528,6 +532,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i case BPF_ALU | BPF_MOV | BPF_K: /* dst = (u32) imm */ /* llilf %dst,imm */ EMIT6_IMM(0xc00f0000, dst_reg, imm); + if (insn_is_zext(&insn[1])) + insn_count = 2; break; case BPF_ALU64 | BPF_MOV | BPF_K: /* dst = imm */ /* lgfi %dst,imm */ @@ -639,6 +645,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i EMIT4(0xb9970000, REG_W0, src_reg); /* llgfr %dst,%rc */ EMIT4(0xb9160000, dst_reg, rc_reg); + if (insn_is_zext(&insn[1])) + insn_count = 2; break; } case BPF_ALU64 | BPF_DIV | BPF_X: /* dst = dst / src */ @@ -676,6 +684,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i EMIT_CONST_U32(imm)); /* llgfr %dst,%rc */ EMIT4(0xb9160000, dst_reg, rc_reg); + if (insn_is_zext(&insn[1])) + insn_count = 2; break; } case BPF_ALU64 | BPF_DIV | BPF_K: /* dst = dst / imm */ @@ -864,10 +874,13 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i case 16: /* dst = (u16) cpu_to_be16(dst) */ /* llghr %dst,%dst */ EMIT4(0xb9850000, dst_reg, dst_reg); + if (insn_is_zext(&insn[1])) + insn_count = 2; break; case 32: /* dst = (u32) cpu_to_be32(dst) */ - /* llgfr %dst,%dst */ - EMIT4(0xb9160000, dst_reg, dst_reg); + if (!fp->aux->verifier_zext) + /* llgfr %dst,%dst */ + EMIT4(0xb9160000, dst_reg, dst_reg); break; case 64: /* dst = (u64) cpu_to_be64(dst) */ break; @@ -882,12 +895,15 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i EMIT4_DISP(0x88000000, dst_reg, REG_0, 16); /* llghr %dst,%dst */ EMIT4(0xb9850000, dst_reg, dst_reg); + if (insn_is_zext(&insn[1])) + insn_count = 2; break; case 32: /* dst = (u32) cpu_to_le32(dst) */ /* lrvr %dst,%dst */ EMIT4(0xb91f0000, dst_reg, dst_reg); - /* llgfr %dst,%dst */ - EMIT4(0xb9160000, dst_reg, dst_reg); + if (!fp->aux->verifier_zext) + /* llgfr %dst,%dst */ + EMIT4(0xb9160000, dst_reg, dst_reg); break; case 64: /* dst = (u64) cpu_to_le64(dst) */ /* lrvgr %dst,%dst */ @@ -968,16 +984,22 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i /* llgc %dst,0(off,%src) */ EMIT6_DISP_LH(0xe3000000, 0x0090, dst_reg, src_reg, REG_0, off); jit->seen |= SEEN_MEM; + if (insn_is_zext(&insn[1])) + insn_count = 2; break; case BPF_LDX | BPF_MEM | BPF_H: /* dst = *(u16 *)(ul) (src + off) */ /* llgh %dst,0(off,%src) */ EMIT6_DISP_LH(0xe3000000, 0x0091, dst_reg, src_reg, REG_0, off); jit->seen |= SEEN_MEM; + if (insn_is_zext(&insn[1])) + insn_count = 2; break; case BPF_LDX | BPF_MEM | BPF_W: /* dst = *(u32 *)(ul) (src + off) */ /* llgf %dst,off(%src) */ jit->seen |= SEEN_MEM; EMIT6_DISP_LH(0xe3000000, 0x0016, dst_reg, src_reg, REG_0, off); + if (insn_is_zext(&insn[1])) + insn_count = 2; break; case BPF_LDX | BPF_MEM | BPF_DW: /* dst = *(u64 *)(ul) (src + off) */ /* lg %dst,0(off,%src) */ @@ -1282,6 +1304,11 @@ static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp) return 0; } +bool bpf_jit_needs_zext(void) +{ + return true; +} + /* * Compile eBPF program "fp" */ diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index b77f512bb176..c7c99e18d5ff 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 config SUPERH def_bool y + select ARCH_HAS_BINFMT_FLAT if !MMU select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_MIGHT_HAVE_PC_PARPORT @@ -623,7 +624,7 @@ config CRASH_DUMP to a memory address not used by the main kernel using PHYSICAL_START. - For more details see Documentation/kdump/kdump.txt + For more details see Documentation/kdump/kdump.rst config KEXEC_JUMP bool "kexec jump (EXPERIMENTAL)" diff --git a/arch/sh/configs/se7712_defconfig b/arch/sh/configs/se7712_defconfig index 5a1097641247..1e116529735f 100644 --- a/arch/sh/configs/se7712_defconfig +++ b/arch/sh/configs/se7712_defconfig @@ -63,7 +63,6 @@ CONFIG_NET_SCH_NETEM=y CONFIG_NET_CLS_TCINDEX=y CONFIG_NET_CLS_ROUTE4=y CONFIG_NET_CLS_FW=y -CONFIG_NET_CLS_IND=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_MTD=y CONFIG_MTD_BLOCK=y diff --git a/arch/sh/configs/se7721_defconfig b/arch/sh/configs/se7721_defconfig index 9c0ef13bee10..c66e512719ab 100644 --- a/arch/sh/configs/se7721_defconfig +++ b/arch/sh/configs/se7721_defconfig @@ -62,7 +62,6 @@ CONFIG_NET_SCH_NETEM=y CONFIG_NET_CLS_TCINDEX=y CONFIG_NET_CLS_ROUTE4=y CONFIG_NET_CLS_FW=y -CONFIG_NET_CLS_IND=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_MTD=y CONFIG_MTD_BLOCK=y diff --git a/arch/sh/configs/titan_defconfig b/arch/sh/configs/titan_defconfig index 822fa9e96f74..171ab05ce4fc 100644 --- a/arch/sh/configs/titan_defconfig +++ b/arch/sh/configs/titan_defconfig @@ -142,7 +142,6 @@ CONFIG_GACT_PROB=y CONFIG_NET_ACT_MIRRED=m CONFIG_NET_ACT_IPT=m CONFIG_NET_ACT_PEDIT=m -CONFIG_NET_CLS_IND=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_FW_LOADER=m CONFIG_CONNECTOR=m diff --git a/arch/sh/include/asm/flat.h b/arch/sh/include/asm/flat.h index 843d458b8329..fee4f25555cb 100644 --- a/arch/sh/include/asm/flat.h +++ b/arch/sh/include/asm/flat.h @@ -11,11 +11,8 @@ #include <asm/unaligned.h> -#define flat_argvp_envp_on_stack() 0 -#define flat_old_ram_flag(flags) (flags) -#define flat_reloc_valid(reloc, size) ((reloc) <= (size)) static inline int flat_get_addr_from_rp(u32 __user *rp, u32 relval, u32 flags, - u32 *addr, u32 *persistent) + u32 *addr) { *addr = get_unaligned((__force u32 *)rp); return 0; @@ -25,8 +22,6 @@ static inline int flat_put_addr_at_rp(u32 __user *rp, u32 addr, u32 rel) put_unaligned(addr, (__force u32 *)rp); return 0; } -#define flat_get_relocate_addr(rel) (rel) -#define flat_set_persistent(relval, p) ({ (void)p; 0; }) #define FLAT_PLAT_INIT(_r) \ do { _r->regs[0]=0; _r->regs[1]=0; _r->regs[2]=0; _r->regs[3]=0; \ diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl index 016a727d4357..834c9c7d79fa 100644 --- a/arch/sh/kernel/syscalls/syscall.tbl +++ b/arch/sh/kernel/syscalls/syscall.tbl @@ -436,3 +436,4 @@ 431 common fsconfig sys_fsconfig 432 common fsmount sys_fsmount 433 common fspick sys_fspick +434 common pidfd_open sys_pidfd_open diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h index 9265a9eece15..8029b681fc7c 100644 --- a/arch/sparc/include/uapi/asm/socket.h +++ b/arch/sparc/include/uapi/asm/socket.h @@ -115,6 +115,8 @@ #define SO_RCVTIMEO_NEW 0x0044 #define SO_SNDTIMEO_NEW 0x0045 +#define SO_DETACH_REUSEPORT_BPF 0x0047 + #if !defined(__KERNEL__) diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl index e047480b1605..c58e71f21129 100644 --- a/arch/sparc/kernel/syscalls/syscall.tbl +++ b/arch/sparc/kernel/syscalls/syscall.tbl @@ -479,3 +479,4 @@ 431 common fsconfig sys_fsconfig 432 common fsmount sys_fsmount 433 common fspick sys_fspick +434 common pidfd_open sys_pidfd_open diff --git a/arch/sparc/net/bpf_jit_comp_64.c b/arch/sparc/net/bpf_jit_comp_64.c index 65428e79b2f3..3364e2a00989 100644 --- a/arch/sparc/net/bpf_jit_comp_64.c +++ b/arch/sparc/net/bpf_jit_comp_64.c @@ -908,6 +908,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) /* dst = src */ case BPF_ALU | BPF_MOV | BPF_X: emit_alu3_K(SRL, src, 0, dst, ctx); + if (insn_is_zext(&insn[1])) + return 1; break; case BPF_ALU64 | BPF_MOV | BPF_X: emit_reg_move(src, dst, ctx); @@ -942,6 +944,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) case BPF_ALU | BPF_DIV | BPF_X: emit_write_y(G0, ctx); emit_alu(DIV, src, dst, ctx); + if (insn_is_zext(&insn[1])) + return 1; break; case BPF_ALU64 | BPF_DIV | BPF_X: emit_alu(UDIVX, src, dst, ctx); @@ -975,6 +979,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) break; case BPF_ALU | BPF_RSH | BPF_X: emit_alu(SRL, src, dst, ctx); + if (insn_is_zext(&insn[1])) + return 1; break; case BPF_ALU64 | BPF_RSH | BPF_X: emit_alu(SRLX, src, dst, ctx); @@ -997,9 +1003,12 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) case 16: emit_alu_K(SLL, dst, 16, ctx); emit_alu_K(SRL, dst, 16, ctx); + if (insn_is_zext(&insn[1])) + return 1; break; case 32: - emit_alu_K(SRL, dst, 0, ctx); + if (!ctx->prog->aux->verifier_zext) + emit_alu_K(SRL, dst, 0, ctx); break; case 64: /* nop */ @@ -1021,6 +1030,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) emit_alu3_K(AND, dst, 0xff, dst, ctx); emit_alu3_K(SLL, tmp, 8, tmp, ctx); emit_alu(OR, tmp, dst, ctx); + if (insn_is_zext(&insn[1])) + return 1; break; case 32: @@ -1037,6 +1048,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) emit_alu3_K(AND, dst, 0xff, dst, ctx); /* dst = dst & 0xff */ emit_alu3_K(SLL, dst, 24, dst, ctx); /* dst = dst << 24 */ emit_alu(OR, tmp, dst, ctx); /* dst = dst | tmp */ + if (insn_is_zext(&insn[1])) + return 1; break; case 64: @@ -1050,6 +1063,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) /* dst = imm */ case BPF_ALU | BPF_MOV | BPF_K: emit_loadimm32(imm, dst, ctx); + if (insn_is_zext(&insn[1])) + return 1; break; case BPF_ALU64 | BPF_MOV | BPF_K: emit_loadimm_sext(imm, dst, ctx); @@ -1132,6 +1147,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) break; case BPF_ALU | BPF_RSH | BPF_K: emit_alu_K(SRL, dst, imm, ctx); + if (insn_is_zext(&insn[1])) + return 1; break; case BPF_ALU64 | BPF_RSH | BPF_K: emit_alu_K(SRLX, dst, imm, ctx); @@ -1144,7 +1161,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) break; do_alu32_trunc: - if (BPF_CLASS(code) == BPF_ALU) + if (BPF_CLASS(code) == BPF_ALU && + !ctx->prog->aux->verifier_zext) emit_alu_K(SRL, dst, 0, ctx); break; @@ -1265,6 +1283,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) rs2 = RS2(tmp); } emit(opcode | RS1(src) | rs2 | RD(dst), ctx); + if (opcode != LD64 && insn_is_zext(&insn[1])) + return 1; break; } /* ST: *(size *)(dst + off) = imm */ @@ -1432,6 +1452,11 @@ static void jit_fill_hole(void *area, unsigned int size) *ptr++ = 0x91d02005; /* ta 5 */ } +bool bpf_jit_needs_zext(void) +{ + return true; +} + struct sparc64_jit_data { struct bpf_binary_header *header; u8 *image; diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 5a72c98e60bc..dce10b18f4bc 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -399,7 +399,7 @@ config SMP Y to "Enhanced Real Time Clock Support", below. The "Advanced Power Management" code will be disabled if you say Y here. - See also <file:Documentation/x86/i386/IO-APIC.txt>, + See also <file:Documentation/x86/i386/IO-APIC.rst>, <file:Documentation/lockup-watchdogs.txt> and the SMP-HOWTO available at <http://www.tldp.org/docs.html#howto>. @@ -1308,7 +1308,7 @@ config MICROCODE the Linux kernel. The preferred method to load microcode from a detached initrd is described - in Documentation/x86/microcode.txt. For that you need to enable + in Documentation/x86/microcode.rst. For that you need to enable CONFIG_BLK_DEV_INITRD in order for the loader to be able to scan the initrd for microcode blobs. @@ -1347,7 +1347,7 @@ config MICROCODE_OLD_INTERFACE It is inadequate because it runs too late to be able to properly load microcode on a machine and it needs special tools. Instead, you should've switched to the early loading method with the initrd or - builtin microcode by now: Documentation/x86/microcode.txt + builtin microcode by now: Documentation/x86/microcode.rst config X86_MSR tristate "/dev/cpu/*/msr - Model-specific register support" @@ -1496,7 +1496,7 @@ config X86_5LEVEL A kernel with the option enabled can be booted on machines that support 4- or 5-level paging. - See Documentation/x86/x86_64/5level-paging.txt for more + See Documentation/x86/x86_64/5level-paging.rst for more information. Say N if unsure. @@ -1644,7 +1644,7 @@ config ARCH_MEMORY_PROBE depends on X86_64 && MEMORY_HOTPLUG help This option enables a sysfs memory/probe interface for testing. - See Documentation/memory-hotplug.txt for more information. + See Documentation/admin-guide/mm/memory-hotplug.rst for more information. If you are unsure how to answer this question, answer N. config ARCH_PROC_KCORE_TEXT @@ -1801,7 +1801,7 @@ config MTRR You can safely say Y even if your machine doesn't have MTRRs, you'll just add about 9 KB to your kernel. - See <file:Documentation/x86/mtrr.txt> for more information. + See <file:Documentation/x86/mtrr.rst> for more information. config MTRR_SANITIZER def_bool y @@ -1913,7 +1913,7 @@ config X86_INTEL_MPX process and adds some branches to paths used during exec() and munmap(). - For details, see Documentation/x86/intel_mpx.txt + For details, see Documentation/x86/intel_mpx.rst If unsure, say N. @@ -1929,7 +1929,7 @@ config X86_INTEL_MEMORY_PROTECTION_KEYS page-based protections, but without requiring modification of the page tables when an application changes protection domains. - For details, see Documentation/x86/protection-keys.txt + For details, see Documentation/core-api/protection-keys.rst If unsure, say y. @@ -2055,7 +2055,7 @@ config CRASH_DUMP to a memory address not used by the main kernel or BIOS using PHYSICAL_START, or it must be built as a relocatable image (CONFIG_RELOCATABLE=y). - For more details see Documentation/kdump/kdump.txt + For more details see Documentation/kdump/kdump.rst config KEXEC_JUMP bool "kexec jump" @@ -2092,7 +2092,7 @@ config PHYSICAL_START the reserved region. In other words, it can be set based on the "X" value as specified in the "crashkernel=YM@XM" command line boot parameter passed to the panic-ed - kernel. Please take a look at Documentation/kdump/kdump.txt + kernel. Please take a look at Documentation/kdump/kdump.rst for more details about crash dumps. Usage of bzImage for capturing the crash dump is recommended as diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 6791a3c97589..71c92db47c41 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -156,7 +156,7 @@ config IOMMU_DEBUG code. When you use it make sure you have a big enough IOMMU/AGP aperture. Most of the options enabled by this can be set more finegrained using the iommu= command line - options. See Documentation/x86/x86_64/boot-options.txt for more + options. See Documentation/x86/x86_64/boot-options.rst for more details. config IOMMU_LEAK diff --git a/arch/x86/boot/compressed/acpi.c b/arch/x86/boot/compressed/acpi.c index ad84239e595e..15255f388a85 100644 --- a/arch/x86/boot/compressed/acpi.c +++ b/arch/x86/boot/compressed/acpi.c @@ -44,17 +44,109 @@ static acpi_physical_address get_acpi_rsdp(void) return addr; } -/* Search EFI system tables for RSDP. */ -static acpi_physical_address efi_get_rsdp_addr(void) +/* + * Search EFI system tables for RSDP. If both ACPI_20_TABLE_GUID and + * ACPI_TABLE_GUID are found, take the former, which has more features. + */ +static acpi_physical_address +__efi_get_rsdp_addr(unsigned long config_tables, unsigned int nr_tables, + bool efi_64) { acpi_physical_address rsdp_addr = 0; #ifdef CONFIG_EFI - unsigned long systab, systab_tables, config_tables; + int i; + + /* Get EFI tables from systab. */ + for (i = 0; i < nr_tables; i++) { + acpi_physical_address table; + efi_guid_t guid; + + if (efi_64) { + efi_config_table_64_t *tbl = (efi_config_table_64_t *)config_tables + i; + + guid = tbl->guid; + table = tbl->table; + + if (!IS_ENABLED(CONFIG_X86_64) && table >> 32) { + debug_putstr("Error getting RSDP address: EFI config table located above 4GB.\n"); + return 0; + } + } else { + efi_config_table_32_t *tbl = (efi_config_table_32_t *)config_tables + i; + + guid = tbl->guid; + table = tbl->table; + } + + if (!(efi_guidcmp(guid, ACPI_TABLE_GUID))) + rsdp_addr = table; + else if (!(efi_guidcmp(guid, ACPI_20_TABLE_GUID))) + return table; + } +#endif + return rsdp_addr; +} + +/* EFI/kexec support is 64-bit only. */ +#ifdef CONFIG_X86_64 +static struct efi_setup_data *get_kexec_setup_data_addr(void) +{ + struct setup_data *data; + u64 pa_data; + + pa_data = boot_params->hdr.setup_data; + while (pa_data) { + data = (struct setup_data *)pa_data; + if (data->type == SETUP_EFI) + return (struct efi_setup_data *)(pa_data + sizeof(struct setup_data)); + + pa_data = data->next; + } + return NULL; +} + +static acpi_physical_address kexec_get_rsdp_addr(void) +{ + efi_system_table_64_t *systab; + struct efi_setup_data *esd; + struct efi_info *ei; + char *sig; + + esd = (struct efi_setup_data *)get_kexec_setup_data_addr(); + if (!esd) + return 0; + + if (!esd->tables) { + debug_putstr("Wrong kexec SETUP_EFI data.\n"); + return 0; + } + + ei = &boot_params->efi_info; + sig = (char *)&ei->efi_loader_signature; + if (strncmp(sig, EFI64_LOADER_SIGNATURE, 4)) { + debug_putstr("Wrong kexec EFI loader signature.\n"); + return 0; + } + + /* Get systab from boot params. */ + systab = (efi_system_table_64_t *) (ei->efi_systab | ((__u64)ei->efi_systab_hi << 32)); + if (!systab) + error("EFI system table not found in kexec boot_params."); + + return __efi_get_rsdp_addr((unsigned long)esd->tables, systab->nr_tables, true); +} +#else +static acpi_physical_address kexec_get_rsdp_addr(void) { return 0; } +#endif /* CONFIG_X86_64 */ + +static acpi_physical_address efi_get_rsdp_addr(void) +{ +#ifdef CONFIG_EFI + unsigned long systab, config_tables; unsigned int nr_tables; struct efi_info *ei; bool efi_64; - int size, i; char *sig; ei = &boot_params->efi_info; @@ -88,49 +180,20 @@ static acpi_physical_address efi_get_rsdp_addr(void) config_tables = stbl->tables; nr_tables = stbl->nr_tables; - size = sizeof(efi_config_table_64_t); } else { efi_system_table_32_t *stbl = (efi_system_table_32_t *)systab; config_tables = stbl->tables; nr_tables = stbl->nr_tables; - size = sizeof(efi_config_table_32_t); } if (!config_tables) error("EFI config tables not found."); - /* Get EFI tables from systab. */ - for (i = 0; i < nr_tables; i++) { - acpi_physical_address table; - efi_guid_t guid; - - config_tables += size; - - if (efi_64) { - efi_config_table_64_t *tbl = (efi_config_table_64_t *)config_tables; - - guid = tbl->guid; - table = tbl->table; - - if (!IS_ENABLED(CONFIG_X86_64) && table >> 32) { - debug_putstr("Error getting RSDP address: EFI config table located above 4GB.\n"); - return 0; - } - } else { - efi_config_table_32_t *tbl = (efi_config_table_32_t *)config_tables; - - guid = tbl->guid; - table = tbl->table; - } - - if (!(efi_guidcmp(guid, ACPI_TABLE_GUID))) - rsdp_addr = table; - else if (!(efi_guidcmp(guid, ACPI_20_TABLE_GUID))) - return table; - } + return __efi_get_rsdp_addr(config_tables, nr_tables, efi_64); +#else + return 0; #endif - return rsdp_addr; } static u8 compute_checksum(u8 *buffer, u32 length) @@ -220,6 +283,14 @@ acpi_physical_address get_rsdp_addr(void) if (!pa) pa = boot_params->acpi_rsdp_addr; + /* + * Try to get EFI data from setup_data. This can happen when we're a + * kexec'ed kernel and kexec(1) has passed all the required EFI info to + * us. + */ + if (!pa) + pa = kexec_get_rsdp_addr(); + if (!pa) pa = efi_get_rsdp_addr(); diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index fafb75c6c592..6233ae35d0d9 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -659,6 +659,7 @@ no_longmode: gdt64: .word gdt_end - gdt .quad 0 + .balign 8 gdt: .word gdt_end - gdt .long gdt diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index 5a237e8dbf8d..24e65a0f756d 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -351,9 +351,6 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, /* Clear flags intended for solely in-kernel use. */ boot_params->hdr.loadflags &= ~KASLR_FLAG; - /* Save RSDP address for later use. */ - /* boot_params->acpi_rsdp_addr = get_rsdp_addr(); */ - sanitize_boot_params(boot_params); if (boot_params->screen_info.orig_video_mode == 7) { @@ -368,6 +365,14 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, cols = boot_params->screen_info.orig_video_cols; console_init(); + + /* + * Save RSDP address for later use. Have this after console_init() + * so that early debugging output from the RSDP parsing code can be + * collected. + */ + boot_params->acpi_rsdp_addr = get_rsdp_addr(); + debug_putstr("early console in extract_kernel\n"); free_mem_ptr = heap; /* Heap */ diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index 850b8762e889..2c11c0f45d49 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -313,7 +313,7 @@ start_sys_seg: .word SYSSEG # obsolete and meaningless, but just type_of_loader: .byte 0 # 0 means ancient bootloader, newer # bootloaders know to change this. - # See Documentation/x86/boot.txt for + # See Documentation/x86/boot.rst for # assigned ids # flags, unused bits must be zero (RFU) bit within loadflags @@ -419,7 +419,17 @@ xloadflags: # define XLF4 0 #endif - .word XLF0 | XLF1 | XLF23 | XLF4 +#ifdef CONFIG_X86_64 +#ifdef CONFIG_X86_5LEVEL +#define XLF56 (XLF_5LEVEL|XLF_5LEVEL_ENABLED) +#else +#define XLF56 XLF_5LEVEL +#endif +#else +#define XLF56 0 +#endif + + .word XLF0 | XLF1 | XLF23 | XLF4 | XLF56 cmdline_size: .long COMMAND_LINE_SIZE-1 #length of the command line, #added with boot protocol diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index a829dd3117d0..0ea4831a72a4 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -8,7 +8,7 @@ * * entry.S contains the system-call and fault low-level handling routines. * - * Some of this is documented in Documentation/x86/entry_64.txt + * Some of this is documented in Documentation/x86/entry_64.rst * * A note on terminology: * - iret frame: Architecture defined interrupt frame from SS to RIP diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index ad968b7bac72..c00019abd076 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -438,3 +438,5 @@ 431 i386 fsconfig sys_fsconfig __ia32_sys_fsconfig 432 i386 fsmount sys_fsmount __ia32_sys_fsmount 433 i386 fspick sys_fspick __ia32_sys_fspick +434 i386 pidfd_open sys_pidfd_open __ia32_sys_pidfd_open +435 i386 clone3 sys_clone3 __ia32_sys_clone3 diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index b4e6f9e6204a..c29976eca4a8 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -355,6 +355,8 @@ 431 common fsconfig __x64_sys_fsconfig 432 common fsmount __x64_sys_fsmount 433 common fspick __x64_sys_fspick +434 common pidfd_open __x64_sys_pidfd_open +435 common clone3 __x64_sys_clone3/ptregs # # x32-specific system call numbers start at 512 to avoid cache impact diff --git a/arch/x86/events/Makefile b/arch/x86/events/Makefile index 9cbfd34042d5..9e07f554333f 100644 --- a/arch/x86/events/Makefile +++ b/arch/x86/events/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-y += core.o +obj-y += core.o probe.o obj-y += amd/ obj-$(CONFIG_X86_LOCAL_APIC) += msr.o obj-$(CONFIG_CPU_SUP_INTEL) += intel/ diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index ceb712b0a1c6..81b005e4c7d9 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -1618,68 +1618,6 @@ static struct attribute_group x86_pmu_format_group __ro_after_init = { .attrs = NULL, }; -/* - * Remove all undefined events (x86_pmu.event_map(id) == 0) - * out of events_attr attributes. - */ -static void __init filter_events(struct attribute **attrs) -{ - struct device_attribute *d; - struct perf_pmu_events_attr *pmu_attr; - int offset = 0; - int i, j; - - for (i = 0; attrs[i]; i++) { - d = (struct device_attribute *)attrs[i]; - pmu_attr = container_of(d, struct perf_pmu_events_attr, attr); - /* str trumps id */ - if (pmu_attr->event_str) - continue; - if (x86_pmu.event_map(i + offset)) - continue; - - for (j = i; attrs[j]; j++) - attrs[j] = attrs[j + 1]; - - /* Check the shifted attr. */ - i--; - - /* - * event_map() is index based, the attrs array is organized - * by increasing event index. If we shift the events, then - * we need to compensate for the event_map(), otherwise - * we are looking up the wrong event in the map - */ - offset++; - } -} - -/* Merge two pointer arrays */ -__init struct attribute **merge_attr(struct attribute **a, struct attribute **b) -{ - struct attribute **new; - int j, i; - - for (j = 0; a && a[j]; j++) - ; - for (i = 0; b && b[i]; i++) - j++; - j++; - - new = kmalloc_array(j, sizeof(struct attribute *), GFP_KERNEL); - if (!new) - return NULL; - - j = 0; - for (i = 0; a && a[i]; i++) - new[j++] = a[i]; - for (i = 0; b && b[i]; i++) - new[j++] = b[i]; - new[j] = NULL; - - return new; -} - ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr, char *page) { struct perf_pmu_events_attr *pmu_attr = \ @@ -1744,9 +1682,24 @@ static struct attribute *events_attr[] = { NULL, }; +/* + * Remove all undefined events (x86_pmu.event_map(id) == 0) + * out of events_attr attributes. + */ +static umode_t +is_visible(struct kobject *kobj, struct attribute *attr, int idx) +{ + struct perf_pmu_events_attr *pmu_attr; + + pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr.attr); + /* str trumps id */ + return pmu_attr->event_str || x86_pmu.event_map(idx) ? attr->mode : 0; +} + static struct attribute_group x86_pmu_events_group __ro_after_init = { .name = "events", .attrs = events_attr, + .is_visible = is_visible, }; ssize_t x86_event_sysfs_show(char *page, u64 config, u64 event) @@ -1842,37 +1795,10 @@ static int __init init_hw_perf_events(void) x86_pmu_format_group.attrs = x86_pmu.format_attrs; - if (x86_pmu.caps_attrs) { - struct attribute **tmp; - - tmp = merge_attr(x86_pmu_caps_group.attrs, x86_pmu.caps_attrs); - if (!WARN_ON(!tmp)) - x86_pmu_caps_group.attrs = tmp; - } - - if (x86_pmu.event_attrs) - x86_pmu_events_group.attrs = x86_pmu.event_attrs; - if (!x86_pmu.events_sysfs_show) x86_pmu_events_group.attrs = &empty_attrs; - else - filter_events(x86_pmu_events_group.attrs); - if (x86_pmu.cpu_events) { - struct attribute **tmp; - - tmp = merge_attr(x86_pmu_events_group.attrs, x86_pmu.cpu_events); - if (!WARN_ON(!tmp)) - x86_pmu_events_group.attrs = tmp; - } - - if (x86_pmu.attrs) { - struct attribute **tmp; - - tmp = merge_attr(x86_pmu_attr_group.attrs, x86_pmu.attrs); - if (!WARN_ON(!tmp)) - x86_pmu_attr_group.attrs = tmp; - } + pmu.attr_update = x86_pmu.attr_update; pr_info("... version: %d\n", x86_pmu.version); pr_info("... bit width: %d\n", x86_pmu.cntval_bits); diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index a5436cee20b1..bda450ff51ee 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -20,6 +20,7 @@ #include <asm/intel-family.h> #include <asm/apic.h> #include <asm/cpu_device_id.h> +#include <asm/hypervisor.h> #include "../perf_event.h" @@ -3897,8 +3898,6 @@ static __initconst const struct x86_pmu core_pmu = { .check_period = intel_pmu_check_period, }; -static struct attribute *intel_pmu_attrs[]; - static __initconst const struct x86_pmu intel_pmu = { .name = "Intel", .handle_irq = intel_pmu_handle_irq, @@ -3930,8 +3929,6 @@ static __initconst const struct x86_pmu intel_pmu = { .format_attrs = intel_arch3_formats_attr, .events_sysfs_show = intel_event_sysfs_show, - .attrs = intel_pmu_attrs, - .cpu_prepare = intel_pmu_cpu_prepare, .cpu_starting = intel_pmu_cpu_starting, .cpu_dying = intel_pmu_cpu_dying, @@ -4055,6 +4052,13 @@ static bool check_msr(unsigned long msr, u64 mask) u64 val_old, val_new, val_tmp; /* + * Disable the check for real HW, so we don't + * mess with potentionaly enabled registers: + */ + if (hypervisor_is_type(X86_HYPER_NATIVE)) + return true; + + /* * Read the current value, change it and read it back to see if it * matches, this is needed to detect certain hardware emulators * (qemu/kvm) that don't trap on the MSR access and always return 0s. @@ -4274,13 +4278,6 @@ static struct attribute *icl_tsx_events_attrs[] = { NULL, }; -static __init struct attribute **get_icl_events_attrs(void) -{ - return boot_cpu_has(X86_FEATURE_RTM) ? - merge_attr(icl_events_attrs, icl_tsx_events_attrs) : - icl_events_attrs; -} - static ssize_t freeze_on_smi_show(struct device *cdev, struct device_attribute *attr, char *buf) @@ -4402,43 +4399,111 @@ static DEVICE_ATTR(allow_tsx_force_abort, 0644, static struct attribute *intel_pmu_attrs[] = { &dev_attr_freeze_on_smi.attr, - NULL, /* &dev_attr_allow_tsx_force_abort.attr.attr */ + &dev_attr_allow_tsx_force_abort.attr, NULL, }; -static __init struct attribute ** -get_events_attrs(struct attribute **base, - struct attribute **mem, - struct attribute **tsx) +static umode_t +tsx_is_visible(struct kobject *kobj, struct attribute *attr, int i) { - struct attribute **attrs = base; - struct attribute **old; + return boot_cpu_has(X86_FEATURE_RTM) ? attr->mode : 0; +} - if (mem && x86_pmu.pebs) - attrs = merge_attr(attrs, mem); +static umode_t +pebs_is_visible(struct kobject *kobj, struct attribute *attr, int i) +{ + return x86_pmu.pebs ? attr->mode : 0; +} - if (tsx && boot_cpu_has(X86_FEATURE_RTM)) { - old = attrs; - attrs = merge_attr(attrs, tsx); - if (old != base) - kfree(old); - } +static umode_t +lbr_is_visible(struct kobject *kobj, struct attribute *attr, int i) +{ + return x86_pmu.lbr_nr ? attr->mode : 0; +} - return attrs; +static umode_t +exra_is_visible(struct kobject *kobj, struct attribute *attr, int i) +{ + return x86_pmu.version >= 2 ? attr->mode : 0; } +static umode_t +default_is_visible(struct kobject *kobj, struct attribute *attr, int i) +{ + if (attr == &dev_attr_allow_tsx_force_abort.attr) + return x86_pmu.flags & PMU_FL_TFA ? attr->mode : 0; + + return attr->mode; +} + +static struct attribute_group group_events_td = { + .name = "events", +}; + +static struct attribute_group group_events_mem = { + .name = "events", + .is_visible = pebs_is_visible, +}; + +static struct attribute_group group_events_tsx = { + .name = "events", + .is_visible = tsx_is_visible, +}; + +static struct attribute_group group_caps_gen = { + .name = "caps", + .attrs = intel_pmu_caps_attrs, +}; + +static struct attribute_group group_caps_lbr = { + .name = "caps", + .attrs = lbr_attrs, + .is_visible = lbr_is_visible, +}; + +static struct attribute_group group_format_extra = { + .name = "format", + .is_visible = exra_is_visible, +}; + +static struct attribute_group group_format_extra_skl = { + .name = "format", + .is_visible = exra_is_visible, +}; + +static struct attribute_group group_default = { + .attrs = intel_pmu_attrs, + .is_visible = default_is_visible, +}; + +static const struct attribute_group *attr_update[] = { + &group_events_td, + &group_events_mem, + &group_events_tsx, + &group_caps_gen, + &group_caps_lbr, + &group_format_extra, + &group_format_extra_skl, + &group_default, + NULL, +}; + +static struct attribute *empty_attrs; + __init int intel_pmu_init(void) { - struct attribute **extra_attr = NULL; - struct attribute **mem_attr = NULL; - struct attribute **tsx_attr = NULL; - struct attribute **to_free = NULL; + struct attribute **extra_skl_attr = &empty_attrs; + struct attribute **extra_attr = &empty_attrs; + struct attribute **td_attr = &empty_attrs; + struct attribute **mem_attr = &empty_attrs; + struct attribute **tsx_attr = &empty_attrs; union cpuid10_edx edx; union cpuid10_eax eax; union cpuid10_ebx ebx; struct event_constraint *c; unsigned int unused; struct extra_reg *er; + bool pmem = false; int version, i; char *name; @@ -4596,7 +4661,7 @@ __init int intel_pmu_init(void) x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints; x86_pmu.extra_regs = intel_slm_extra_regs; x86_pmu.flags |= PMU_FL_HAS_RSP_1; - x86_pmu.cpu_events = slm_events_attrs; + td_attr = slm_events_attrs; extra_attr = slm_format_attr; pr_cont("Silvermont events, "); name = "silvermont"; @@ -4624,7 +4689,7 @@ __init int intel_pmu_init(void) x86_pmu.pebs_prec_dist = true; x86_pmu.lbr_pt_coexist = true; x86_pmu.flags |= PMU_FL_HAS_RSP_1; - x86_pmu.cpu_events = glm_events_attrs; + td_attr = glm_events_attrs; extra_attr = slm_format_attr; pr_cont("Goldmont events, "); name = "goldmont"; @@ -4651,7 +4716,7 @@ __init int intel_pmu_init(void) x86_pmu.flags |= PMU_FL_HAS_RSP_1; x86_pmu.flags |= PMU_FL_PEBS_ALL; x86_pmu.get_event_constraints = glp_get_event_constraints; - x86_pmu.cpu_events = glm_events_attrs; + td_attr = glm_events_attrs; /* Goldmont Plus has 4-wide pipeline */ event_attr_td_total_slots_scale_glm.event_str = "4"; extra_attr = slm_format_attr; @@ -4740,7 +4805,7 @@ __init int intel_pmu_init(void) x86_pmu.flags |= PMU_FL_HAS_RSP_1; x86_pmu.flags |= PMU_FL_NO_HT_SHARING; - x86_pmu.cpu_events = snb_events_attrs; + td_attr = snb_events_attrs; mem_attr = snb_mem_events_attrs; /* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */ @@ -4781,7 +4846,7 @@ __init int intel_pmu_init(void) x86_pmu.flags |= PMU_FL_HAS_RSP_1; x86_pmu.flags |= PMU_FL_NO_HT_SHARING; - x86_pmu.cpu_events = snb_events_attrs; + td_attr = snb_events_attrs; mem_attr = snb_mem_events_attrs; /* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */ @@ -4818,10 +4883,10 @@ __init int intel_pmu_init(void) x86_pmu.hw_config = hsw_hw_config; x86_pmu.get_event_constraints = hsw_get_event_constraints; - x86_pmu.cpu_events = hsw_events_attrs; x86_pmu.lbr_double_abort = true; extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? hsw_format_attr : nhm_format_attr; + td_attr = hsw_events_attrs; mem_attr = hsw_mem_events_attrs; tsx_attr = hsw_tsx_events_attrs; pr_cont("Haswell events, "); @@ -4860,10 +4925,10 @@ __init int intel_pmu_init(void) x86_pmu.hw_config = hsw_hw_config; x86_pmu.get_event_constraints = hsw_get_event_constraints; - x86_pmu.cpu_events = hsw_events_attrs; x86_pmu.limit_period = bdw_limit_period; extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? hsw_format_attr : nhm_format_attr; + td_attr = hsw_events_attrs; mem_attr = hsw_mem_events_attrs; tsx_attr = hsw_tsx_events_attrs; pr_cont("Broadwell events, "); @@ -4890,9 +4955,10 @@ __init int intel_pmu_init(void) name = "knights-landing"; break; + case INTEL_FAM6_SKYLAKE_X: + pmem = true; case INTEL_FAM6_SKYLAKE_MOBILE: case INTEL_FAM6_SKYLAKE_DESKTOP: - case INTEL_FAM6_SKYLAKE_X: case INTEL_FAM6_KABYLAKE_MOBILE: case INTEL_FAM6_KABYLAKE_DESKTOP: x86_add_quirk(intel_pebs_isolation_quirk); @@ -4920,27 +4986,28 @@ __init int intel_pmu_init(void) x86_pmu.get_event_constraints = hsw_get_event_constraints; extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? hsw_format_attr : nhm_format_attr; - extra_attr = merge_attr(extra_attr, skl_format_attr); - to_free = extra_attr; - x86_pmu.cpu_events = hsw_events_attrs; + extra_skl_attr = skl_format_attr; + td_attr = hsw_events_attrs; mem_attr = hsw_mem_events_attrs; tsx_attr = hsw_tsx_events_attrs; - intel_pmu_pebs_data_source_skl( - boot_cpu_data.x86_model == INTEL_FAM6_SKYLAKE_X); + intel_pmu_pebs_data_source_skl(pmem); if (boot_cpu_has(X86_FEATURE_TSX_FORCE_ABORT)) { x86_pmu.flags |= PMU_FL_TFA; x86_pmu.get_event_constraints = tfa_get_event_constraints; x86_pmu.enable_all = intel_tfa_pmu_enable_all; x86_pmu.commit_scheduling = intel_tfa_commit_scheduling; - intel_pmu_attrs[1] = &dev_attr_allow_tsx_force_abort.attr; } pr_cont("Skylake events, "); name = "skylake"; break; + case INTEL_FAM6_ICELAKE_X: + case INTEL_FAM6_ICELAKE_XEON_D: + pmem = true; case INTEL_FAM6_ICELAKE_MOBILE: + case INTEL_FAM6_ICELAKE_DESKTOP: x86_pmu.late_ack = true; memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); @@ -4959,11 +5026,12 @@ __init int intel_pmu_init(void) x86_pmu.get_event_constraints = icl_get_event_constraints; extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? hsw_format_attr : nhm_format_attr; - extra_attr = merge_attr(extra_attr, skl_format_attr); - x86_pmu.cpu_events = get_icl_events_attrs(); + extra_skl_attr = skl_format_attr; + mem_attr = icl_events_attrs; + tsx_attr = icl_tsx_events_attrs; x86_pmu.rtm_abort_event = X86_CONFIG(.event=0xca, .umask=0x02); x86_pmu.lbr_pt_coexist = true; - intel_pmu_pebs_data_source_skl(false); + intel_pmu_pebs_data_source_skl(pmem); pr_cont("Icelake events, "); name = "icelake"; break; @@ -4988,14 +5056,14 @@ __init int intel_pmu_init(void) snprintf(pmu_name_str, sizeof(pmu_name_str), "%s", name); - if (version >= 2 && extra_attr) { - x86_pmu.format_attrs = merge_attr(intel_arch3_formats_attr, - extra_attr); - WARN_ON(!x86_pmu.format_attrs); - } - x86_pmu.cpu_events = get_events_attrs(x86_pmu.cpu_events, - mem_attr, tsx_attr); + group_events_td.attrs = td_attr; + group_events_mem.attrs = mem_attr; + group_events_tsx.attrs = tsx_attr; + group_format_extra.attrs = extra_attr; + group_format_extra_skl.attrs = extra_skl_attr; + + x86_pmu.attr_update = attr_update; if (x86_pmu.num_counters > INTEL_PMC_MAX_GENERIC) { WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!", @@ -5043,12 +5111,8 @@ __init int intel_pmu_init(void) x86_pmu.lbr_nr = 0; } - x86_pmu.caps_attrs = intel_pmu_caps_attrs; - - if (x86_pmu.lbr_nr) { - x86_pmu.caps_attrs = merge_attr(x86_pmu.caps_attrs, lbr_attrs); + if (x86_pmu.lbr_nr) pr_cont("%d-deep LBR, ", x86_pmu.lbr_nr); - } /* * Access extra MSR may cause #GP under certain circumstances. @@ -5078,7 +5142,6 @@ __init int intel_pmu_init(void) if (x86_pmu.counter_freezing) x86_pmu.handle_irq = intel_pmu_handle_irq_v4; - kfree(to_free); return 0; } diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index 267d7f8e12ab..688592b34564 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -96,6 +96,7 @@ #include <asm/cpu_device_id.h> #include <asm/intel-family.h> #include "../perf_event.h" +#include "../probe.h" MODULE_LICENSE("GPL"); @@ -144,25 +145,42 @@ enum perf_cstate_core_events { PERF_CSTATE_CORE_EVENT_MAX, }; -PMU_EVENT_ATTR_STRING(c1-residency, evattr_cstate_core_c1, "event=0x00"); -PMU_EVENT_ATTR_STRING(c3-residency, evattr_cstate_core_c3, "event=0x01"); -PMU_EVENT_ATTR_STRING(c6-residency, evattr_cstate_core_c6, "event=0x02"); -PMU_EVENT_ATTR_STRING(c7-residency, evattr_cstate_core_c7, "event=0x03"); +PMU_EVENT_ATTR_STRING(c1-residency, attr_cstate_core_c1, "event=0x00"); +PMU_EVENT_ATTR_STRING(c3-residency, attr_cstate_core_c3, "event=0x01"); +PMU_EVENT_ATTR_STRING(c6-residency, attr_cstate_core_c6, "event=0x02"); +PMU_EVENT_ATTR_STRING(c7-residency, attr_cstate_core_c7, "event=0x03"); -static struct perf_cstate_msr core_msr[] = { - [PERF_CSTATE_CORE_C1_RES] = { MSR_CORE_C1_RES, &evattr_cstate_core_c1 }, - [PERF_CSTATE_CORE_C3_RES] = { MSR_CORE_C3_RESIDENCY, &evattr_cstate_core_c3 }, - [PERF_CSTATE_CORE_C6_RES] = { MSR_CORE_C6_RESIDENCY, &evattr_cstate_core_c6 }, - [PERF_CSTATE_CORE_C7_RES] = { MSR_CORE_C7_RESIDENCY, &evattr_cstate_core_c7 }, +static unsigned long core_msr_mask; + +PMU_EVENT_GROUP(events, cstate_core_c1); +PMU_EVENT_GROUP(events, cstate_core_c3); +PMU_EVENT_GROUP(events, cstate_core_c6); +PMU_EVENT_GROUP(events, cstate_core_c7); + +static bool test_msr(int idx, void *data) +{ + return test_bit(idx, (unsigned long *) data); +} + +static struct perf_msr core_msr[] = { + [PERF_CSTATE_CORE_C1_RES] = { MSR_CORE_C1_RES, &group_cstate_core_c1, test_msr }, + [PERF_CSTATE_CORE_C3_RES] = { MSR_CORE_C3_RESIDENCY, &group_cstate_core_c3, test_msr }, + [PERF_CSTATE_CORE_C6_RES] = { MSR_CORE_C6_RESIDENCY, &group_cstate_core_c6, test_msr }, + [PERF_CSTATE_CORE_C7_RES] = { MSR_CORE_C7_RESIDENCY, &group_cstate_core_c7, test_msr }, }; -static struct attribute *core_events_attrs[PERF_CSTATE_CORE_EVENT_MAX + 1] = { +static struct attribute *attrs_empty[] = { NULL, }; +/* + * There are no default events, but we need to create + * "events" group (with empty attrs) before updating + * it with detected events. + */ static struct attribute_group core_events_attr_group = { .name = "events", - .attrs = core_events_attrs, + .attrs = attrs_empty, }; DEFINE_CSTATE_FORMAT_ATTR(core_event, event, "config:0-63"); @@ -211,31 +229,37 @@ enum perf_cstate_pkg_events { PERF_CSTATE_PKG_EVENT_MAX, }; -PMU_EVENT_ATTR_STRING(c2-residency, evattr_cstate_pkg_c2, "event=0x00"); -PMU_EVENT_ATTR_STRING(c3-residency, evattr_cstate_pkg_c3, "event=0x01"); -PMU_EVENT_ATTR_STRING(c6-residency, evattr_cstate_pkg_c6, "event=0x02"); -PMU_EVENT_ATTR_STRING(c7-residency, evattr_cstate_pkg_c7, "event=0x03"); -PMU_EVENT_ATTR_STRING(c8-residency, evattr_cstate_pkg_c8, "event=0x04"); -PMU_EVENT_ATTR_STRING(c9-residency, evattr_cstate_pkg_c9, "event=0x05"); -PMU_EVENT_ATTR_STRING(c10-residency, evattr_cstate_pkg_c10, "event=0x06"); - -static struct perf_cstate_msr pkg_msr[] = { - [PERF_CSTATE_PKG_C2_RES] = { MSR_PKG_C2_RESIDENCY, &evattr_cstate_pkg_c2 }, - [PERF_CSTATE_PKG_C3_RES] = { MSR_PKG_C3_RESIDENCY, &evattr_cstate_pkg_c3 }, - [PERF_CSTATE_PKG_C6_RES] = { MSR_PKG_C6_RESIDENCY, &evattr_cstate_pkg_c6 }, - [PERF_CSTATE_PKG_C7_RES] = { MSR_PKG_C7_RESIDENCY, &evattr_cstate_pkg_c7 }, - [PERF_CSTATE_PKG_C8_RES] = { MSR_PKG_C8_RESIDENCY, &evattr_cstate_pkg_c8 }, - [PERF_CSTATE_PKG_C9_RES] = { MSR_PKG_C9_RESIDENCY, &evattr_cstate_pkg_c9 }, - [PERF_CSTATE_PKG_C10_RES] = { MSR_PKG_C10_RESIDENCY, &evattr_cstate_pkg_c10 }, -}; - -static struct attribute *pkg_events_attrs[PERF_CSTATE_PKG_EVENT_MAX + 1] = { - NULL, +PMU_EVENT_ATTR_STRING(c2-residency, attr_cstate_pkg_c2, "event=0x00"); +PMU_EVENT_ATTR_STRING(c3-residency, attr_cstate_pkg_c3, "event=0x01"); +PMU_EVENT_ATTR_STRING(c6-residency, attr_cstate_pkg_c6, "event=0x02"); +PMU_EVENT_ATTR_STRING(c7-residency, attr_cstate_pkg_c7, "event=0x03"); +PMU_EVENT_ATTR_STRING(c8-residency, attr_cstate_pkg_c8, "event=0x04"); +PMU_EVENT_ATTR_STRING(c9-residency, attr_cstate_pkg_c9, "event=0x05"); +PMU_EVENT_ATTR_STRING(c10-residency, attr_cstate_pkg_c10, "event=0x06"); + +static unsigned long pkg_msr_mask; + +PMU_EVENT_GROUP(events, cstate_pkg_c2); +PMU_EVENT_GROUP(events, cstate_pkg_c3); +PMU_EVENT_GROUP(events, cstate_pkg_c6); +PMU_EVENT_GROUP(events, cstate_pkg_c7); +PMU_EVENT_GROUP(events, cstate_pkg_c8); +PMU_EVENT_GROUP(events, cstate_pkg_c9); +PMU_EVENT_GROUP(events, cstate_pkg_c10); + +static struct perf_msr pkg_msr[] = { + [PERF_CSTATE_PKG_C2_RES] = { MSR_PKG_C2_RESIDENCY, &group_cstate_pkg_c2, test_msr }, + [PERF_CSTATE_PKG_C3_RES] = { MSR_PKG_C3_RESIDENCY, &group_cstate_pkg_c3, test_msr }, + [PERF_CSTATE_PKG_C6_RES] = { MSR_PKG_C6_RESIDENCY, &group_cstate_pkg_c6, test_msr }, + [PERF_CSTATE_PKG_C7_RES] = { MSR_PKG_C7_RESIDENCY, &group_cstate_pkg_c7, test_msr }, + [PERF_CSTATE_PKG_C8_RES] = { MSR_PKG_C8_RESIDENCY, &group_cstate_pkg_c8, test_msr }, + [PERF_CSTATE_PKG_C9_RES] = { MSR_PKG_C9_RESIDENCY, &group_cstate_pkg_c9, test_msr }, + [PERF_CSTATE_PKG_C10_RES] = { MSR_PKG_C10_RESIDENCY, &group_cstate_pkg_c10, test_msr }, }; static struct attribute_group pkg_events_attr_group = { .name = "events", - .attrs = pkg_events_attrs, + .attrs = attrs_empty, }; DEFINE_CSTATE_FORMAT_ATTR(pkg_event, event, "config:0-63"); @@ -289,7 +313,8 @@ static int cstate_pmu_event_init(struct perf_event *event) if (event->pmu == &cstate_core_pmu) { if (cfg >= PERF_CSTATE_CORE_EVENT_MAX) return -EINVAL; - if (!core_msr[cfg].attr) + cfg = array_index_nospec((unsigned long)cfg, PERF_CSTATE_CORE_EVENT_MAX); + if (!(core_msr_mask & (1 << cfg))) return -EINVAL; event->hw.event_base = core_msr[cfg].msr; cpu = cpumask_any_and(&cstate_core_cpu_mask, @@ -298,7 +323,7 @@ static int cstate_pmu_event_init(struct perf_event *event) if (cfg >= PERF_CSTATE_PKG_EVENT_MAX) return -EINVAL; cfg = array_index_nospec((unsigned long)cfg, PERF_CSTATE_PKG_EVENT_MAX); - if (!pkg_msr[cfg].attr) + if (!(pkg_msr_mask & (1 << cfg))) return -EINVAL; event->hw.event_base = pkg_msr[cfg].msr; cpu = cpumask_any_and(&cstate_pkg_cpu_mask, @@ -421,8 +446,28 @@ static int cstate_cpu_init(unsigned int cpu) return 0; } +const struct attribute_group *core_attr_update[] = { + &group_cstate_core_c1, + &group_cstate_core_c3, + &group_cstate_core_c6, + &group_cstate_core_c7, + NULL, +}; + +const struct attribute_group *pkg_attr_update[] = { + &group_cstate_pkg_c2, + &group_cstate_pkg_c3, + &group_cstate_pkg_c6, + &group_cstate_pkg_c7, + &group_cstate_pkg_c8, + &group_cstate_pkg_c9, + &group_cstate_pkg_c10, + NULL, +}; + static struct pmu cstate_core_pmu = { .attr_groups = core_attr_groups, + .attr_update = core_attr_update, .name = "cstate_core", .task_ctx_nr = perf_invalid_context, .event_init = cstate_pmu_event_init, @@ -437,6 +482,7 @@ static struct pmu cstate_core_pmu = { static struct pmu cstate_pkg_pmu = { .attr_groups = pkg_attr_groups, + .attr_update = pkg_attr_update, .name = "cstate_pkg", .task_ctx_nr = perf_invalid_context, .event_init = cstate_pmu_event_init, @@ -580,35 +626,11 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = { X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT_PLUS, glm_cstates), X86_CSTATES_MODEL(INTEL_FAM6_ICELAKE_MOBILE, snb_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_ICELAKE_DESKTOP, snb_cstates), { }, }; MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match); -/* - * Probe the cstate events and insert the available one into sysfs attrs - * Return false if there are no available events. - */ -static bool __init cstate_probe_msr(const unsigned long evmsk, int max, - struct perf_cstate_msr *msr, - struct attribute **attrs) -{ - bool found = false; - unsigned int bit; - u64 val; - - for (bit = 0; bit < max; bit++) { - if (test_bit(bit, &evmsk) && !rdmsrl_safe(msr[bit].msr, &val)) { - *attrs++ = &msr[bit].attr->attr.attr; - found = true; - } else { - msr[bit].attr = NULL; - } - } - *attrs = NULL; - - return found; -} - static int __init cstate_probe(const struct cstate_model *cm) { /* SLM has different MSR for PKG C6 */ @@ -620,13 +642,14 @@ static int __init cstate_probe(const struct cstate_model *cm) pkg_msr[PERF_CSTATE_CORE_C6_RES].msr = MSR_KNL_CORE_C6_RESIDENCY; - has_cstate_core = cstate_probe_msr(cm->core_events, - PERF_CSTATE_CORE_EVENT_MAX, - core_msr, core_events_attrs); + core_msr_mask = perf_msr_probe(core_msr, PERF_CSTATE_CORE_EVENT_MAX, + true, (void *) &cm->core_events); + + pkg_msr_mask = perf_msr_probe(pkg_msr, PERF_CSTATE_PKG_EVENT_MAX, + true, (void *) &cm->pkg_events); - has_cstate_pkg = cstate_probe_msr(cm->pkg_events, - PERF_CSTATE_PKG_EVENT_MAX, - pkg_msr, pkg_events_attrs); + has_cstate_core = !!core_msr_mask; + has_cstate_pkg = !!pkg_msr_mask; return (has_cstate_core || has_cstate_pkg) ? 0 : -ENODEV; } diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c index 8c7ecde3ba70..64ab51ffdf06 100644 --- a/arch/x86/events/intel/rapl.c +++ b/arch/x86/events/intel/rapl.c @@ -55,27 +55,28 @@ #include <linux/module.h> #include <linux/slab.h> #include <linux/perf_event.h> +#include <linux/nospec.h> #include <asm/cpu_device_id.h> #include <asm/intel-family.h> #include "../perf_event.h" +#include "../probe.h" MODULE_LICENSE("GPL"); /* * RAPL energy status counters */ -#define RAPL_IDX_PP0_NRG_STAT 0 /* all cores */ -#define INTEL_RAPL_PP0 0x1 /* pseudo-encoding */ -#define RAPL_IDX_PKG_NRG_STAT 1 /* entire package */ -#define INTEL_RAPL_PKG 0x2 /* pseudo-encoding */ -#define RAPL_IDX_RAM_NRG_STAT 2 /* DRAM */ -#define INTEL_RAPL_RAM 0x3 /* pseudo-encoding */ -#define RAPL_IDX_PP1_NRG_STAT 3 /* gpu */ -#define INTEL_RAPL_PP1 0x4 /* pseudo-encoding */ -#define RAPL_IDX_PSYS_NRG_STAT 4 /* psys */ -#define INTEL_RAPL_PSYS 0x5 /* pseudo-encoding */ - -#define NR_RAPL_DOMAINS 0x5 +enum perf_rapl_events { + PERF_RAPL_PP0 = 0, /* all cores */ + PERF_RAPL_PKG, /* entire package */ + PERF_RAPL_RAM, /* DRAM */ + PERF_RAPL_PP1, /* gpu */ + PERF_RAPL_PSYS, /* psys */ + + PERF_RAPL_MAX, + NR_RAPL_DOMAINS = PERF_RAPL_MAX, +}; + static const char *const rapl_domain_names[NR_RAPL_DOMAINS] __initconst = { "pp0-core", "package", @@ -84,33 +85,6 @@ static const char *const rapl_domain_names[NR_RAPL_DOMAINS] __initconst = { "psys", }; -/* Clients have PP0, PKG */ -#define RAPL_IDX_CLN (1<<RAPL_IDX_PP0_NRG_STAT|\ - 1<<RAPL_IDX_PKG_NRG_STAT|\ - 1<<RAPL_IDX_PP1_NRG_STAT) - -/* Servers have PP0, PKG, RAM */ -#define RAPL_IDX_SRV (1<<RAPL_IDX_PP0_NRG_STAT|\ - 1<<RAPL_IDX_PKG_NRG_STAT|\ - 1<<RAPL_IDX_RAM_NRG_STAT) - -/* Servers have PP0, PKG, RAM, PP1 */ -#define RAPL_IDX_HSW (1<<RAPL_IDX_PP0_NRG_STAT|\ - 1<<RAPL_IDX_PKG_NRG_STAT|\ - 1<<RAPL_IDX_RAM_NRG_STAT|\ - 1<<RAPL_IDX_PP1_NRG_STAT) - -/* SKL clients have PP0, PKG, RAM, PP1, PSYS */ -#define RAPL_IDX_SKL_CLN (1<<RAPL_IDX_PP0_NRG_STAT|\ - 1<<RAPL_IDX_PKG_NRG_STAT|\ - 1<<RAPL_IDX_RAM_NRG_STAT|\ - 1<<RAPL_IDX_PP1_NRG_STAT|\ - 1<<RAPL_IDX_PSYS_NRG_STAT) - -/* Knights Landing has PKG, RAM */ -#define RAPL_IDX_KNL (1<<RAPL_IDX_PKG_NRG_STAT|\ - 1<<RAPL_IDX_RAM_NRG_STAT) - /* * event code: LSB 8 bits, passed in attr->config * any other bit is reserved @@ -153,12 +127,18 @@ struct rapl_pmus { struct rapl_pmu *pmus[]; }; +struct rapl_model { + unsigned long events; + bool apply_quirk; +}; + /* 1/2^hw_unit Joule */ static int rapl_hw_unit[NR_RAPL_DOMAINS] __read_mostly; static struct rapl_pmus *rapl_pmus; static cpumask_t rapl_cpu_mask; static unsigned int rapl_cntr_mask; static u64 rapl_timer_ms; +static struct perf_msr rapl_msrs[]; static inline struct rapl_pmu *cpu_to_rapl_pmu(unsigned int cpu) { @@ -350,7 +330,7 @@ static void rapl_pmu_event_del(struct perf_event *event, int flags) static int rapl_pmu_event_init(struct perf_event *event) { u64 cfg = event->attr.config & RAPL_EVENT_MASK; - int bit, msr, ret = 0; + int bit, ret = 0; struct rapl_pmu *pmu; /* only look at RAPL events */ @@ -366,33 +346,12 @@ static int rapl_pmu_event_init(struct perf_event *event) event->event_caps |= PERF_EV_CAP_READ_ACTIVE_PKG; - /* - * check event is known (determines counter) - */ - switch (cfg) { - case INTEL_RAPL_PP0: - bit = RAPL_IDX_PP0_NRG_STAT; - msr = MSR_PP0_ENERGY_STATUS; - break; - case INTEL_RAPL_PKG: - bit = RAPL_IDX_PKG_NRG_STAT; - msr = MSR_PKG_ENERGY_STATUS; - break; - case INTEL_RAPL_RAM: - bit = RAPL_IDX_RAM_NRG_STAT; - msr = MSR_DRAM_ENERGY_STATUS; - break; - case INTEL_RAPL_PP1: - bit = RAPL_IDX_PP1_NRG_STAT; - msr = MSR_PP1_ENERGY_STATUS; - break; - case INTEL_RAPL_PSYS: - bit = RAPL_IDX_PSYS_NRG_STAT; - msr = MSR_PLATFORM_ENERGY_STATUS; - break; - default: + if (!cfg || cfg >= NR_RAPL_DOMAINS + 1) return -EINVAL; - } + + cfg = array_index_nospec((long)cfg, NR_RAPL_DOMAINS + 1); + bit = cfg - 1; + /* check event supported */ if (!(rapl_cntr_mask & (1 << bit))) return -EINVAL; @@ -407,7 +366,7 @@ static int rapl_pmu_event_init(struct perf_event *event) return -EINVAL; event->cpu = pmu->cpu; event->pmu_private = pmu; - event->hw.event_base = msr; + event->hw.event_base = rapl_msrs[bit].msr; event->hw.config = cfg; event->hw.idx = bit; @@ -457,110 +416,111 @@ RAPL_EVENT_ATTR_STR(energy-ram.scale, rapl_ram_scale, "2.3283064365386962890 RAPL_EVENT_ATTR_STR(energy-gpu.scale, rapl_gpu_scale, "2.3283064365386962890625e-10"); RAPL_EVENT_ATTR_STR(energy-psys.scale, rapl_psys_scale, "2.3283064365386962890625e-10"); -static struct attribute *rapl_events_srv_attr[] = { - EVENT_PTR(rapl_cores), - EVENT_PTR(rapl_pkg), - EVENT_PTR(rapl_ram), +/* + * There are no default events, but we need to create + * "events" group (with empty attrs) before updating + * it with detected events. + */ +static struct attribute *attrs_empty[] = { + NULL, +}; - EVENT_PTR(rapl_cores_unit), - EVENT_PTR(rapl_pkg_unit), - EVENT_PTR(rapl_ram_unit), +static struct attribute_group rapl_pmu_events_group = { + .name = "events", + .attrs = attrs_empty, +}; - EVENT_PTR(rapl_cores_scale), - EVENT_PTR(rapl_pkg_scale), - EVENT_PTR(rapl_ram_scale), +DEFINE_RAPL_FORMAT_ATTR(event, event, "config:0-7"); +static struct attribute *rapl_formats_attr[] = { + &format_attr_event.attr, NULL, }; -static struct attribute *rapl_events_cln_attr[] = { - EVENT_PTR(rapl_cores), - EVENT_PTR(rapl_pkg), - EVENT_PTR(rapl_gpu), - - EVENT_PTR(rapl_cores_unit), - EVENT_PTR(rapl_pkg_unit), - EVENT_PTR(rapl_gpu_unit), +static struct attribute_group rapl_pmu_format_group = { + .name = "format", + .attrs = rapl_formats_attr, +}; - EVENT_PTR(rapl_cores_scale), - EVENT_PTR(rapl_pkg_scale), - EVENT_PTR(rapl_gpu_scale), +static const struct attribute_group *rapl_attr_groups[] = { + &rapl_pmu_attr_group, + &rapl_pmu_format_group, + &rapl_pmu_events_group, NULL, }; -static struct attribute *rapl_events_hsw_attr[] = { +static struct attribute *rapl_events_cores[] = { EVENT_PTR(rapl_cores), - EVENT_PTR(rapl_pkg), - EVENT_PTR(rapl_gpu), - EVENT_PTR(rapl_ram), - EVENT_PTR(rapl_cores_unit), - EVENT_PTR(rapl_pkg_unit), - EVENT_PTR(rapl_gpu_unit), - EVENT_PTR(rapl_ram_unit), - EVENT_PTR(rapl_cores_scale), - EVENT_PTR(rapl_pkg_scale), - EVENT_PTR(rapl_gpu_scale), - EVENT_PTR(rapl_ram_scale), NULL, }; -static struct attribute *rapl_events_skl_attr[] = { - EVENT_PTR(rapl_cores), - EVENT_PTR(rapl_pkg), - EVENT_PTR(rapl_gpu), - EVENT_PTR(rapl_ram), - EVENT_PTR(rapl_psys), +static struct attribute_group rapl_events_cores_group = { + .name = "events", + .attrs = rapl_events_cores, +}; - EVENT_PTR(rapl_cores_unit), +static struct attribute *rapl_events_pkg[] = { + EVENT_PTR(rapl_pkg), EVENT_PTR(rapl_pkg_unit), - EVENT_PTR(rapl_gpu_unit), - EVENT_PTR(rapl_ram_unit), - EVENT_PTR(rapl_psys_unit), - - EVENT_PTR(rapl_cores_scale), EVENT_PTR(rapl_pkg_scale), - EVENT_PTR(rapl_gpu_scale), - EVENT_PTR(rapl_ram_scale), - EVENT_PTR(rapl_psys_scale), NULL, }; -static struct attribute *rapl_events_knl_attr[] = { - EVENT_PTR(rapl_pkg), - EVENT_PTR(rapl_ram), +static struct attribute_group rapl_events_pkg_group = { + .name = "events", + .attrs = rapl_events_pkg, +}; - EVENT_PTR(rapl_pkg_unit), +static struct attribute *rapl_events_ram[] = { + EVENT_PTR(rapl_ram), EVENT_PTR(rapl_ram_unit), - - EVENT_PTR(rapl_pkg_scale), EVENT_PTR(rapl_ram_scale), NULL, }; -static struct attribute_group rapl_pmu_events_group = { - .name = "events", - .attrs = NULL, /* patched at runtime */ +static struct attribute_group rapl_events_ram_group = { + .name = "events", + .attrs = rapl_events_ram, }; -DEFINE_RAPL_FORMAT_ATTR(event, event, "config:0-7"); -static struct attribute *rapl_formats_attr[] = { - &format_attr_event.attr, +static struct attribute *rapl_events_gpu[] = { + EVENT_PTR(rapl_gpu), + EVENT_PTR(rapl_gpu_unit), + EVENT_PTR(rapl_gpu_scale), NULL, }; -static struct attribute_group rapl_pmu_format_group = { - .name = "format", - .attrs = rapl_formats_attr, +static struct attribute_group rapl_events_gpu_group = { + .name = "events", + .attrs = rapl_events_gpu, }; -static const struct attribute_group *rapl_attr_groups[] = { - &rapl_pmu_attr_group, - &rapl_pmu_format_group, - &rapl_pmu_events_group, +static struct attribute *rapl_events_psys[] = { + EVENT_PTR(rapl_psys), + EVENT_PTR(rapl_psys_unit), + EVENT_PTR(rapl_psys_scale), NULL, }; +static struct attribute_group rapl_events_psys_group = { + .name = "events", + .attrs = rapl_events_psys, +}; + +static bool test_msr(int idx, void *data) +{ + return test_bit(idx, (unsigned long *) data); +} + +static struct perf_msr rapl_msrs[] = { + [PERF_RAPL_PP0] = { MSR_PP0_ENERGY_STATUS, &rapl_events_cores_group, test_msr }, + [PERF_RAPL_PKG] = { MSR_PKG_ENERGY_STATUS, &rapl_events_pkg_group, test_msr }, + [PERF_RAPL_RAM] = { MSR_DRAM_ENERGY_STATUS, &rapl_events_ram_group, test_msr }, + [PERF_RAPL_PP1] = { MSR_PP1_ENERGY_STATUS, &rapl_events_gpu_group, test_msr }, + [PERF_RAPL_PSYS] = { MSR_PLATFORM_ENERGY_STATUS, &rapl_events_psys_group, test_msr }, +}; + static int rapl_cpu_offline(unsigned int cpu) { struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu); @@ -633,7 +593,7 @@ static int rapl_check_hw_unit(bool apply_quirk) * of 2. Datasheet, September 2014, Reference Number: 330784-001 " */ if (apply_quirk) - rapl_hw_unit[RAPL_IDX_RAM_NRG_STAT] = 16; + rapl_hw_unit[PERF_RAPL_RAM] = 16; /* * Calculate the timer rate: @@ -674,6 +634,15 @@ static void cleanup_rapl_pmus(void) kfree(rapl_pmus); } +const struct attribute_group *rapl_attr_update[] = { + &rapl_events_cores_group, + &rapl_events_pkg_group, + &rapl_events_ram_group, + &rapl_events_gpu_group, + &rapl_events_gpu_group, + NULL, +}; + static int __init init_rapl_pmus(void) { int maxdie = topology_max_packages() * topology_max_die_per_package(); @@ -686,6 +655,7 @@ static int __init init_rapl_pmus(void) rapl_pmus->maxdie = maxdie; rapl_pmus->pmu.attr_groups = rapl_attr_groups; + rapl_pmus->pmu.attr_update = rapl_attr_update; rapl_pmus->pmu.task_ctx_nr = perf_invalid_context; rapl_pmus->pmu.event_init = rapl_pmu_event_init; rapl_pmus->pmu.add = rapl_pmu_event_add; @@ -701,105 +671,96 @@ static int __init init_rapl_pmus(void) #define X86_RAPL_MODEL_MATCH(model, init) \ { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)&init } -struct intel_rapl_init_fun { - bool apply_quirk; - int cntr_mask; - struct attribute **attrs; -}; - -static const struct intel_rapl_init_fun snb_rapl_init __initconst = { - .apply_quirk = false, - .cntr_mask = RAPL_IDX_CLN, - .attrs = rapl_events_cln_attr, +static struct rapl_model model_snb = { + .events = BIT(PERF_RAPL_PP0) | + BIT(PERF_RAPL_PKG) | + BIT(PERF_RAPL_PP1), + .apply_quirk = false, }; -static const struct intel_rapl_init_fun hsx_rapl_init __initconst = { - .apply_quirk = true, - .cntr_mask = RAPL_IDX_SRV, - .attrs = rapl_events_srv_attr, +static struct rapl_model model_snbep = { + .events = BIT(PERF_RAPL_PP0) | + BIT(PERF_RAPL_PKG) | + BIT(PERF_RAPL_RAM), + .apply_quirk = false, }; -static const struct intel_rapl_init_fun hsw_rapl_init __initconst = { - .apply_quirk = false, - .cntr_mask = RAPL_IDX_HSW, - .attrs = rapl_events_hsw_attr, +static struct rapl_model model_hsw = { + .events = BIT(PERF_RAPL_PP0) | + BIT(PERF_RAPL_PKG) | + BIT(PERF_RAPL_RAM) | + BIT(PERF_RAPL_PP1), + .apply_quirk = false, }; -static const struct intel_rapl_init_fun snbep_rapl_init __initconst = { - .apply_quirk = false, - .cntr_mask = RAPL_IDX_SRV, - .attrs = rapl_events_srv_attr, +static struct rapl_model model_hsx = { + .events = BIT(PERF_RAPL_PP0) | + BIT(PERF_RAPL_PKG) | + BIT(PERF_RAPL_RAM), + .apply_quirk = true, }; -static const struct intel_rapl_init_fun knl_rapl_init __initconst = { - .apply_quirk = true, - .cntr_mask = RAPL_IDX_KNL, - .attrs = rapl_events_knl_attr, +static struct rapl_model model_knl = { + .events = BIT(PERF_RAPL_PKG) | + BIT(PERF_RAPL_RAM), + .apply_quirk = true, }; -static const struct intel_rapl_init_fun skl_rapl_init __initconst = { - .apply_quirk = false, - .cntr_mask = RAPL_IDX_SKL_CLN, - .attrs = rapl_events_skl_attr, +static struct rapl_model model_skl = { + .events = BIT(PERF_RAPL_PP0) | + BIT(PERF_RAPL_PKG) | + BIT(PERF_RAPL_RAM) | + BIT(PERF_RAPL_PP1) | + BIT(PERF_RAPL_PSYS), + .apply_quirk = false, }; -static const struct x86_cpu_id rapl_cpu_match[] __initconst = { - X86_RAPL_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE, snb_rapl_init), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE_X, snbep_rapl_init), - - X86_RAPL_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE, snb_rapl_init), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE_X, snbep_rapl_init), - - X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_CORE, hsw_rapl_init), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_X, hsx_rapl_init), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_ULT, hsw_rapl_init), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_GT3E, hsw_rapl_init), - - X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_CORE, hsw_rapl_init), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_GT3E, hsw_rapl_init), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_X, hsx_rapl_init), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_XEON_D, hsx_rapl_init), - - X86_RAPL_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNL, knl_rapl_init), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNM, knl_rapl_init), - - X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_MOBILE, skl_rapl_init), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_DESKTOP, skl_rapl_init), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_X, hsx_rapl_init), - - X86_RAPL_MODEL_MATCH(INTEL_FAM6_KABYLAKE_MOBILE, skl_rapl_init), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_KABYLAKE_DESKTOP, skl_rapl_init), - - X86_RAPL_MODEL_MATCH(INTEL_FAM6_CANNONLAKE_MOBILE, skl_rapl_init), - - X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT, hsw_rapl_init), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT_X, hsw_rapl_init), - - X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT_PLUS, hsw_rapl_init), - - X86_RAPL_MODEL_MATCH(INTEL_FAM6_ICELAKE_MOBILE, skl_rapl_init), +static const struct x86_cpu_id rapl_model_match[] __initconst = { + X86_RAPL_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE, model_snb), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE_X, model_snbep), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE, model_snb), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE_X, model_snbep), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_CORE, model_hsw), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_X, model_hsx), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_ULT, model_hsw), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_GT3E, model_hsw), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_CORE, model_hsw), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_GT3E, model_hsw), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_X, model_hsx), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_XEON_D, model_hsx), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNL, model_knl), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNM, model_knl), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_MOBILE, model_skl), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_DESKTOP, model_skl), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_X, model_hsx), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_KABYLAKE_MOBILE, model_skl), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_KABYLAKE_DESKTOP, model_skl), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_CANNONLAKE_MOBILE, model_skl), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT, model_hsw), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT_X, model_hsw), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT_PLUS, model_hsw), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_ICELAKE_MOBILE, model_skl), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_ICELAKE_DESKTOP, model_skl), {}, }; -MODULE_DEVICE_TABLE(x86cpu, rapl_cpu_match); +MODULE_DEVICE_TABLE(x86cpu, rapl_model_match); static int __init rapl_pmu_init(void) { const struct x86_cpu_id *id; - struct intel_rapl_init_fun *rapl_init; - bool apply_quirk; + struct rapl_model *rm; int ret; - id = x86_match_cpu(rapl_cpu_match); + id = x86_match_cpu(rapl_model_match); if (!id) return -ENODEV; - rapl_init = (struct intel_rapl_init_fun *)id->driver_data; - apply_quirk = rapl_init->apply_quirk; - rapl_cntr_mask = rapl_init->cntr_mask; - rapl_pmu_events_group.attrs = rapl_init->attrs; + rm = (struct rapl_model *) id->driver_data; + rapl_cntr_mask = perf_msr_probe(rapl_msrs, PERF_RAPL_MAX, + false, (void *) &rm->events); - ret = rapl_check_hw_unit(apply_quirk); + ret = rapl_check_hw_unit(rm->apply_quirk); if (ret) return ret; diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index 6094c8db949d..3694a5d0703d 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -8,6 +8,7 @@ static struct intel_uncore_type *empty_uncore[] = { NULL, }; struct intel_uncore_type **uncore_msr_uncores = empty_uncore; struct intel_uncore_type **uncore_pci_uncores = empty_uncore; +struct intel_uncore_type **uncore_mmio_uncores = empty_uncore; static bool pcidrv_registered; struct pci_driver *uncore_pci_driver; @@ -28,7 +29,7 @@ struct event_constraint uncore_constraint_empty = MODULE_LICENSE("GPL"); -static int uncore_pcibus_to_physid(struct pci_bus *bus) +int uncore_pcibus_to_physid(struct pci_bus *bus) { struct pci2phy_map *map; int phys_id = -1; @@ -119,6 +120,21 @@ u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *eve return count; } +void uncore_mmio_exit_box(struct intel_uncore_box *box) +{ + if (box->io_addr) + iounmap(box->io_addr); +} + +u64 uncore_mmio_read_counter(struct intel_uncore_box *box, + struct perf_event *event) +{ + if (!box->io_addr) + return 0; + + return readq(box->io_addr + event->hw.event_base); +} + /* * generic get constraint function for shared match/mask registers. */ @@ -1143,12 +1159,27 @@ static void uncore_change_context(struct intel_uncore_type **uncores, uncore_change_type_ctx(*uncores, old_cpu, new_cpu); } -static int uncore_event_cpu_offline(unsigned int cpu) +static void uncore_box_unref(struct intel_uncore_type **types, int id) { - struct intel_uncore_type *type, **types = uncore_msr_uncores; + struct intel_uncore_type *type; struct intel_uncore_pmu *pmu; struct intel_uncore_box *box; - int i, die, target; + int i; + + for (; *types; types++) { + type = *types; + pmu = type->pmus; + for (i = 0; i < type->num_boxes; i++, pmu++) { + box = pmu->boxes[id]; + if (box && atomic_dec_return(&box->refcnt) == 0) + uncore_box_exit(box); + } + } +} + +static int uncore_event_cpu_offline(unsigned int cpu) +{ + int die, target; /* Check if exiting cpu is used for collecting uncore events */ if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask)) @@ -1163,20 +1194,14 @@ static int uncore_event_cpu_offline(unsigned int cpu) target = -1; uncore_change_context(uncore_msr_uncores, cpu, target); + uncore_change_context(uncore_mmio_uncores, cpu, target); uncore_change_context(uncore_pci_uncores, cpu, target); unref: /* Clear the references */ die = topology_logical_die_id(cpu); - for (; *types; types++) { - type = *types; - pmu = type->pmus; - for (i = 0; i < type->num_boxes; i++, pmu++) { - box = pmu->boxes[die]; - if (box && atomic_dec_return(&box->refcnt) == 0) - uncore_box_exit(box); - } - } + uncore_box_unref(uncore_msr_uncores, die); + uncore_box_unref(uncore_mmio_uncores, die); return 0; } @@ -1219,15 +1244,15 @@ cleanup: return -ENOMEM; } -static int uncore_event_cpu_online(unsigned int cpu) +static int uncore_box_ref(struct intel_uncore_type **types, + int id, unsigned int cpu) { - struct intel_uncore_type *type, **types = uncore_msr_uncores; + struct intel_uncore_type *type; struct intel_uncore_pmu *pmu; struct intel_uncore_box *box; - int i, ret, die, target; + int i, ret; - die = topology_logical_die_id(cpu); - ret = allocate_boxes(types, die, cpu); + ret = allocate_boxes(types, id, cpu); if (ret) return ret; @@ -1235,11 +1260,23 @@ static int uncore_event_cpu_online(unsigned int cpu) type = *types; pmu = type->pmus; for (i = 0; i < type->num_boxes; i++, pmu++) { - box = pmu->boxes[die]; + box = pmu->boxes[id]; if (box && atomic_inc_return(&box->refcnt) == 1) uncore_box_init(box); } } + return 0; +} + +static int uncore_event_cpu_online(unsigned int cpu) +{ + int die, target, msr_ret, mmio_ret; + + die = topology_logical_die_id(cpu); + msr_ret = uncore_box_ref(uncore_msr_uncores, die, cpu); + mmio_ret = uncore_box_ref(uncore_mmio_uncores, die, cpu); + if (msr_ret && mmio_ret) + return -ENOMEM; /* * Check if there is an online cpu in the package @@ -1251,7 +1288,10 @@ static int uncore_event_cpu_online(unsigned int cpu) cpumask_set_cpu(cpu, &uncore_cpu_mask); - uncore_change_context(uncore_msr_uncores, -1, cpu); + if (!msr_ret) + uncore_change_context(uncore_msr_uncores, -1, cpu); + if (!mmio_ret) + uncore_change_context(uncore_mmio_uncores, -1, cpu); uncore_change_context(uncore_pci_uncores, -1, cpu); return 0; } @@ -1299,12 +1339,35 @@ err: return ret; } +static int __init uncore_mmio_init(void) +{ + struct intel_uncore_type **types = uncore_mmio_uncores; + int ret; + + ret = uncore_types_init(types, true); + if (ret) + goto err; + + for (; *types; types++) { + ret = type_pmu_register(*types); + if (ret) + goto err; + } + return 0; +err: + uncore_types_exit(uncore_mmio_uncores); + uncore_mmio_uncores = empty_uncore; + return ret; +} + + #define X86_UNCORE_MODEL_MATCH(model, init) \ { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)&init } struct intel_uncore_init_fun { void (*cpu_init)(void); int (*pci_init)(void); + void (*mmio_init)(void); }; static const struct intel_uncore_init_fun nhm_uncore_init __initconst = { @@ -1375,6 +1438,12 @@ static const struct intel_uncore_init_fun icl_uncore_init __initconst = { .pci_init = skl_uncore_pci_init, }; +static const struct intel_uncore_init_fun snr_uncore_init __initconst = { + .cpu_init = snr_uncore_cpu_init, + .pci_init = snr_uncore_pci_init, + .mmio_init = snr_uncore_mmio_init, +}; + static const struct x86_cpu_id intel_uncore_match[] __initconst = { X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM_EP, nhm_uncore_init), X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM, nhm_uncore_init), @@ -1403,6 +1472,8 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = { X86_UNCORE_MODEL_MATCH(INTEL_FAM6_KABYLAKE_DESKTOP, skl_uncore_init), X86_UNCORE_MODEL_MATCH(INTEL_FAM6_ICELAKE_MOBILE, icl_uncore_init), X86_UNCORE_MODEL_MATCH(INTEL_FAM6_ICELAKE_NNPI, icl_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_ICELAKE_DESKTOP, icl_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_ATOM_TREMONT_X, snr_uncore_init), {}, }; @@ -1412,7 +1483,7 @@ static int __init intel_uncore_init(void) { const struct x86_cpu_id *id; struct intel_uncore_init_fun *uncore_init; - int pret = 0, cret = 0, ret; + int pret = 0, cret = 0, mret = 0, ret; id = x86_match_cpu(intel_uncore_match); if (!id) @@ -1435,7 +1506,12 @@ static int __init intel_uncore_init(void) cret = uncore_cpu_init(); } - if (cret && pret) + if (uncore_init->mmio_init) { + uncore_init->mmio_init(); + mret = uncore_mmio_init(); + } + + if (cret && pret && mret) return -ENODEV; /* Install hotplug callbacks to setup the targets for each package */ @@ -1449,6 +1525,7 @@ static int __init intel_uncore_init(void) err: uncore_types_exit(uncore_msr_uncores); + uncore_types_exit(uncore_mmio_uncores); uncore_pci_exit(); return ret; } @@ -1458,6 +1535,7 @@ static void __exit intel_uncore_exit(void) { cpuhp_remove_state(CPUHP_AP_PERF_X86_UNCORE_ONLINE); uncore_types_exit(uncore_msr_uncores); + uncore_types_exit(uncore_mmio_uncores); uncore_pci_exit(); } module_exit(intel_uncore_exit); diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h index 33aba2504cb1..f36f7bebbc1b 100644 --- a/arch/x86/events/intel/uncore.h +++ b/arch/x86/events/intel/uncore.h @@ -2,6 +2,7 @@ #include <linux/slab.h> #include <linux/pci.h> #include <asm/apicdef.h> +#include <linux/io-64-nonatomic-lo-hi.h> #include <linux/perf_event.h> #include "../perf_event.h" @@ -56,7 +57,10 @@ struct intel_uncore_type { unsigned fixed_ctr; unsigned fixed_ctl; unsigned box_ctl; - unsigned msr_offset; + union { + unsigned msr_offset; + unsigned mmio_offset; + }; unsigned num_shared_regs:8; unsigned single_fixed:1; unsigned pair_ctr_ctl:1; @@ -125,7 +129,7 @@ struct intel_uncore_box { struct hrtimer hrtimer; struct list_head list; struct list_head active_list; - void *io_addr; + void __iomem *io_addr; struct intel_uncore_extra_reg shared_regs[0]; }; @@ -159,6 +163,7 @@ struct pci2phy_map { }; struct pci2phy_map *__find_pci2phy_map(int segment); +int uncore_pcibus_to_physid(struct pci_bus *bus); ssize_t uncore_event_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf); @@ -190,6 +195,13 @@ static inline bool uncore_pmc_freerunning(int idx) return idx == UNCORE_PMC_IDX_FREERUNNING; } +static inline +unsigned int uncore_mmio_box_ctl(struct intel_uncore_box *box) +{ + return box->pmu->type->box_ctl + + box->pmu->type->mmio_offset * box->pmu->pmu_idx; +} + static inline unsigned uncore_pci_box_ctl(struct intel_uncore_box *box) { return box->pmu->type->box_ctl; @@ -330,7 +342,7 @@ unsigned uncore_msr_perf_ctr(struct intel_uncore_box *box, int idx) static inline unsigned uncore_fixed_ctl(struct intel_uncore_box *box) { - if (box->pci_dev) + if (box->pci_dev || box->io_addr) return uncore_pci_fixed_ctl(box); else return uncore_msr_fixed_ctl(box); @@ -339,7 +351,7 @@ unsigned uncore_fixed_ctl(struct intel_uncore_box *box) static inline unsigned uncore_fixed_ctr(struct intel_uncore_box *box) { - if (box->pci_dev) + if (box->pci_dev || box->io_addr) return uncore_pci_fixed_ctr(box); else return uncore_msr_fixed_ctr(box); @@ -348,7 +360,7 @@ unsigned uncore_fixed_ctr(struct intel_uncore_box *box) static inline unsigned uncore_event_ctl(struct intel_uncore_box *box, int idx) { - if (box->pci_dev) + if (box->pci_dev || box->io_addr) return uncore_pci_event_ctl(box, idx); else return uncore_msr_event_ctl(box, idx); @@ -357,7 +369,7 @@ unsigned uncore_event_ctl(struct intel_uncore_box *box, int idx) static inline unsigned uncore_perf_ctr(struct intel_uncore_box *box, int idx) { - if (box->pci_dev) + if (box->pci_dev || box->io_addr) return uncore_pci_perf_ctr(box, idx); else return uncore_msr_perf_ctr(box, idx); @@ -419,6 +431,16 @@ static inline bool is_freerunning_event(struct perf_event *event) (((cfg >> 8) & 0xff) >= UNCORE_FREERUNNING_UMASK_START); } +/* Check and reject invalid config */ +static inline int uncore_freerunning_hw_config(struct intel_uncore_box *box, + struct perf_event *event) +{ + if (is_freerunning_event(event)) + return 0; + + return -EINVAL; +} + static inline void uncore_disable_box(struct intel_uncore_box *box) { if (box->pmu->type->ops->disable_box) @@ -482,6 +504,9 @@ static inline struct intel_uncore_box *uncore_event_to_box(struct perf_event *ev struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu); u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event); +void uncore_mmio_exit_box(struct intel_uncore_box *box); +u64 uncore_mmio_read_counter(struct intel_uncore_box *box, + struct perf_event *event); void uncore_pmu_start_hrtimer(struct intel_uncore_box *box); void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box); void uncore_pmu_event_start(struct perf_event *event, int flags); @@ -497,6 +522,7 @@ u64 uncore_shared_reg_config(struct intel_uncore_box *box, int idx); extern struct intel_uncore_type **uncore_msr_uncores; extern struct intel_uncore_type **uncore_pci_uncores; +extern struct intel_uncore_type **uncore_mmio_uncores; extern struct pci_driver *uncore_pci_driver; extern raw_spinlock_t pci2phy_map_lock; extern struct list_head pci2phy_map_head; @@ -528,6 +554,9 @@ int knl_uncore_pci_init(void); void knl_uncore_cpu_init(void); int skx_uncore_pci_init(void); void skx_uncore_cpu_init(void); +int snr_uncore_pci_init(void); +void snr_uncore_cpu_init(void); +void snr_uncore_mmio_init(void); /* uncore_nhmex.c */ void nhmex_uncore_cpu_init(void); diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c index f8431819b3e1..dbaa1b088a30 100644 --- a/arch/x86/events/intel/uncore_snb.c +++ b/arch/x86/events/intel/uncore_snb.c @@ -3,27 +3,29 @@ #include "uncore.h" /* Uncore IMC PCI IDs */ -#define PCI_DEVICE_ID_INTEL_SNB_IMC 0x0100 -#define PCI_DEVICE_ID_INTEL_IVB_IMC 0x0154 -#define PCI_DEVICE_ID_INTEL_IVB_E3_IMC 0x0150 -#define PCI_DEVICE_ID_INTEL_HSW_IMC 0x0c00 -#define PCI_DEVICE_ID_INTEL_HSW_U_IMC 0x0a04 -#define PCI_DEVICE_ID_INTEL_BDW_IMC 0x1604 -#define PCI_DEVICE_ID_INTEL_SKL_U_IMC 0x1904 -#define PCI_DEVICE_ID_INTEL_SKL_Y_IMC 0x190c -#define PCI_DEVICE_ID_INTEL_SKL_HD_IMC 0x1900 -#define PCI_DEVICE_ID_INTEL_SKL_HQ_IMC 0x1910 -#define PCI_DEVICE_ID_INTEL_SKL_SD_IMC 0x190f -#define PCI_DEVICE_ID_INTEL_SKL_SQ_IMC 0x191f -#define PCI_DEVICE_ID_INTEL_KBL_Y_IMC 0x590c -#define PCI_DEVICE_ID_INTEL_KBL_U_IMC 0x5904 -#define PCI_DEVICE_ID_INTEL_KBL_UQ_IMC 0x5914 -#define PCI_DEVICE_ID_INTEL_KBL_SD_IMC 0x590f -#define PCI_DEVICE_ID_INTEL_KBL_SQ_IMC 0x591f -#define PCI_DEVICE_ID_INTEL_CFL_2U_IMC 0x3ecc -#define PCI_DEVICE_ID_INTEL_CFL_4U_IMC 0x3ed0 -#define PCI_DEVICE_ID_INTEL_CFL_4H_IMC 0x3e10 -#define PCI_DEVICE_ID_INTEL_CFL_6H_IMC 0x3ec4 +#define PCI_DEVICE_ID_INTEL_SNB_IMC 0x0100 +#define PCI_DEVICE_ID_INTEL_IVB_IMC 0x0154 +#define PCI_DEVICE_ID_INTEL_IVB_E3_IMC 0x0150 +#define PCI_DEVICE_ID_INTEL_HSW_IMC 0x0c00 +#define PCI_DEVICE_ID_INTEL_HSW_U_IMC 0x0a04 +#define PCI_DEVICE_ID_INTEL_BDW_IMC 0x1604 +#define PCI_DEVICE_ID_INTEL_SKL_U_IMC 0x1904 +#define PCI_DEVICE_ID_INTEL_SKL_Y_IMC 0x190c +#define PCI_DEVICE_ID_INTEL_SKL_HD_IMC 0x1900 +#define PCI_DEVICE_ID_INTEL_SKL_HQ_IMC 0x1910 +#define PCI_DEVICE_ID_INTEL_SKL_SD_IMC 0x190f +#define PCI_DEVICE_ID_INTEL_SKL_SQ_IMC 0x191f +#define PCI_DEVICE_ID_INTEL_KBL_Y_IMC 0x590c +#define PCI_DEVICE_ID_INTEL_KBL_U_IMC 0x5904 +#define PCI_DEVICE_ID_INTEL_KBL_UQ_IMC 0x5914 +#define PCI_DEVICE_ID_INTEL_KBL_SD_IMC 0x590f +#define PCI_DEVICE_ID_INTEL_KBL_SQ_IMC 0x591f +#define PCI_DEVICE_ID_INTEL_KBL_HQ_IMC 0x5910 +#define PCI_DEVICE_ID_INTEL_KBL_WQ_IMC 0x5918 +#define PCI_DEVICE_ID_INTEL_CFL_2U_IMC 0x3ecc +#define PCI_DEVICE_ID_INTEL_CFL_4U_IMC 0x3ed0 +#define PCI_DEVICE_ID_INTEL_CFL_4H_IMC 0x3e10 +#define PCI_DEVICE_ID_INTEL_CFL_6H_IMC 0x3ec4 #define PCI_DEVICE_ID_INTEL_CFL_2S_D_IMC 0x3e0f #define PCI_DEVICE_ID_INTEL_CFL_4S_D_IMC 0x3e1f #define PCI_DEVICE_ID_INTEL_CFL_6S_D_IMC 0x3ec2 @@ -34,9 +36,15 @@ #define PCI_DEVICE_ID_INTEL_CFL_4S_S_IMC 0x3e33 #define PCI_DEVICE_ID_INTEL_CFL_6S_S_IMC 0x3eca #define PCI_DEVICE_ID_INTEL_CFL_8S_S_IMC 0x3e32 +#define PCI_DEVICE_ID_INTEL_AML_YD_IMC 0x590c +#define PCI_DEVICE_ID_INTEL_AML_YQ_IMC 0x590d +#define PCI_DEVICE_ID_INTEL_WHL_UQ_IMC 0x3ed0 +#define PCI_DEVICE_ID_INTEL_WHL_4_UQ_IMC 0x3e34 +#define PCI_DEVICE_ID_INTEL_WHL_UD_IMC 0x3e35 #define PCI_DEVICE_ID_INTEL_ICL_U_IMC 0x8a02 #define PCI_DEVICE_ID_INTEL_ICL_U2_IMC 0x8a12 + /* SNB event control */ #define SNB_UNC_CTL_EV_SEL_MASK 0x000000ff #define SNB_UNC_CTL_UMASK_MASK 0x0000ff00 @@ -420,11 +428,6 @@ static void snb_uncore_imc_init_box(struct intel_uncore_box *box) box->hrtimer_duration = UNCORE_SNB_IMC_HRTIMER_INTERVAL; } -static void snb_uncore_imc_exit_box(struct intel_uncore_box *box) -{ - iounmap(box->io_addr); -} - static void snb_uncore_imc_enable_box(struct intel_uncore_box *box) {} @@ -437,13 +440,6 @@ static void snb_uncore_imc_enable_event(struct intel_uncore_box *box, struct per static void snb_uncore_imc_disable_event(struct intel_uncore_box *box, struct perf_event *event) {} -static u64 snb_uncore_imc_read_counter(struct intel_uncore_box *box, struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - - return (u64)*(unsigned int *)(box->io_addr + hwc->event_base); -} - /* * Keep the custom event_init() function compatible with old event * encoding for free running counters. @@ -570,13 +566,13 @@ static struct pmu snb_uncore_imc_pmu = { static struct intel_uncore_ops snb_uncore_imc_ops = { .init_box = snb_uncore_imc_init_box, - .exit_box = snb_uncore_imc_exit_box, + .exit_box = uncore_mmio_exit_box, .enable_box = snb_uncore_imc_enable_box, .disable_box = snb_uncore_imc_disable_box, .disable_event = snb_uncore_imc_disable_event, .enable_event = snb_uncore_imc_enable_event, .hw_config = snb_uncore_imc_hw_config, - .read_counter = snb_uncore_imc_read_counter, + .read_counter = uncore_mmio_read_counter, }; static struct intel_uncore_type snb_uncore_imc = { @@ -682,6 +678,14 @@ static const struct pci_device_id skl_uncore_pci_ids[] = { .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), }, { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_HQ_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_WQ_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_2U_IMC), .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), }, @@ -737,6 +741,26 @@ static const struct pci_device_id skl_uncore_pci_ids[] = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_8S_S_IMC), .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_AML_YD_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_AML_YQ_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_WHL_UQ_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_WHL_4_UQ_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_WHL_UD_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, { /* end: all zeroes */ }, }; @@ -807,6 +831,8 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = { IMC_DEV(KBL_UQ_IMC, &skl_uncore_pci_driver), /* 7th Gen Core U Quad Core */ IMC_DEV(KBL_SD_IMC, &skl_uncore_pci_driver), /* 7th Gen Core S Dual Core */ IMC_DEV(KBL_SQ_IMC, &skl_uncore_pci_driver), /* 7th Gen Core S Quad Core */ + IMC_DEV(KBL_HQ_IMC, &skl_uncore_pci_driver), /* 7th Gen Core H Quad Core */ + IMC_DEV(KBL_WQ_IMC, &skl_uncore_pci_driver), /* 7th Gen Core S 4 cores Work Station */ IMC_DEV(CFL_2U_IMC, &skl_uncore_pci_driver), /* 8th Gen Core U 2 Cores */ IMC_DEV(CFL_4U_IMC, &skl_uncore_pci_driver), /* 8th Gen Core U 4 Cores */ IMC_DEV(CFL_4H_IMC, &skl_uncore_pci_driver), /* 8th Gen Core H 4 Cores */ @@ -821,6 +847,11 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = { IMC_DEV(CFL_4S_S_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 4 Cores Server */ IMC_DEV(CFL_6S_S_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 6 Cores Server */ IMC_DEV(CFL_8S_S_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 8 Cores Server */ + IMC_DEV(AML_YD_IMC, &skl_uncore_pci_driver), /* 8th Gen Core Y Mobile Dual Core */ + IMC_DEV(AML_YQ_IMC, &skl_uncore_pci_driver), /* 8th Gen Core Y Mobile Quad Core */ + IMC_DEV(WHL_UQ_IMC, &skl_uncore_pci_driver), /* 8th Gen Core U Mobile Quad Core */ + IMC_DEV(WHL_4_UQ_IMC, &skl_uncore_pci_driver), /* 8th Gen Core U Mobile Quad Core */ + IMC_DEV(WHL_UD_IMC, &skl_uncore_pci_driver), /* 8th Gen Core U Mobile Dual Core */ IMC_DEV(ICL_U_IMC, &icl_uncore_pci_driver), /* 10th Gen Core Mobile */ IMC_DEV(ICL_U2_IMC, &icl_uncore_pci_driver), /* 10th Gen Core Mobile */ { /* end marker */ } diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index bbe89bc589f9..b10a5ec79e48 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -324,12 +324,77 @@ #define SKX_M2M_PCI_PMON_CTR0 0x200 #define SKX_M2M_PCI_PMON_BOX_CTL 0x258 +/* SNR Ubox */ +#define SNR_U_MSR_PMON_CTR0 0x1f98 +#define SNR_U_MSR_PMON_CTL0 0x1f91 +#define SNR_U_MSR_PMON_UCLK_FIXED_CTL 0x1f93 +#define SNR_U_MSR_PMON_UCLK_FIXED_CTR 0x1f94 + +/* SNR CHA */ +#define SNR_CHA_RAW_EVENT_MASK_EXT 0x3ffffff +#define SNR_CHA_MSR_PMON_CTL0 0x1c01 +#define SNR_CHA_MSR_PMON_CTR0 0x1c08 +#define SNR_CHA_MSR_PMON_BOX_CTL 0x1c00 +#define SNR_C0_MSR_PMON_BOX_FILTER0 0x1c05 + + +/* SNR IIO */ +#define SNR_IIO_MSR_PMON_CTL0 0x1e08 +#define SNR_IIO_MSR_PMON_CTR0 0x1e01 +#define SNR_IIO_MSR_PMON_BOX_CTL 0x1e00 +#define SNR_IIO_MSR_OFFSET 0x10 +#define SNR_IIO_PMON_RAW_EVENT_MASK_EXT 0x7ffff + +/* SNR IRP */ +#define SNR_IRP0_MSR_PMON_CTL0 0x1ea8 +#define SNR_IRP0_MSR_PMON_CTR0 0x1ea1 +#define SNR_IRP0_MSR_PMON_BOX_CTL 0x1ea0 +#define SNR_IRP_MSR_OFFSET 0x10 + +/* SNR M2PCIE */ +#define SNR_M2PCIE_MSR_PMON_CTL0 0x1e58 +#define SNR_M2PCIE_MSR_PMON_CTR0 0x1e51 +#define SNR_M2PCIE_MSR_PMON_BOX_CTL 0x1e50 +#define SNR_M2PCIE_MSR_OFFSET 0x10 + +/* SNR PCU */ +#define SNR_PCU_MSR_PMON_CTL0 0x1ef1 +#define SNR_PCU_MSR_PMON_CTR0 0x1ef8 +#define SNR_PCU_MSR_PMON_BOX_CTL 0x1ef0 +#define SNR_PCU_MSR_PMON_BOX_FILTER 0x1efc + +/* SNR M2M */ +#define SNR_M2M_PCI_PMON_CTL0 0x468 +#define SNR_M2M_PCI_PMON_CTR0 0x440 +#define SNR_M2M_PCI_PMON_BOX_CTL 0x438 +#define SNR_M2M_PCI_PMON_UMASK_EXT 0xff + +/* SNR PCIE3 */ +#define SNR_PCIE3_PCI_PMON_CTL0 0x508 +#define SNR_PCIE3_PCI_PMON_CTR0 0x4e8 +#define SNR_PCIE3_PCI_PMON_BOX_CTL 0x4e4 + +/* SNR IMC */ +#define SNR_IMC_MMIO_PMON_FIXED_CTL 0x54 +#define SNR_IMC_MMIO_PMON_FIXED_CTR 0x38 +#define SNR_IMC_MMIO_PMON_CTL0 0x40 +#define SNR_IMC_MMIO_PMON_CTR0 0x8 +#define SNR_IMC_MMIO_PMON_BOX_CTL 0x22800 +#define SNR_IMC_MMIO_OFFSET 0x4000 +#define SNR_IMC_MMIO_SIZE 0x4000 +#define SNR_IMC_MMIO_BASE_OFFSET 0xd0 +#define SNR_IMC_MMIO_BASE_MASK 0x1FFFFFFF +#define SNR_IMC_MMIO_MEM0_OFFSET 0xd8 +#define SNR_IMC_MMIO_MEM0_MASK 0x7FF + DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7"); DEFINE_UNCORE_FORMAT_ATTR(event2, event, "config:0-6"); DEFINE_UNCORE_FORMAT_ATTR(event_ext, event, "config:0-7,21"); DEFINE_UNCORE_FORMAT_ATTR(use_occ_ctr, use_occ_ctr, "config:7"); DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15"); DEFINE_UNCORE_FORMAT_ATTR(umask_ext, umask, "config:8-15,32-43,45-55"); +DEFINE_UNCORE_FORMAT_ATTR(umask_ext2, umask, "config:8-15,32-57"); +DEFINE_UNCORE_FORMAT_ATTR(umask_ext3, umask, "config:8-15,32-39"); DEFINE_UNCORE_FORMAT_ATTR(qor, qor, "config:16"); DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18"); DEFINE_UNCORE_FORMAT_ATTR(tid_en, tid_en, "config:19"); @@ -343,11 +408,14 @@ DEFINE_UNCORE_FORMAT_ATTR(occ_invert, occ_invert, "config:30"); DEFINE_UNCORE_FORMAT_ATTR(occ_edge, occ_edge, "config:14-51"); DEFINE_UNCORE_FORMAT_ATTR(occ_edge_det, occ_edge_det, "config:31"); DEFINE_UNCORE_FORMAT_ATTR(ch_mask, ch_mask, "config:36-43"); +DEFINE_UNCORE_FORMAT_ATTR(ch_mask2, ch_mask, "config:36-47"); DEFINE_UNCORE_FORMAT_ATTR(fc_mask, fc_mask, "config:44-46"); +DEFINE_UNCORE_FORMAT_ATTR(fc_mask2, fc_mask, "config:48-50"); DEFINE_UNCORE_FORMAT_ATTR(filter_tid, filter_tid, "config1:0-4"); DEFINE_UNCORE_FORMAT_ATTR(filter_tid2, filter_tid, "config1:0"); DEFINE_UNCORE_FORMAT_ATTR(filter_tid3, filter_tid, "config1:0-5"); DEFINE_UNCORE_FORMAT_ATTR(filter_tid4, filter_tid, "config1:0-8"); +DEFINE_UNCORE_FORMAT_ATTR(filter_tid5, filter_tid, "config1:0-9"); DEFINE_UNCORE_FORMAT_ATTR(filter_cid, filter_cid, "config1:5"); DEFINE_UNCORE_FORMAT_ATTR(filter_link, filter_link, "config1:5-8"); DEFINE_UNCORE_FORMAT_ATTR(filter_link2, filter_link, "config1:6-8"); @@ -3585,6 +3653,7 @@ static struct uncore_event_desc skx_uncore_iio_freerunning_events[] = { static struct intel_uncore_ops skx_uncore_iio_freerunning_ops = { .read_counter = uncore_msr_read_counter, + .hw_config = uncore_freerunning_hw_config, }; static struct attribute *skx_uncore_iio_freerunning_formats_attr[] = { @@ -3967,3 +4036,535 @@ int skx_uncore_pci_init(void) } /* end of SKX uncore support */ + +/* SNR uncore support */ + +static struct intel_uncore_type snr_uncore_ubox = { + .name = "ubox", + .num_counters = 2, + .num_boxes = 1, + .perf_ctr_bits = 48, + .fixed_ctr_bits = 48, + .perf_ctr = SNR_U_MSR_PMON_CTR0, + .event_ctl = SNR_U_MSR_PMON_CTL0, + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .fixed_ctr = SNR_U_MSR_PMON_UCLK_FIXED_CTR, + .fixed_ctl = SNR_U_MSR_PMON_UCLK_FIXED_CTL, + .ops = &ivbep_uncore_msr_ops, + .format_group = &ivbep_uncore_format_group, +}; + +static struct attribute *snr_uncore_cha_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask_ext2.attr, + &format_attr_edge.attr, + &format_attr_tid_en.attr, + &format_attr_inv.attr, + &format_attr_thresh8.attr, + &format_attr_filter_tid5.attr, + NULL, +}; +static const struct attribute_group snr_uncore_chabox_format_group = { + .name = "format", + .attrs = snr_uncore_cha_formats_attr, +}; + +static int snr_cha_hw_config(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; + + reg1->reg = SNR_C0_MSR_PMON_BOX_FILTER0 + + box->pmu->type->msr_offset * box->pmu->pmu_idx; + reg1->config = event->attr.config1 & SKX_CHA_MSR_PMON_BOX_FILTER_TID; + reg1->idx = 0; + + return 0; +} + +static void snr_cha_enable_event(struct intel_uncore_box *box, + struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + + if (reg1->idx != EXTRA_REG_NONE) + wrmsrl(reg1->reg, reg1->config); + + wrmsrl(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN); +} + +static struct intel_uncore_ops snr_uncore_chabox_ops = { + .init_box = ivbep_uncore_msr_init_box, + .disable_box = snbep_uncore_msr_disable_box, + .enable_box = snbep_uncore_msr_enable_box, + .disable_event = snbep_uncore_msr_disable_event, + .enable_event = snr_cha_enable_event, + .read_counter = uncore_msr_read_counter, + .hw_config = snr_cha_hw_config, +}; + +static struct intel_uncore_type snr_uncore_chabox = { + .name = "cha", + .num_counters = 4, + .num_boxes = 6, + .perf_ctr_bits = 48, + .event_ctl = SNR_CHA_MSR_PMON_CTL0, + .perf_ctr = SNR_CHA_MSR_PMON_CTR0, + .box_ctl = SNR_CHA_MSR_PMON_BOX_CTL, + .msr_offset = HSWEP_CBO_MSR_OFFSET, + .event_mask = HSWEP_S_MSR_PMON_RAW_EVENT_MASK, + .event_mask_ext = SNR_CHA_RAW_EVENT_MASK_EXT, + .ops = &snr_uncore_chabox_ops, + .format_group = &snr_uncore_chabox_format_group, +}; + +static struct attribute *snr_uncore_iio_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_thresh9.attr, + &format_attr_ch_mask2.attr, + &format_attr_fc_mask2.attr, + NULL, +}; + +static const struct attribute_group snr_uncore_iio_format_group = { + .name = "format", + .attrs = snr_uncore_iio_formats_attr, +}; + +static struct intel_uncore_type snr_uncore_iio = { + .name = "iio", + .num_counters = 4, + .num_boxes = 5, + .perf_ctr_bits = 48, + .event_ctl = SNR_IIO_MSR_PMON_CTL0, + .perf_ctr = SNR_IIO_MSR_PMON_CTR0, + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .event_mask_ext = SNR_IIO_PMON_RAW_EVENT_MASK_EXT, + .box_ctl = SNR_IIO_MSR_PMON_BOX_CTL, + .msr_offset = SNR_IIO_MSR_OFFSET, + .ops = &ivbep_uncore_msr_ops, + .format_group = &snr_uncore_iio_format_group, +}; + +static struct intel_uncore_type snr_uncore_irp = { + .name = "irp", + .num_counters = 2, + .num_boxes = 5, + .perf_ctr_bits = 48, + .event_ctl = SNR_IRP0_MSR_PMON_CTL0, + .perf_ctr = SNR_IRP0_MSR_PMON_CTR0, + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .box_ctl = SNR_IRP0_MSR_PMON_BOX_CTL, + .msr_offset = SNR_IRP_MSR_OFFSET, + .ops = &ivbep_uncore_msr_ops, + .format_group = &ivbep_uncore_format_group, +}; + +static struct intel_uncore_type snr_uncore_m2pcie = { + .name = "m2pcie", + .num_counters = 4, + .num_boxes = 5, + .perf_ctr_bits = 48, + .event_ctl = SNR_M2PCIE_MSR_PMON_CTL0, + .perf_ctr = SNR_M2PCIE_MSR_PMON_CTR0, + .box_ctl = SNR_M2PCIE_MSR_PMON_BOX_CTL, + .msr_offset = SNR_M2PCIE_MSR_OFFSET, + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .ops = &ivbep_uncore_msr_ops, + .format_group = &ivbep_uncore_format_group, +}; + +static int snr_pcu_hw_config(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + int ev_sel = hwc->config & SNBEP_PMON_CTL_EV_SEL_MASK; + + if (ev_sel >= 0xb && ev_sel <= 0xe) { + reg1->reg = SNR_PCU_MSR_PMON_BOX_FILTER; + reg1->idx = ev_sel - 0xb; + reg1->config = event->attr.config1 & (0xff << reg1->idx); + } + return 0; +} + +static struct intel_uncore_ops snr_uncore_pcu_ops = { + IVBEP_UNCORE_MSR_OPS_COMMON_INIT(), + .hw_config = snr_pcu_hw_config, + .get_constraint = snbep_pcu_get_constraint, + .put_constraint = snbep_pcu_put_constraint, +}; + +static struct intel_uncore_type snr_uncore_pcu = { + .name = "pcu", + .num_counters = 4, + .num_boxes = 1, + .perf_ctr_bits = 48, + .perf_ctr = SNR_PCU_MSR_PMON_CTR0, + .event_ctl = SNR_PCU_MSR_PMON_CTL0, + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .box_ctl = SNR_PCU_MSR_PMON_BOX_CTL, + .num_shared_regs = 1, + .ops = &snr_uncore_pcu_ops, + .format_group = &skx_uncore_pcu_format_group, +}; + +enum perf_uncore_snr_iio_freerunning_type_id { + SNR_IIO_MSR_IOCLK, + SNR_IIO_MSR_BW_IN, + + SNR_IIO_FREERUNNING_TYPE_MAX, +}; + +static struct freerunning_counters snr_iio_freerunning[] = { + [SNR_IIO_MSR_IOCLK] = { 0x1eac, 0x1, 0x10, 1, 48 }, + [SNR_IIO_MSR_BW_IN] = { 0x1f00, 0x1, 0x10, 8, 48 }, +}; + +static struct uncore_event_desc snr_uncore_iio_freerunning_events[] = { + /* Free-Running IIO CLOCKS Counter */ + INTEL_UNCORE_EVENT_DESC(ioclk, "event=0xff,umask=0x10"), + /* Free-Running IIO BANDWIDTH IN Counters */ + INTEL_UNCORE_EVENT_DESC(bw_in_port0, "event=0xff,umask=0x20"), + INTEL_UNCORE_EVENT_DESC(bw_in_port0.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port0.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_in_port1, "event=0xff,umask=0x21"), + INTEL_UNCORE_EVENT_DESC(bw_in_port1.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port1.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_in_port2, "event=0xff,umask=0x22"), + INTEL_UNCORE_EVENT_DESC(bw_in_port2.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port2.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_in_port3, "event=0xff,umask=0x23"), + INTEL_UNCORE_EVENT_DESC(bw_in_port3.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port3.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_in_port4, "event=0xff,umask=0x24"), + INTEL_UNCORE_EVENT_DESC(bw_in_port4.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port4.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_in_port5, "event=0xff,umask=0x25"), + INTEL_UNCORE_EVENT_DESC(bw_in_port5.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port5.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_in_port6, "event=0xff,umask=0x26"), + INTEL_UNCORE_EVENT_DESC(bw_in_port6.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port6.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_in_port7, "event=0xff,umask=0x27"), + INTEL_UNCORE_EVENT_DESC(bw_in_port7.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port7.unit, "MiB"), + { /* end: all zeroes */ }, +}; + +static struct intel_uncore_type snr_uncore_iio_free_running = { + .name = "iio_free_running", + .num_counters = 9, + .num_boxes = 5, + .num_freerunning_types = SNR_IIO_FREERUNNING_TYPE_MAX, + .freerunning = snr_iio_freerunning, + .ops = &skx_uncore_iio_freerunning_ops, + .event_descs = snr_uncore_iio_freerunning_events, + .format_group = &skx_uncore_iio_freerunning_format_group, +}; + +static struct intel_uncore_type *snr_msr_uncores[] = { + &snr_uncore_ubox, + &snr_uncore_chabox, + &snr_uncore_iio, + &snr_uncore_irp, + &snr_uncore_m2pcie, + &snr_uncore_pcu, + &snr_uncore_iio_free_running, + NULL, +}; + +void snr_uncore_cpu_init(void) +{ + uncore_msr_uncores = snr_msr_uncores; +} + +static void snr_m2m_uncore_pci_init_box(struct intel_uncore_box *box) +{ + struct pci_dev *pdev = box->pci_dev; + int box_ctl = uncore_pci_box_ctl(box); + + __set_bit(UNCORE_BOX_FLAG_CTL_OFFS8, &box->flags); + pci_write_config_dword(pdev, box_ctl, IVBEP_PMON_BOX_CTL_INT); +} + +static struct intel_uncore_ops snr_m2m_uncore_pci_ops = { + .init_box = snr_m2m_uncore_pci_init_box, + .disable_box = snbep_uncore_pci_disable_box, + .enable_box = snbep_uncore_pci_enable_box, + .disable_event = snbep_uncore_pci_disable_event, + .enable_event = snbep_uncore_pci_enable_event, + .read_counter = snbep_uncore_pci_read_counter, +}; + +static struct attribute *snr_m2m_uncore_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask_ext3.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_thresh8.attr, + NULL, +}; + +static const struct attribute_group snr_m2m_uncore_format_group = { + .name = "format", + .attrs = snr_m2m_uncore_formats_attr, +}; + +static struct intel_uncore_type snr_uncore_m2m = { + .name = "m2m", + .num_counters = 4, + .num_boxes = 1, + .perf_ctr_bits = 48, + .perf_ctr = SNR_M2M_PCI_PMON_CTR0, + .event_ctl = SNR_M2M_PCI_PMON_CTL0, + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .event_mask_ext = SNR_M2M_PCI_PMON_UMASK_EXT, + .box_ctl = SNR_M2M_PCI_PMON_BOX_CTL, + .ops = &snr_m2m_uncore_pci_ops, + .format_group = &snr_m2m_uncore_format_group, +}; + +static struct intel_uncore_type snr_uncore_pcie3 = { + .name = "pcie3", + .num_counters = 4, + .num_boxes = 1, + .perf_ctr_bits = 48, + .perf_ctr = SNR_PCIE3_PCI_PMON_CTR0, + .event_ctl = SNR_PCIE3_PCI_PMON_CTL0, + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .box_ctl = SNR_PCIE3_PCI_PMON_BOX_CTL, + .ops = &ivbep_uncore_pci_ops, + .format_group = &ivbep_uncore_format_group, +}; + +enum { + SNR_PCI_UNCORE_M2M, + SNR_PCI_UNCORE_PCIE3, +}; + +static struct intel_uncore_type *snr_pci_uncores[] = { + [SNR_PCI_UNCORE_M2M] = &snr_uncore_m2m, + [SNR_PCI_UNCORE_PCIE3] = &snr_uncore_pcie3, + NULL, +}; + +static const struct pci_device_id snr_uncore_pci_ids[] = { + { /* M2M */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x344a), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(12, 0, SNR_PCI_UNCORE_M2M, 0), + }, + { /* PCIe3 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x334a), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(4, 0, SNR_PCI_UNCORE_PCIE3, 0), + }, + { /* end: all zeroes */ } +}; + +static struct pci_driver snr_uncore_pci_driver = { + .name = "snr_uncore", + .id_table = snr_uncore_pci_ids, +}; + +int snr_uncore_pci_init(void) +{ + /* SNR UBOX DID */ + int ret = snbep_pci2phy_map_init(0x3460, SKX_CPUNODEID, + SKX_GIDNIDMAP, true); + + if (ret) + return ret; + + uncore_pci_uncores = snr_pci_uncores; + uncore_pci_driver = &snr_uncore_pci_driver; + return 0; +} + +static struct pci_dev *snr_uncore_get_mc_dev(int id) +{ + struct pci_dev *mc_dev = NULL; + int phys_id, pkg; + + while (1) { + mc_dev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3451, mc_dev); + if (!mc_dev) + break; + phys_id = uncore_pcibus_to_physid(mc_dev->bus); + if (phys_id < 0) + continue; + pkg = topology_phys_to_logical_pkg(phys_id); + if (pkg < 0) + continue; + else if (pkg == id) + break; + } + return mc_dev; +} + +static void snr_uncore_mmio_init_box(struct intel_uncore_box *box) +{ + struct pci_dev *pdev = snr_uncore_get_mc_dev(box->dieid); + unsigned int box_ctl = uncore_mmio_box_ctl(box); + resource_size_t addr; + u32 pci_dword; + + if (!pdev) + return; + + pci_read_config_dword(pdev, SNR_IMC_MMIO_BASE_OFFSET, &pci_dword); + addr = (pci_dword & SNR_IMC_MMIO_BASE_MASK) << 23; + + pci_read_config_dword(pdev, SNR_IMC_MMIO_MEM0_OFFSET, &pci_dword); + addr |= (pci_dword & SNR_IMC_MMIO_MEM0_MASK) << 12; + + addr += box_ctl; + + box->io_addr = ioremap(addr, SNR_IMC_MMIO_SIZE); + if (!box->io_addr) + return; + + writel(IVBEP_PMON_BOX_CTL_INT, box->io_addr); +} + +static void snr_uncore_mmio_disable_box(struct intel_uncore_box *box) +{ + u32 config; + + if (!box->io_addr) + return; + + config = readl(box->io_addr); + config |= SNBEP_PMON_BOX_CTL_FRZ; + writel(config, box->io_addr); +} + +static void snr_uncore_mmio_enable_box(struct intel_uncore_box *box) +{ + u32 config; + + if (!box->io_addr) + return; + + config = readl(box->io_addr); + config &= ~SNBEP_PMON_BOX_CTL_FRZ; + writel(config, box->io_addr); +} + +static void snr_uncore_mmio_enable_event(struct intel_uncore_box *box, + struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + if (!box->io_addr) + return; + + writel(hwc->config | SNBEP_PMON_CTL_EN, + box->io_addr + hwc->config_base); +} + +static void snr_uncore_mmio_disable_event(struct intel_uncore_box *box, + struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + if (!box->io_addr) + return; + + writel(hwc->config, box->io_addr + hwc->config_base); +} + +static struct intel_uncore_ops snr_uncore_mmio_ops = { + .init_box = snr_uncore_mmio_init_box, + .exit_box = uncore_mmio_exit_box, + .disable_box = snr_uncore_mmio_disable_box, + .enable_box = snr_uncore_mmio_enable_box, + .disable_event = snr_uncore_mmio_disable_event, + .enable_event = snr_uncore_mmio_enable_event, + .read_counter = uncore_mmio_read_counter, +}; + +static struct uncore_event_desc snr_uncore_imc_events[] = { + INTEL_UNCORE_EVENT_DESC(clockticks, "event=0x00,umask=0x00"), + INTEL_UNCORE_EVENT_DESC(cas_count_read, "event=0x04,umask=0x0f"), + INTEL_UNCORE_EVENT_DESC(cas_count_read.scale, "6.103515625e-5"), + INTEL_UNCORE_EVENT_DESC(cas_count_read.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(cas_count_write, "event=0x04,umask=0x30"), + INTEL_UNCORE_EVENT_DESC(cas_count_write.scale, "6.103515625e-5"), + INTEL_UNCORE_EVENT_DESC(cas_count_write.unit, "MiB"), + { /* end: all zeroes */ }, +}; + +static struct intel_uncore_type snr_uncore_imc = { + .name = "imc", + .num_counters = 4, + .num_boxes = 2, + .perf_ctr_bits = 48, + .fixed_ctr_bits = 48, + .fixed_ctr = SNR_IMC_MMIO_PMON_FIXED_CTR, + .fixed_ctl = SNR_IMC_MMIO_PMON_FIXED_CTL, + .event_descs = snr_uncore_imc_events, + .perf_ctr = SNR_IMC_MMIO_PMON_CTR0, + .event_ctl = SNR_IMC_MMIO_PMON_CTL0, + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .box_ctl = SNR_IMC_MMIO_PMON_BOX_CTL, + .mmio_offset = SNR_IMC_MMIO_OFFSET, + .ops = &snr_uncore_mmio_ops, + .format_group = &skx_uncore_format_group, +}; + +enum perf_uncore_snr_imc_freerunning_type_id { + SNR_IMC_DCLK, + SNR_IMC_DDR, + + SNR_IMC_FREERUNNING_TYPE_MAX, +}; + +static struct freerunning_counters snr_imc_freerunning[] = { + [SNR_IMC_DCLK] = { 0x22b0, 0x0, 0, 1, 48 }, + [SNR_IMC_DDR] = { 0x2290, 0x8, 0, 2, 48 }, +}; + +static struct uncore_event_desc snr_uncore_imc_freerunning_events[] = { + INTEL_UNCORE_EVENT_DESC(dclk, "event=0xff,umask=0x10"), + + INTEL_UNCORE_EVENT_DESC(read, "event=0xff,umask=0x20"), + INTEL_UNCORE_EVENT_DESC(read.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(read.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(write, "event=0xff,umask=0x21"), + INTEL_UNCORE_EVENT_DESC(write.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(write.unit, "MiB"), +}; + +static struct intel_uncore_ops snr_uncore_imc_freerunning_ops = { + .init_box = snr_uncore_mmio_init_box, + .exit_box = uncore_mmio_exit_box, + .read_counter = uncore_mmio_read_counter, + .hw_config = uncore_freerunning_hw_config, +}; + +static struct intel_uncore_type snr_uncore_imc_free_running = { + .name = "imc_free_running", + .num_counters = 3, + .num_boxes = 1, + .num_freerunning_types = SNR_IMC_FREERUNNING_TYPE_MAX, + .freerunning = snr_imc_freerunning, + .ops = &snr_uncore_imc_freerunning_ops, + .event_descs = snr_uncore_imc_freerunning_events, + .format_group = &skx_uncore_iio_freerunning_format_group, +}; + +static struct intel_uncore_type *snr_mmio_uncores[] = { + &snr_uncore_imc, + &snr_uncore_imc_free_running, + NULL, +}; + +void snr_uncore_mmio_init(void) +{ + uncore_mmio_uncores = snr_mmio_uncores; +} + +/* end of SNR uncore support */ diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c index f3f4c2263501..9431447541e9 100644 --- a/arch/x86/events/msr.c +++ b/arch/x86/events/msr.c @@ -1,7 +1,9 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/perf_event.h> +#include <linux/sysfs.h> #include <linux/nospec.h> #include <asm/intel-family.h> +#include "probe.h" enum perf_msr_id { PERF_MSR_TSC = 0, @@ -12,32 +14,30 @@ enum perf_msr_id { PERF_MSR_PTSC = 5, PERF_MSR_IRPERF = 6, PERF_MSR_THERM = 7, - PERF_MSR_THERM_SNAP = 8, - PERF_MSR_THERM_UNIT = 9, PERF_MSR_EVENT_MAX, }; -static bool test_aperfmperf(int idx) +static bool test_aperfmperf(int idx, void *data) { return boot_cpu_has(X86_FEATURE_APERFMPERF); } -static bool test_ptsc(int idx) +static bool test_ptsc(int idx, void *data) { return boot_cpu_has(X86_FEATURE_PTSC); } -static bool test_irperf(int idx) +static bool test_irperf(int idx, void *data) { return boot_cpu_has(X86_FEATURE_IRPERF); } -static bool test_therm_status(int idx) +static bool test_therm_status(int idx, void *data) { return boot_cpu_has(X86_FEATURE_DTHERM); } -static bool test_intel(int idx) +static bool test_intel(int idx, void *data) { if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL || boot_cpu_data.x86 != 6) @@ -98,37 +98,51 @@ static bool test_intel(int idx) return false; } -struct perf_msr { - u64 msr; - struct perf_pmu_events_attr *attr; - bool (*test)(int idx); +PMU_EVENT_ATTR_STRING(tsc, attr_tsc, "event=0x00" ); +PMU_EVENT_ATTR_STRING(aperf, attr_aperf, "event=0x01" ); +PMU_EVENT_ATTR_STRING(mperf, attr_mperf, "event=0x02" ); +PMU_EVENT_ATTR_STRING(pperf, attr_pperf, "event=0x03" ); +PMU_EVENT_ATTR_STRING(smi, attr_smi, "event=0x04" ); +PMU_EVENT_ATTR_STRING(ptsc, attr_ptsc, "event=0x05" ); +PMU_EVENT_ATTR_STRING(irperf, attr_irperf, "event=0x06" ); +PMU_EVENT_ATTR_STRING(cpu_thermal_margin, attr_therm, "event=0x07" ); +PMU_EVENT_ATTR_STRING(cpu_thermal_margin.snapshot, attr_therm_snap, "1" ); +PMU_EVENT_ATTR_STRING(cpu_thermal_margin.unit, attr_therm_unit, "C" ); + +static unsigned long msr_mask; + +PMU_EVENT_GROUP(events, aperf); +PMU_EVENT_GROUP(events, mperf); +PMU_EVENT_GROUP(events, pperf); +PMU_EVENT_GROUP(events, smi); +PMU_EVENT_GROUP(events, ptsc); +PMU_EVENT_GROUP(events, irperf); + +static struct attribute *attrs_therm[] = { + &attr_therm.attr.attr, + &attr_therm_snap.attr.attr, + &attr_therm_unit.attr.attr, + NULL, }; -PMU_EVENT_ATTR_STRING(tsc, evattr_tsc, "event=0x00" ); -PMU_EVENT_ATTR_STRING(aperf, evattr_aperf, "event=0x01" ); -PMU_EVENT_ATTR_STRING(mperf, evattr_mperf, "event=0x02" ); -PMU_EVENT_ATTR_STRING(pperf, evattr_pperf, "event=0x03" ); -PMU_EVENT_ATTR_STRING(smi, evattr_smi, "event=0x04" ); -PMU_EVENT_ATTR_STRING(ptsc, evattr_ptsc, "event=0x05" ); -PMU_EVENT_ATTR_STRING(irperf, evattr_irperf, "event=0x06" ); -PMU_EVENT_ATTR_STRING(cpu_thermal_margin, evattr_therm, "event=0x07" ); -PMU_EVENT_ATTR_STRING(cpu_thermal_margin.snapshot, evattr_therm_snap, "1" ); -PMU_EVENT_ATTR_STRING(cpu_thermal_margin.unit, evattr_therm_unit, "C" ); +static struct attribute_group group_therm = { + .name = "events", + .attrs = attrs_therm, +}; static struct perf_msr msr[] = { - [PERF_MSR_TSC] = { 0, &evattr_tsc, NULL, }, - [PERF_MSR_APERF] = { MSR_IA32_APERF, &evattr_aperf, test_aperfmperf, }, - [PERF_MSR_MPERF] = { MSR_IA32_MPERF, &evattr_mperf, test_aperfmperf, }, - [PERF_MSR_PPERF] = { MSR_PPERF, &evattr_pperf, test_intel, }, - [PERF_MSR_SMI] = { MSR_SMI_COUNT, &evattr_smi, test_intel, }, - [PERF_MSR_PTSC] = { MSR_F15H_PTSC, &evattr_ptsc, test_ptsc, }, - [PERF_MSR_IRPERF] = { MSR_F17H_IRPERF, &evattr_irperf, test_irperf, }, - [PERF_MSR_THERM] = { MSR_IA32_THERM_STATUS, &evattr_therm, test_therm_status, }, - [PERF_MSR_THERM_SNAP] = { MSR_IA32_THERM_STATUS, &evattr_therm_snap, test_therm_status, }, - [PERF_MSR_THERM_UNIT] = { MSR_IA32_THERM_STATUS, &evattr_therm_unit, test_therm_status, }, + [PERF_MSR_TSC] = { .no_check = true, }, + [PERF_MSR_APERF] = { MSR_IA32_APERF, &group_aperf, test_aperfmperf, }, + [PERF_MSR_MPERF] = { MSR_IA32_MPERF, &group_mperf, test_aperfmperf, }, + [PERF_MSR_PPERF] = { MSR_PPERF, &group_pperf, test_intel, }, + [PERF_MSR_SMI] = { MSR_SMI_COUNT, &group_smi, test_intel, }, + [PERF_MSR_PTSC] = { MSR_F15H_PTSC, &group_ptsc, test_ptsc, }, + [PERF_MSR_IRPERF] = { MSR_F17H_IRPERF, &group_irperf, test_irperf, }, + [PERF_MSR_THERM] = { MSR_IA32_THERM_STATUS, &group_therm, test_therm_status, }, }; -static struct attribute *events_attrs[PERF_MSR_EVENT_MAX + 1] = { +static struct attribute *events_attrs[] = { + &attr_tsc.attr.attr, NULL, }; @@ -153,6 +167,17 @@ static const struct attribute_group *attr_groups[] = { NULL, }; +const struct attribute_group *attr_update[] = { + &group_aperf, + &group_mperf, + &group_pperf, + &group_smi, + &group_ptsc, + &group_irperf, + &group_therm, + NULL, +}; + static int msr_event_init(struct perf_event *event) { u64 cfg = event->attr.config; @@ -169,7 +194,7 @@ static int msr_event_init(struct perf_event *event) cfg = array_index_nospec((unsigned long)cfg, PERF_MSR_EVENT_MAX); - if (!msr[cfg].attr) + if (!(msr_mask & (1 << cfg))) return -EINVAL; event->hw.idx = -1; @@ -252,32 +277,17 @@ static struct pmu pmu_msr = { .stop = msr_event_stop, .read = msr_event_update, .capabilities = PERF_PMU_CAP_NO_INTERRUPT | PERF_PMU_CAP_NO_EXCLUDE, + .attr_update = attr_update, }; static int __init msr_init(void) { - int i, j = 0; - if (!boot_cpu_has(X86_FEATURE_TSC)) { pr_cont("no MSR PMU driver.\n"); return 0; } - /* Probe the MSRs. */ - for (i = PERF_MSR_TSC + 1; i < PERF_MSR_EVENT_MAX; i++) { - u64 val; - - /* Virt sucks; you cannot tell if a R/O MSR is present :/ */ - if (!msr[i].test(i) || rdmsrl_safe(msr[i].msr, &val)) - msr[i].attr = NULL; - } - - /* List remaining MSRs in the sysfs attrs. */ - for (i = 0; i < PERF_MSR_EVENT_MAX; i++) { - if (msr[i].attr) - events_attrs[j++] = &msr[i].attr->attr.attr; - } - events_attrs[j] = NULL; + msr_mask = perf_msr_probe(msr, PERF_MSR_EVENT_MAX, true, NULL); perf_pmu_register(&pmu_msr, "msr", -1); diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 4e346856ee19..8751008fc170 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -613,14 +613,11 @@ struct x86_pmu { int attr_rdpmc_broken; int attr_rdpmc; struct attribute **format_attrs; - struct attribute **event_attrs; - struct attribute **caps_attrs; ssize_t (*events_sysfs_show)(char *page, u64 config); - struct attribute **cpu_events; + const struct attribute_group **attr_update; unsigned long attr_freeze_on_smi; - struct attribute **attrs; /* * CPU Hotplug hooks @@ -886,8 +883,6 @@ static inline void set_linear_ip(struct pt_regs *regs, unsigned long ip) ssize_t x86_event_sysfs_show(char *page, u64 config, u64 event); ssize_t intel_event_sysfs_show(char *page, u64 config); -struct attribute **merge_attr(struct attribute **a, struct attribute **b); - ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr, char *page); ssize_t events_ht_sysfs_show(struct device *dev, struct device_attribute *attr, diff --git a/arch/x86/events/probe.c b/arch/x86/events/probe.c new file mode 100644 index 000000000000..c2ede2f3b277 --- /dev/null +++ b/arch/x86/events/probe.c @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/export.h> +#include <linux/types.h> +#include <linux/bits.h> +#include "probe.h" + +static umode_t +not_visible(struct kobject *kobj, struct attribute *attr, int i) +{ + return 0; +} + +unsigned long +perf_msr_probe(struct perf_msr *msr, int cnt, bool zero, void *data) +{ + unsigned long avail = 0; + unsigned int bit; + u64 val; + + if (cnt >= BITS_PER_LONG) + return 0; + + for (bit = 0; bit < cnt; bit++) { + if (!msr[bit].no_check) { + struct attribute_group *grp = msr[bit].grp; + + grp->is_visible = not_visible; + + if (msr[bit].test && !msr[bit].test(bit, data)) + continue; + /* Virt sucks; you cannot tell if a R/O MSR is present :/ */ + if (rdmsrl_safe(msr[bit].msr, &val)) + continue; + /* Disable zero counters if requested. */ + if (!zero && !val) + continue; + + grp->is_visible = NULL; + } + avail |= BIT(bit); + } + + return avail; +} +EXPORT_SYMBOL_GPL(perf_msr_probe); diff --git a/arch/x86/events/probe.h b/arch/x86/events/probe.h new file mode 100644 index 000000000000..4c8e0afc5fb5 --- /dev/null +++ b/arch/x86/events/probe.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ARCH_X86_EVENTS_PROBE_H__ +#define __ARCH_X86_EVENTS_PROBE_H__ +#include <linux/sysfs.h> + +struct perf_msr { + u64 msr; + struct attribute_group *grp; + bool (*test)(int idx, void *data); + bool no_check; +}; + +unsigned long +perf_msr_probe(struct perf_msr *msr, int cnt, bool no_zero, void *data); + +#define __PMU_EVENT_GROUP(_name) \ +static struct attribute *attrs_##_name[] = { \ + &attr_##_name.attr.attr, \ + NULL, \ +} + +#define PMU_EVENT_GROUP(_grp, _name) \ +__PMU_EVENT_GROUP(_name); \ +static struct attribute_group group_##_name = { \ + .name = #_grp, \ + .attrs = attrs_##_name, \ +} + +#endif /* __ARCH_X86_EVENTS_PROBE_H__ */ diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c index a43212036257..64a6c952091e 100644 --- a/arch/x86/ia32/sys_ia32.c +++ b/arch/x86/ia32/sys_ia32.c @@ -237,6 +237,14 @@ COMPAT_SYSCALL_DEFINE5(x86_clone, unsigned long, clone_flags, unsigned long, newsp, int __user *, parent_tidptr, unsigned long, tls_val, int __user *, child_tidptr) { - return _do_fork(clone_flags, newsp, 0, parent_tidptr, child_tidptr, - tls_val); + struct kernel_clone_args args = { + .flags = (clone_flags & ~CSIGNAL), + .child_tid = child_tidptr, + .parent_tid = parent_tidptr, + .exit_signal = (clone_flags & CSIGNAL), + .stack = newsp, + .tls = tls_val, + }; + + return _do_fork(&args); } diff --git a/arch/x86/include/asm/bootparam_utils.h b/arch/x86/include/asm/bootparam_utils.h index f6f6ef436599..101eb944f13c 100644 --- a/arch/x86/include/asm/bootparam_utils.h +++ b/arch/x86/include/asm/bootparam_utils.h @@ -24,7 +24,7 @@ static void sanitize_boot_params(struct boot_params *boot_params) * IMPORTANT NOTE TO BOOTLOADER AUTHORS: do not simply clear * this field. The purpose of this field is to guarantee * compliance with the x86 boot spec located in - * Documentation/x86/boot.txt . That spec says that the + * Documentation/x86/boot.rst . That spec says that the * *whole* structure should be cleared, after which only the * portion defined by struct setup_header (boot_params->hdr) * should be copied in. diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index 793c14c372cb..288b065955b7 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h @@ -48,7 +48,7 @@ #define __START_KERNEL_map _AC(0xffffffff80000000, UL) -/* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */ +/* See Documentation/x86/x86_64/mm.rst for a description of the memory map. */ #define __PHYSICAL_MASK_SHIFT 52 diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index 88bca456da99..52e5f5f2240d 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -103,7 +103,7 @@ extern unsigned int ptrs_per_p4d; #define PGDIR_MASK (~(PGDIR_SIZE - 1)) /* - * See Documentation/x86/x86_64/mm.txt for a description of the memory map. + * See Documentation/x86/x86_64/mm.rst for a description of the memory map. * * Be very careful vs. KASLR when changing anything here. The KASLR address * range must not overlap with anything except the KASAN shadow area, which diff --git a/arch/x86/include/asm/sections.h b/arch/x86/include/asm/sections.h index 8ea1cfdbeabc..71b32f2570ab 100644 --- a/arch/x86/include/asm/sections.h +++ b/arch/x86/include/asm/sections.h @@ -13,4 +13,6 @@ extern char __end_rodata_aligned[]; extern char __end_rodata_hpage_align[]; #endif +extern char __end_of_kernel_reserve[]; + #endif /* _ASM_X86_SECTIONS_H */ diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h index 146859efd83c..097589753fec 100644 --- a/arch/x86/include/asm/unistd.h +++ b/arch/x86/include/asm/unistd.h @@ -54,5 +54,6 @@ # define __ARCH_WANT_SYS_FORK # define __ARCH_WANT_SYS_VFORK # define __ARCH_WANT_SYS_CLONE +# define __ARCH_WANT_SYS_CLONE3 #endif /* _ASM_X86_UNISTD_H */ diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h index 60733f137e9a..c895df5482c5 100644 --- a/arch/x86/include/uapi/asm/bootparam.h +++ b/arch/x86/include/uapi/asm/bootparam.h @@ -29,6 +29,8 @@ #define XLF_EFI_HANDOVER_32 (1<<2) #define XLF_EFI_HANDOVER_64 (1<<3) #define XLF_EFI_KEXEC (1<<4) +#define XLF_5LEVEL (1<<5) +#define XLF_5LEVEL_ENABLED (1<<6) #ifndef __ASSEMBLY__ diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 4ddadf672ab5..a0e52bd00ecc 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -59,7 +59,7 @@ static u8 amd_ucode_patch[PATCH_MAX_SIZE]; /* * Microcode patch container file is prepended to the initrd in cpio - * format. See Documentation/x86/microcode.txt + * format. See Documentation/x86/microcode.rst */ static const char ucode_path[] __maybe_unused = "kernel/x86/microcode/AuthenticAMD.bin"; diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index a55094b5f452..2bf70a2fed90 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -375,6 +375,12 @@ int crash_setup_memmap_entries(struct kimage *image, struct boot_params *params) walk_iomem_res_desc(IORES_DESC_ACPI_NV_STORAGE, flags, 0, -1, &cmd, memmap_entry_callback); + /* Add e820 reserved ranges */ + cmd.type = E820_TYPE_RESERVED; + flags = IORESOURCE_MEM; + walk_iomem_res_desc(IORES_DESC_RESERVED, flags, 0, -1, &cmd, + memmap_entry_callback); + /* Add crashk_low_res region */ if (crashk_low_res.end) { ei.addr = crashk_low_res.start; diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 8f32e705a980..e69408bf664b 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -1063,10 +1063,10 @@ static unsigned long __init e820_type_to_iores_desc(struct e820_entry *entry) case E820_TYPE_NVS: return IORES_DESC_ACPI_NV_STORAGE; case E820_TYPE_PMEM: return IORES_DESC_PERSISTENT_MEMORY; case E820_TYPE_PRAM: return IORES_DESC_PERSISTENT_MEMORY_LEGACY; + case E820_TYPE_RESERVED: return IORES_DESC_RESERVED; case E820_TYPE_RESERVED_KERN: /* Fall-through: */ case E820_TYPE_RAM: /* Fall-through: */ case E820_TYPE_UNUSABLE: /* Fall-through: */ - case E820_TYPE_RESERVED: /* Fall-through: */ default: return IORES_DESC_NONE; } } diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c index f03237e3f192..5ebcd02cbca7 100644 --- a/arch/x86/kernel/kexec-bzimage64.c +++ b/arch/x86/kernel/kexec-bzimage64.c @@ -319,6 +319,11 @@ static int bzImage64_probe(const char *buf, unsigned long len) return ret; } + if (!(header->xloadflags & XLF_5LEVEL) && pgtable_l5_enabled()) { + pr_err("bzImage cannot handle 5-level paging mode.\n"); + return ret; + } + /* I've got a bzImage */ pr_debug("It's a relocatable bzImage64\n"); ret = 0; @@ -414,7 +419,7 @@ static void *bzImage64_load(struct kimage *image, char *kernel, efi_map_offset = params_cmdline_sz; efi_setup_data_offset = efi_map_offset + ALIGN(efi_map_sz, 16); - /* Copy setup header onto bootparams. Documentation/x86/boot.txt */ + /* Copy setup header onto bootparams. Documentation/x86/boot.rst */ setup_header_size = 0x0202 + kernel[0x0201] - setup_hdr_offset; /* Is there a limit on setup header size? */ diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index bd17dbb15d6a..0e0b08008b5a 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -808,7 +808,7 @@ __used __visible void *trampoline_handler(struct pt_regs *regs) continue; /* * Return probes must be pushed on this hash list correct - * order (same as return order) so that it can be poped + * order (same as return order) so that it can be popped * correctly. However, if we find it is pushed it incorrect * order, this means we find a function which should not be * probed, because the wrong order entry is pushed on the diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index d7be2376ac0b..5dcd438ad8f2 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -16,6 +16,7 @@ #include <linux/io.h> #include <linux/suspend.h> #include <linux/vmalloc.h> +#include <linux/efi.h> #include <asm/init.h> #include <asm/pgtable.h> @@ -27,6 +28,55 @@ #include <asm/setup.h> #include <asm/set_memory.h> +#ifdef CONFIG_ACPI +/* + * Used while adding mapping for ACPI tables. + * Can be reused when other iomem regions need be mapped + */ +struct init_pgtable_data { + struct x86_mapping_info *info; + pgd_t *level4p; +}; + +static int mem_region_callback(struct resource *res, void *arg) +{ + struct init_pgtable_data *data = arg; + unsigned long mstart, mend; + + mstart = res->start; + mend = mstart + resource_size(res) - 1; + + return kernel_ident_mapping_init(data->info, data->level4p, mstart, mend); +} + +static int +map_acpi_tables(struct x86_mapping_info *info, pgd_t *level4p) +{ + struct init_pgtable_data data; + unsigned long flags; + int ret; + + data.info = info; + data.level4p = level4p; + flags = IORESOURCE_MEM | IORESOURCE_BUSY; + + ret = walk_iomem_res_desc(IORES_DESC_ACPI_TABLES, flags, 0, -1, + &data, mem_region_callback); + if (ret && ret != -EINVAL) + return ret; + + /* ACPI tables could be located in ACPI Non-volatile Storage region */ + ret = walk_iomem_res_desc(IORES_DESC_ACPI_NV_STORAGE, flags, 0, -1, + &data, mem_region_callback); + if (ret && ret != -EINVAL) + return ret; + + return 0; +} +#else +static int map_acpi_tables(struct x86_mapping_info *info, pgd_t *level4p) { return 0; } +#endif + #ifdef CONFIG_KEXEC_FILE const struct kexec_file_ops * const kexec_file_loaders[] = { &kexec_bzImage64_ops, @@ -34,6 +84,31 @@ const struct kexec_file_ops * const kexec_file_loaders[] = { }; #endif +static int +map_efi_systab(struct x86_mapping_info *info, pgd_t *level4p) +{ +#ifdef CONFIG_EFI + unsigned long mstart, mend; + + if (!efi_enabled(EFI_BOOT)) + return 0; + + mstart = (boot_params.efi_info.efi_systab | + ((u64)boot_params.efi_info.efi_systab_hi<<32)); + + if (efi_enabled(EFI_64BIT)) + mend = mstart + sizeof(efi_system_table_64_t); + else + mend = mstart + sizeof(efi_system_table_32_t); + + if (!mstart) + return 0; + + return kernel_ident_mapping_init(info, level4p, mstart, mend); +#endif + return 0; +} + static void free_transition_pgtable(struct kimage *image) { free_page((unsigned long)image->arch.p4d); @@ -48,12 +123,13 @@ static void free_transition_pgtable(struct kimage *image) static int init_transition_pgtable(struct kimage *image, pgd_t *pgd) { + pgprot_t prot = PAGE_KERNEL_EXEC_NOENC; + unsigned long vaddr, paddr; + int result = -ENOMEM; p4d_t *p4d; pud_t *pud; pmd_t *pmd; pte_t *pte; - unsigned long vaddr, paddr; - int result = -ENOMEM; vaddr = (unsigned long)relocate_kernel; paddr = __pa(page_address(image->control_code_page)+PAGE_SIZE); @@ -90,7 +166,11 @@ static int init_transition_pgtable(struct kimage *image, pgd_t *pgd) set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE)); } pte = pte_offset_kernel(pmd, vaddr); - set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL_EXEC_NOENC)); + + if (sev_active()) + prot = PAGE_KERNEL_EXEC; + + set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, prot)); return 0; err: return result; @@ -127,6 +207,11 @@ static int init_pgtable(struct kimage *image, unsigned long start_pgtable) level4p = (pgd_t *)__va(start_pgtable); clear_page(level4p); + if (sev_active()) { + info.page_flag |= _PAGE_ENC; + info.kernpg_flag |= _PAGE_ENC; + } + if (direct_gbpages) info.direct_gbpages = true; @@ -157,6 +242,18 @@ static int init_pgtable(struct kimage *image, unsigned long start_pgtable) return result; } + /* + * Prepare EFI systab and ACPI tables for kexec kernel since they are + * not covered by pfn_mapped. + */ + result = map_efi_systab(&info, level4p); + if (result) + return result; + + result = map_acpi_tables(&info, level4p); + if (result) + return result; + return init_transition_pgtable(image, level4p); } @@ -557,8 +654,20 @@ void arch_kexec_unprotect_crashkres(void) kexec_mark_crashkres(false); } +/* + * During a traditional boot under SME, SME will encrypt the kernel, + * so the SME kexec kernel also needs to be un-encrypted in order to + * replicate a normal SME boot. + * + * During a traditional boot under SEV, the kernel has already been + * loaded encrypted, so the SEV kexec kernel needs to be encrypted in + * order to replicate a normal SEV boot. + */ int arch_kexec_post_alloc_pages(void *vaddr, unsigned int pages, gfp_t gfp) { + if (sev_active()) + return 0; + /* * If SME is active we need to be sure that kexec pages are * not encrypted because when we boot to the new kernel the @@ -569,6 +678,9 @@ int arch_kexec_post_alloc_pages(void *vaddr, unsigned int pages, gfp_t gfp) void arch_kexec_pre_free_pages(void *vaddr, unsigned int pages) { + if (sev_active()) + return; + /* * If SME is active we need to reset the pages back to being * an encrypted mapping before freeing them. diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index dcd272dbd0a9..f62b498b18fb 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -70,7 +70,7 @@ void __init pci_iommu_alloc(void) } /* - * See <Documentation/x86/x86_64/boot-options.txt> for the iommu kernel + * See <Documentation/x86/x86_64/boot-options.rst> for the iommu kernel * parameter documentation. */ static __init int iommu_setup(char *p) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 08a5f4a131f5..bbe35bf879f5 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -453,15 +453,24 @@ static void __init memblock_x86_reserve_range_setup_data(void) #define CRASH_ALIGN SZ_16M /* - * Keep the crash kernel below this limit. On 32 bits earlier kernels - * would limit the kernel to the low 512 MiB due to mapping restrictions. + * Keep the crash kernel below this limit. + * + * On 32 bits earlier kernels would limit the kernel to the low 512 MiB + * due to mapping restrictions. + * + * On 64bit, kdump kernel need be restricted to be under 64TB, which is + * the upper limit of system RAM in 4-level paing mode. Since the kdump + * jumping could be from 5-level to 4-level, the jumping will fail if + * kernel is put above 64TB, and there's no way to detect the paging mode + * of the kernel which will be loaded for dumping during the 1st kernel + * bootup. */ #ifdef CONFIG_X86_32 # define CRASH_ADDR_LOW_MAX SZ_512M # define CRASH_ADDR_HIGH_MAX SZ_512M #else # define CRASH_ADDR_LOW_MAX SZ_4G -# define CRASH_ADDR_HIGH_MAX MAXMEM +# define CRASH_ADDR_HIGH_MAX SZ_64T #endif static int __init reserve_crashkernel_low(void) @@ -827,8 +836,14 @@ dump_kernel_offset(struct notifier_block *self, unsigned long v, void *p) void __init setup_arch(char **cmdline_p) { + /* + * Reserve the memory occupied by the kernel between _text and + * __end_of_kernel_reserve symbols. Any kernel sections after the + * __end_of_kernel_reserve symbol must be explicitly reserved with a + * separate memblock_reserve() or they will be discarded. + */ memblock_reserve(__pa_symbol(_text), - (unsigned long)__bss_stop - (unsigned long)_text); + (unsigned long)__end_of_kernel_reserve - (unsigned long)_text); /* * Make sure page 0 is always reserved because on systems with diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 0850b5149345..147cd020516a 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -368,6 +368,14 @@ SECTIONS __bss_stop = .; } + /* + * The memory occupied from _text to here, __end_of_kernel_reserve, is + * automatically reserved in setup_arch(). Anything after here must be + * explicitly reserved using memblock_reserve() or it will be discarded + * and treated as available memory. + */ + __end_of_kernel_reserve = .; + . = ALIGN(PAGE_SIZE); .brk : AT(ADDR(.brk) - LOAD_OFFSET) { __brk_base = .; @@ -379,10 +387,34 @@ SECTIONS . = ALIGN(PAGE_SIZE); /* keep VO_INIT_SIZE page aligned */ _end = .; +#ifdef CONFIG_AMD_MEM_ENCRYPT + /* + * Early scratch/workarea section: Lives outside of the kernel proper + * (_text - _end). + * + * Resides after _end because even though the .brk section is after + * __end_of_kernel_reserve, the .brk section is later reserved as a + * part of the kernel. Since it is located after __end_of_kernel_reserve + * it will be discarded and become part of the available memory. As + * such, it can only be used by very early boot code and must not be + * needed afterwards. + * + * Currently used by SME for performing in-place encryption of the + * kernel during boot. Resides on a 2MB boundary to simplify the + * pagetable setup used for SME in-place encryption. + */ + . = ALIGN(HPAGE_SIZE); + .init.scratch : AT(ADDR(.init.scratch) - LOAD_OFFSET) { + __init_scratch_begin = .; + *(.init.scratch) + . = ALIGN(HPAGE_SIZE); + __init_scratch_end = .; + } +#endif + STABS_DEBUG DWARF_DEBUG - /* Sections to be discarded */ DISCARDS /DISCARD/ : { *(.eh_frame) diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 4b6423e7bd21..e500f1df1140 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -28,9 +28,11 @@ #include "physaddr.h" -struct ioremap_mem_flags { - bool system_ram; - bool desc_other; +/* + * Descriptor controlling ioremap() behavior. + */ +struct ioremap_desc { + unsigned int flags; }; /* @@ -62,13 +64,14 @@ int ioremap_change_attr(unsigned long vaddr, unsigned long size, return err; } -static bool __ioremap_check_ram(struct resource *res) +/* Does the range (or a subset of) contain normal RAM? */ +static unsigned int __ioremap_check_ram(struct resource *res) { unsigned long start_pfn, stop_pfn; unsigned long i; if ((res->flags & IORESOURCE_SYSTEM_RAM) != IORESOURCE_SYSTEM_RAM) - return false; + return 0; start_pfn = (res->start + PAGE_SIZE - 1) >> PAGE_SHIFT; stop_pfn = (res->end + 1) >> PAGE_SHIFT; @@ -76,28 +79,44 @@ static bool __ioremap_check_ram(struct resource *res) for (i = 0; i < (stop_pfn - start_pfn); ++i) if (pfn_valid(start_pfn + i) && !PageReserved(pfn_to_page(start_pfn + i))) - return true; + return IORES_MAP_SYSTEM_RAM; } - return false; + return 0; } -static int __ioremap_check_desc_other(struct resource *res) +/* + * In a SEV guest, NONE and RESERVED should not be mapped encrypted because + * there the whole memory is already encrypted. + */ +static unsigned int __ioremap_check_encrypted(struct resource *res) { - return (res->desc != IORES_DESC_NONE); + if (!sev_active()) + return 0; + + switch (res->desc) { + case IORES_DESC_NONE: + case IORES_DESC_RESERVED: + break; + default: + return IORES_MAP_ENCRYPTED; + } + + return 0; } -static int __ioremap_res_check(struct resource *res, void *arg) +static int __ioremap_collect_map_flags(struct resource *res, void *arg) { - struct ioremap_mem_flags *flags = arg; + struct ioremap_desc *desc = arg; - if (!flags->system_ram) - flags->system_ram = __ioremap_check_ram(res); + if (!(desc->flags & IORES_MAP_SYSTEM_RAM)) + desc->flags |= __ioremap_check_ram(res); - if (!flags->desc_other) - flags->desc_other = __ioremap_check_desc_other(res); + if (!(desc->flags & IORES_MAP_ENCRYPTED)) + desc->flags |= __ioremap_check_encrypted(res); - return flags->system_ram && flags->desc_other; + return ((desc->flags & (IORES_MAP_SYSTEM_RAM | IORES_MAP_ENCRYPTED)) == + (IORES_MAP_SYSTEM_RAM | IORES_MAP_ENCRYPTED)); } /* @@ -106,15 +125,15 @@ static int __ioremap_res_check(struct resource *res, void *arg) * resource described not as IORES_DESC_NONE (e.g. IORES_DESC_ACPI_TABLES). */ static void __ioremap_check_mem(resource_size_t addr, unsigned long size, - struct ioremap_mem_flags *flags) + struct ioremap_desc *desc) { u64 start, end; start = (u64)addr; end = start + size - 1; - memset(flags, 0, sizeof(*flags)); + memset(desc, 0, sizeof(struct ioremap_desc)); - walk_mem_res(start, end, flags, __ioremap_res_check); + walk_mem_res(start, end, desc, __ioremap_collect_map_flags); } /* @@ -131,15 +150,15 @@ static void __ioremap_check_mem(resource_size_t addr, unsigned long size, * have to convert them into an offset in a page-aligned mapping, but the * caller shouldn't need to know that small detail. */ -static void __iomem *__ioremap_caller(resource_size_t phys_addr, - unsigned long size, enum page_cache_mode pcm, - void *caller, bool encrypted) +static void __iomem * +__ioremap_caller(resource_size_t phys_addr, unsigned long size, + enum page_cache_mode pcm, void *caller, bool encrypted) { unsigned long offset, vaddr; resource_size_t last_addr; const resource_size_t unaligned_phys_addr = phys_addr; const unsigned long unaligned_size = size; - struct ioremap_mem_flags mem_flags; + struct ioremap_desc io_desc; struct vm_struct *area; enum page_cache_mode new_pcm; pgprot_t prot; @@ -158,12 +177,12 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, return NULL; } - __ioremap_check_mem(phys_addr, size, &mem_flags); + __ioremap_check_mem(phys_addr, size, &io_desc); /* * Don't allow anybody to remap normal RAM that we're using.. */ - if (mem_flags.system_ram) { + if (io_desc.flags & IORES_MAP_SYSTEM_RAM) { WARN_ONCE(1, "ioremap on RAM at %pa - %pa\n", &phys_addr, &last_addr); return NULL; @@ -201,7 +220,7 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, * resulting mapping. */ prot = PAGE_KERNEL_IO; - if ((sev_active() && mem_flags.desc_other) || encrypted) + if ((io_desc.flags & IORES_MAP_ENCRYPTED) || encrypted) prot = pgprot_encrypted(prot); switch (pcm) { diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c index dddcd2a1afdb..e2b0e2ac07bb 100644 --- a/arch/x86/mm/mem_encrypt_identity.c +++ b/arch/x86/mm/mem_encrypt_identity.c @@ -70,6 +70,19 @@ struct sme_populate_pgd_data { unsigned long vaddr_end; }; +/* + * This work area lives in the .init.scratch section, which lives outside of + * the kernel proper. It is sized to hold the intermediate copy buffer and + * more than enough pagetable pages. + * + * By using this section, the kernel can be encrypted in place and it + * avoids any possibility of boot parameters or initramfs images being + * placed such that the in-place encryption logic overwrites them. This + * section is 2MB aligned to allow for simple pagetable setup using only + * PMD entries (see vmlinux.lds.S). + */ +static char sme_workarea[2 * PMD_PAGE_SIZE] __section(.init.scratch); + static char sme_cmdline_arg[] __initdata = "mem_encrypt"; static char sme_cmdline_on[] __initdata = "on"; static char sme_cmdline_off[] __initdata = "off"; @@ -311,8 +324,13 @@ void __init sme_encrypt_kernel(struct boot_params *bp) } #endif - /* Set the encryption workarea to be immediately after the kernel */ - workarea_start = kernel_end; + /* + * We're running identity mapped, so we must obtain the address to the + * SME encryption workarea using rip-relative addressing. + */ + asm ("lea sme_workarea(%%rip), %0" + : "=r" (workarea_start) + : "p" (sme_workarea)); /* * Calculate required number of workarea bytes needed: diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 91f6db92554c..4de9704c4aaf 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -712,7 +712,7 @@ void native_flush_tlb_others(const struct cpumask *cpumask, } /* - * See Documentation/x86/tlb.txt for details. We choose 33 + * See Documentation/x86/tlb.rst for details. We choose 33 * because it is large enough to cover the vast majority (at * least 95%) of allocations, and is small enough that we are * confident it will not cause too much overhead. Each single diff --git a/arch/x86/net/bpf_jit_comp32.c b/arch/x86/net/bpf_jit_comp32.c index b29e82f190c7..393d251798c0 100644 --- a/arch/x86/net/bpf_jit_comp32.c +++ b/arch/x86/net/bpf_jit_comp32.c @@ -253,13 +253,14 @@ static inline void emit_ia32_mov_r(const u8 dst, const u8 src, bool dstk, /* dst = src */ static inline void emit_ia32_mov_r64(const bool is64, const u8 dst[], const u8 src[], bool dstk, - bool sstk, u8 **pprog) + bool sstk, u8 **pprog, + const struct bpf_prog_aux *aux) { emit_ia32_mov_r(dst_lo, src_lo, dstk, sstk, pprog); if (is64) /* complete 8 byte move */ emit_ia32_mov_r(dst_hi, src_hi, dstk, sstk, pprog); - else + else if (!aux->verifier_zext) /* zero out high 4 bytes */ emit_ia32_mov_i(dst_hi, 0, dstk, pprog); } @@ -313,7 +314,8 @@ static inline void emit_ia32_mul_r(const u8 dst, const u8 src, bool dstk, } static inline void emit_ia32_to_le_r64(const u8 dst[], s32 val, - bool dstk, u8 **pprog) + bool dstk, u8 **pprog, + const struct bpf_prog_aux *aux) { u8 *prog = *pprog; int cnt = 0; @@ -334,12 +336,14 @@ static inline void emit_ia32_to_le_r64(const u8 dst[], s32 val, */ EMIT2(0x0F, 0xB7); EMIT1(add_2reg(0xC0, dreg_lo, dreg_lo)); - /* xor dreg_hi,dreg_hi */ - EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi)); + if (!aux->verifier_zext) + /* xor dreg_hi,dreg_hi */ + EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi)); break; case 32: - /* xor dreg_hi,dreg_hi */ - EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi)); + if (!aux->verifier_zext) + /* xor dreg_hi,dreg_hi */ + EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi)); break; case 64: /* nop */ @@ -358,7 +362,8 @@ static inline void emit_ia32_to_le_r64(const u8 dst[], s32 val, } static inline void emit_ia32_to_be_r64(const u8 dst[], s32 val, - bool dstk, u8 **pprog) + bool dstk, u8 **pprog, + const struct bpf_prog_aux *aux) { u8 *prog = *pprog; int cnt = 0; @@ -380,16 +385,18 @@ static inline void emit_ia32_to_be_r64(const u8 dst[], s32 val, EMIT2(0x0F, 0xB7); EMIT1(add_2reg(0xC0, dreg_lo, dreg_lo)); - /* xor dreg_hi,dreg_hi */ - EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi)); + if (!aux->verifier_zext) + /* xor dreg_hi,dreg_hi */ + EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi)); break; case 32: /* Emit 'bswap eax' to swap lower 4 bytes */ EMIT1(0x0F); EMIT1(add_1reg(0xC8, dreg_lo)); - /* xor dreg_hi,dreg_hi */ - EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi)); + if (!aux->verifier_zext) + /* xor dreg_hi,dreg_hi */ + EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi)); break; case 64: /* Emit 'bswap eax' to swap lower 4 bytes */ @@ -569,7 +576,7 @@ static inline void emit_ia32_alu_r(const bool is64, const bool hi, const u8 op, static inline void emit_ia32_alu_r64(const bool is64, const u8 op, const u8 dst[], const u8 src[], bool dstk, bool sstk, - u8 **pprog) + u8 **pprog, const struct bpf_prog_aux *aux) { u8 *prog = *pprog; @@ -577,7 +584,7 @@ static inline void emit_ia32_alu_r64(const bool is64, const u8 op, if (is64) emit_ia32_alu_r(is64, true, op, dst_hi, src_hi, dstk, sstk, &prog); - else + else if (!aux->verifier_zext) emit_ia32_mov_i(dst_hi, 0, dstk, &prog); *pprog = prog; } @@ -668,7 +675,8 @@ static inline void emit_ia32_alu_i(const bool is64, const bool hi, const u8 op, /* ALU operation (64 bit) */ static inline void emit_ia32_alu_i64(const bool is64, const u8 op, const u8 dst[], const u32 val, - bool dstk, u8 **pprog) + bool dstk, u8 **pprog, + const struct bpf_prog_aux *aux) { u8 *prog = *pprog; u32 hi = 0; @@ -679,7 +687,7 @@ static inline void emit_ia32_alu_i64(const bool is64, const u8 op, emit_ia32_alu_i(is64, false, op, dst_lo, val, dstk, &prog); if (is64) emit_ia32_alu_i(is64, true, op, dst_hi, hi, dstk, &prog); - else + else if (!aux->verifier_zext) emit_ia32_mov_i(dst_hi, 0, dstk, &prog); *pprog = prog; @@ -724,9 +732,6 @@ static inline void emit_ia32_lsh_r64(const u8 dst[], const u8 src[], { u8 *prog = *pprog; int cnt = 0; - static int jmp_label1 = -1; - static int jmp_label2 = -1; - static int jmp_label3 = -1; u8 dreg_lo = dstk ? IA32_EAX : dst_lo; u8 dreg_hi = dstk ? IA32_EDX : dst_hi; @@ -745,78 +750,22 @@ static inline void emit_ia32_lsh_r64(const u8 dst[], const u8 src[], /* mov ecx,src_lo */ EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_ECX)); - /* cmp ecx,32 */ - EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 32); - /* Jumps when >= 32 */ - if (is_imm8(jmp_label(jmp_label1, 2))) - EMIT2(IA32_JAE, jmp_label(jmp_label1, 2)); - else - EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label1, 6)); - - /* < 32 */ - /* shl dreg_hi,cl */ - EMIT2(0xD3, add_1reg(0xE0, dreg_hi)); - /* mov ebx,dreg_lo */ - EMIT2(0x8B, add_2reg(0xC0, dreg_lo, IA32_EBX)); + /* shld dreg_hi,dreg_lo,cl */ + EMIT3(0x0F, 0xA5, add_2reg(0xC0, dreg_hi, dreg_lo)); /* shl dreg_lo,cl */ EMIT2(0xD3, add_1reg(0xE0, dreg_lo)); - /* IA32_ECX = -IA32_ECX + 32 */ - /* neg ecx */ - EMIT2(0xF7, add_1reg(0xD8, IA32_ECX)); - /* add ecx,32 */ - EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32); + /* if ecx >= 32, mov dreg_lo into dreg_hi and clear dreg_lo */ - /* shr ebx,cl */ - EMIT2(0xD3, add_1reg(0xE8, IA32_EBX)); - /* or dreg_hi,ebx */ - EMIT2(0x09, add_2reg(0xC0, dreg_hi, IA32_EBX)); - - /* goto out; */ - if (is_imm8(jmp_label(jmp_label3, 2))) - EMIT2(0xEB, jmp_label(jmp_label3, 2)); - else - EMIT1_off32(0xE9, jmp_label(jmp_label3, 5)); - - /* >= 32 */ - if (jmp_label1 == -1) - jmp_label1 = cnt; - - /* cmp ecx,64 */ - EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 64); - /* Jumps when >= 64 */ - if (is_imm8(jmp_label(jmp_label2, 2))) - EMIT2(IA32_JAE, jmp_label(jmp_label2, 2)); - else - EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label2, 6)); + /* cmp ecx,32 */ + EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 32); + /* skip the next two instructions (4 bytes) when < 32 */ + EMIT2(IA32_JB, 4); - /* >= 32 && < 64 */ - /* sub ecx,32 */ - EMIT3(0x83, add_1reg(0xE8, IA32_ECX), 32); - /* shl dreg_lo,cl */ - EMIT2(0xD3, add_1reg(0xE0, dreg_lo)); /* mov dreg_hi,dreg_lo */ EMIT2(0x89, add_2reg(0xC0, dreg_hi, dreg_lo)); - - /* xor dreg_lo,dreg_lo */ - EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo)); - - /* goto out; */ - if (is_imm8(jmp_label(jmp_label3, 2))) - EMIT2(0xEB, jmp_label(jmp_label3, 2)); - else - EMIT1_off32(0xE9, jmp_label(jmp_label3, 5)); - - /* >= 64 */ - if (jmp_label2 == -1) - jmp_label2 = cnt; /* xor dreg_lo,dreg_lo */ EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo)); - /* xor dreg_hi,dreg_hi */ - EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi)); - - if (jmp_label3 == -1) - jmp_label3 = cnt; if (dstk) { /* mov dword ptr [ebp+off],dreg_lo */ @@ -836,9 +785,6 @@ static inline void emit_ia32_arsh_r64(const u8 dst[], const u8 src[], { u8 *prog = *pprog; int cnt = 0; - static int jmp_label1 = -1; - static int jmp_label2 = -1; - static int jmp_label3 = -1; u8 dreg_lo = dstk ? IA32_EAX : dst_lo; u8 dreg_hi = dstk ? IA32_EDX : dst_hi; @@ -857,79 +803,23 @@ static inline void emit_ia32_arsh_r64(const u8 dst[], const u8 src[], /* mov ecx,src_lo */ EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_ECX)); - /* cmp ecx,32 */ - EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 32); - /* Jumps when >= 32 */ - if (is_imm8(jmp_label(jmp_label1, 2))) - EMIT2(IA32_JAE, jmp_label(jmp_label1, 2)); - else - EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label1, 6)); - - /* < 32 */ - /* lshr dreg_lo,cl */ - EMIT2(0xD3, add_1reg(0xE8, dreg_lo)); - /* mov ebx,dreg_hi */ - EMIT2(0x8B, add_2reg(0xC0, dreg_hi, IA32_EBX)); - /* ashr dreg_hi,cl */ + /* shrd dreg_lo,dreg_hi,cl */ + EMIT3(0x0F, 0xAD, add_2reg(0xC0, dreg_lo, dreg_hi)); + /* sar dreg_hi,cl */ EMIT2(0xD3, add_1reg(0xF8, dreg_hi)); - /* IA32_ECX = -IA32_ECX + 32 */ - /* neg ecx */ - EMIT2(0xF7, add_1reg(0xD8, IA32_ECX)); - /* add ecx,32 */ - EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32); - - /* shl ebx,cl */ - EMIT2(0xD3, add_1reg(0xE0, IA32_EBX)); - /* or dreg_lo,ebx */ - EMIT2(0x09, add_2reg(0xC0, dreg_lo, IA32_EBX)); - - /* goto out; */ - if (is_imm8(jmp_label(jmp_label3, 2))) - EMIT2(0xEB, jmp_label(jmp_label3, 2)); - else - EMIT1_off32(0xE9, jmp_label(jmp_label3, 5)); - - /* >= 32 */ - if (jmp_label1 == -1) - jmp_label1 = cnt; + /* if ecx >= 32, mov dreg_hi to dreg_lo and set/clear dreg_hi depending on sign */ - /* cmp ecx,64 */ - EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 64); - /* Jumps when >= 64 */ - if (is_imm8(jmp_label(jmp_label2, 2))) - EMIT2(IA32_JAE, jmp_label(jmp_label2, 2)); - else - EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label2, 6)); + /* cmp ecx,32 */ + EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 32); + /* skip the next two instructions (5 bytes) when < 32 */ + EMIT2(IA32_JB, 5); - /* >= 32 && < 64 */ - /* sub ecx,32 */ - EMIT3(0x83, add_1reg(0xE8, IA32_ECX), 32); - /* ashr dreg_hi,cl */ - EMIT2(0xD3, add_1reg(0xF8, dreg_hi)); /* mov dreg_lo,dreg_hi */ EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi)); - - /* ashr dreg_hi,imm8 */ + /* sar dreg_hi,31 */ EMIT3(0xC1, add_1reg(0xF8, dreg_hi), 31); - /* goto out; */ - if (is_imm8(jmp_label(jmp_label3, 2))) - EMIT2(0xEB, jmp_label(jmp_label3, 2)); - else - EMIT1_off32(0xE9, jmp_label(jmp_label3, 5)); - - /* >= 64 */ - if (jmp_label2 == -1) - jmp_label2 = cnt; - /* ashr dreg_hi,imm8 */ - EMIT3(0xC1, add_1reg(0xF8, dreg_hi), 31); - /* mov dreg_lo,dreg_hi */ - EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi)); - - if (jmp_label3 == -1) - jmp_label3 = cnt; - if (dstk) { /* mov dword ptr [ebp+off],dreg_lo */ EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo), @@ -948,9 +838,6 @@ static inline void emit_ia32_rsh_r64(const u8 dst[], const u8 src[], bool dstk, { u8 *prog = *pprog; int cnt = 0; - static int jmp_label1 = -1; - static int jmp_label2 = -1; - static int jmp_label3 = -1; u8 dreg_lo = dstk ? IA32_EAX : dst_lo; u8 dreg_hi = dstk ? IA32_EDX : dst_hi; @@ -969,77 +856,23 @@ static inline void emit_ia32_rsh_r64(const u8 dst[], const u8 src[], bool dstk, /* mov ecx,src_lo */ EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_ECX)); - /* cmp ecx,32 */ - EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 32); - /* Jumps when >= 32 */ - if (is_imm8(jmp_label(jmp_label1, 2))) - EMIT2(IA32_JAE, jmp_label(jmp_label1, 2)); - else - EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label1, 6)); - - /* < 32 */ - /* lshr dreg_lo,cl */ - EMIT2(0xD3, add_1reg(0xE8, dreg_lo)); - /* mov ebx,dreg_hi */ - EMIT2(0x8B, add_2reg(0xC0, dreg_hi, IA32_EBX)); + /* shrd dreg_lo,dreg_hi,cl */ + EMIT3(0x0F, 0xAD, add_2reg(0xC0, dreg_lo, dreg_hi)); /* shr dreg_hi,cl */ EMIT2(0xD3, add_1reg(0xE8, dreg_hi)); - /* IA32_ECX = -IA32_ECX + 32 */ - /* neg ecx */ - EMIT2(0xF7, add_1reg(0xD8, IA32_ECX)); - /* add ecx,32 */ - EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32); - - /* shl ebx,cl */ - EMIT2(0xD3, add_1reg(0xE0, IA32_EBX)); - /* or dreg_lo,ebx */ - EMIT2(0x09, add_2reg(0xC0, dreg_lo, IA32_EBX)); - - /* goto out; */ - if (is_imm8(jmp_label(jmp_label3, 2))) - EMIT2(0xEB, jmp_label(jmp_label3, 2)); - else - EMIT1_off32(0xE9, jmp_label(jmp_label3, 5)); + /* if ecx >= 32, mov dreg_hi to dreg_lo and clear dreg_hi */ - /* >= 32 */ - if (jmp_label1 == -1) - jmp_label1 = cnt; - /* cmp ecx,64 */ - EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 64); - /* Jumps when >= 64 */ - if (is_imm8(jmp_label(jmp_label2, 2))) - EMIT2(IA32_JAE, jmp_label(jmp_label2, 2)); - else - EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label2, 6)); + /* cmp ecx,32 */ + EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 32); + /* skip the next two instructions (4 bytes) when < 32 */ + EMIT2(IA32_JB, 4); - /* >= 32 && < 64 */ - /* sub ecx,32 */ - EMIT3(0x83, add_1reg(0xE8, IA32_ECX), 32); - /* shr dreg_hi,cl */ - EMIT2(0xD3, add_1reg(0xE8, dreg_hi)); /* mov dreg_lo,dreg_hi */ EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi)); /* xor dreg_hi,dreg_hi */ EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi)); - /* goto out; */ - if (is_imm8(jmp_label(jmp_label3, 2))) - EMIT2(0xEB, jmp_label(jmp_label3, 2)); - else - EMIT1_off32(0xE9, jmp_label(jmp_label3, 5)); - - /* >= 64 */ - if (jmp_label2 == -1) - jmp_label2 = cnt; - /* xor dreg_lo,dreg_lo */ - EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo)); - /* xor dreg_hi,dreg_hi */ - EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi)); - - if (jmp_label3 == -1) - jmp_label3 = cnt; - if (dstk) { /* mov dword ptr [ebp+off],dreg_lo */ EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo), @@ -1069,27 +902,10 @@ static inline void emit_ia32_lsh_i64(const u8 dst[], const u32 val, } /* Do LSH operation */ if (val < 32) { - /* shl dreg_hi,imm8 */ - EMIT3(0xC1, add_1reg(0xE0, dreg_hi), val); - /* mov ebx,dreg_lo */ - EMIT2(0x8B, add_2reg(0xC0, dreg_lo, IA32_EBX)); + /* shld dreg_hi,dreg_lo,imm8 */ + EMIT4(0x0F, 0xA4, add_2reg(0xC0, dreg_hi, dreg_lo), val); /* shl dreg_lo,imm8 */ EMIT3(0xC1, add_1reg(0xE0, dreg_lo), val); - - /* IA32_ECX = 32 - val */ - /* mov ecx,val */ - EMIT2(0xB1, val); - /* movzx ecx,ecx */ - EMIT3(0x0F, 0xB6, add_2reg(0xC0, IA32_ECX, IA32_ECX)); - /* neg ecx */ - EMIT2(0xF7, add_1reg(0xD8, IA32_ECX)); - /* add ecx,32 */ - EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32); - - /* shr ebx,cl */ - EMIT2(0xD3, add_1reg(0xE8, IA32_EBX)); - /* or dreg_hi,ebx */ - EMIT2(0x09, add_2reg(0xC0, dreg_hi, IA32_EBX)); } else if (val >= 32 && val < 64) { u32 value = val - 32; @@ -1135,27 +951,10 @@ static inline void emit_ia32_rsh_i64(const u8 dst[], const u32 val, /* Do RSH operation */ if (val < 32) { - /* shr dreg_lo,imm8 */ - EMIT3(0xC1, add_1reg(0xE8, dreg_lo), val); - /* mov ebx,dreg_hi */ - EMIT2(0x8B, add_2reg(0xC0, dreg_hi, IA32_EBX)); + /* shrd dreg_lo,dreg_hi,imm8 */ + EMIT4(0x0F, 0xAC, add_2reg(0xC0, dreg_lo, dreg_hi), val); /* shr dreg_hi,imm8 */ EMIT3(0xC1, add_1reg(0xE8, dreg_hi), val); - - /* IA32_ECX = 32 - val */ - /* mov ecx,val */ - EMIT2(0xB1, val); - /* movzx ecx,ecx */ - EMIT3(0x0F, 0xB6, add_2reg(0xC0, IA32_ECX, IA32_ECX)); - /* neg ecx */ - EMIT2(0xF7, add_1reg(0xD8, IA32_ECX)); - /* add ecx,32 */ - EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32); - - /* shl ebx,cl */ - EMIT2(0xD3, add_1reg(0xE0, IA32_EBX)); - /* or dreg_lo,ebx */ - EMIT2(0x09, add_2reg(0xC0, dreg_lo, IA32_EBX)); } else if (val >= 32 && val < 64) { u32 value = val - 32; @@ -1200,27 +999,10 @@ static inline void emit_ia32_arsh_i64(const u8 dst[], const u32 val, } /* Do RSH operation */ if (val < 32) { - /* shr dreg_lo,imm8 */ - EMIT3(0xC1, add_1reg(0xE8, dreg_lo), val); - /* mov ebx,dreg_hi */ - EMIT2(0x8B, add_2reg(0xC0, dreg_hi, IA32_EBX)); + /* shrd dreg_lo,dreg_hi,imm8 */ + EMIT4(0x0F, 0xAC, add_2reg(0xC0, dreg_lo, dreg_hi), val); /* ashr dreg_hi,imm8 */ EMIT3(0xC1, add_1reg(0xF8, dreg_hi), val); - - /* IA32_ECX = 32 - val */ - /* mov ecx,val */ - EMIT2(0xB1, val); - /* movzx ecx,ecx */ - EMIT3(0x0F, 0xB6, add_2reg(0xC0, IA32_ECX, IA32_ECX)); - /* neg ecx */ - EMIT2(0xF7, add_1reg(0xD8, IA32_ECX)); - /* add ecx,32 */ - EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32); - - /* shl ebx,cl */ - EMIT2(0xD3, add_1reg(0xE0, IA32_EBX)); - /* or dreg_lo,ebx */ - EMIT2(0x09, add_2reg(0xC0, dreg_lo, IA32_EBX)); } else if (val >= 32 && val < 64) { u32 value = val - 32; @@ -1713,8 +1495,13 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, case BPF_ALU64 | BPF_MOV | BPF_X: switch (BPF_SRC(code)) { case BPF_X: - emit_ia32_mov_r64(is64, dst, src, dstk, - sstk, &prog); + if (imm32 == 1) { + /* Special mov32 for zext. */ + emit_ia32_mov_i(dst_hi, 0, dstk, &prog); + break; + } + emit_ia32_mov_r64(is64, dst, src, dstk, sstk, + &prog, bpf_prog->aux); break; case BPF_K: /* Sign-extend immediate value to dst reg */ @@ -1754,11 +1541,13 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, switch (BPF_SRC(code)) { case BPF_X: emit_ia32_alu_r64(is64, BPF_OP(code), dst, - src, dstk, sstk, &prog); + src, dstk, sstk, &prog, + bpf_prog->aux); break; case BPF_K: emit_ia32_alu_i64(is64, BPF_OP(code), dst, - imm32, dstk, &prog); + imm32, dstk, &prog, + bpf_prog->aux); break; } break; @@ -1777,7 +1566,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, false, &prog); break; } - emit_ia32_mov_i(dst_hi, 0, dstk, &prog); + if (!bpf_prog->aux->verifier_zext) + emit_ia32_mov_i(dst_hi, 0, dstk, &prog); break; case BPF_ALU | BPF_LSH | BPF_X: case BPF_ALU | BPF_RSH | BPF_X: @@ -1797,7 +1587,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, &prog); break; } - emit_ia32_mov_i(dst_hi, 0, dstk, &prog); + if (!bpf_prog->aux->verifier_zext) + emit_ia32_mov_i(dst_hi, 0, dstk, &prog); break; /* dst = dst / src(imm) */ /* dst = dst % src(imm) */ @@ -1819,7 +1610,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, &prog); break; } - emit_ia32_mov_i(dst_hi, 0, dstk, &prog); + if (!bpf_prog->aux->verifier_zext) + emit_ia32_mov_i(dst_hi, 0, dstk, &prog); break; case BPF_ALU64 | BPF_DIV | BPF_K: case BPF_ALU64 | BPF_DIV | BPF_X: @@ -1836,7 +1628,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), imm32); emit_ia32_shift_r(BPF_OP(code), dst_lo, IA32_ECX, dstk, false, &prog); - emit_ia32_mov_i(dst_hi, 0, dstk, &prog); + if (!bpf_prog->aux->verifier_zext) + emit_ia32_mov_i(dst_hi, 0, dstk, &prog); break; /* dst = dst << imm */ case BPF_ALU64 | BPF_LSH | BPF_K: @@ -1872,7 +1665,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, case BPF_ALU | BPF_NEG: emit_ia32_alu_i(is64, false, BPF_OP(code), dst_lo, 0, dstk, &prog); - emit_ia32_mov_i(dst_hi, 0, dstk, &prog); + if (!bpf_prog->aux->verifier_zext) + emit_ia32_mov_i(dst_hi, 0, dstk, &prog); break; /* dst = ~dst (64 bit) */ case BPF_ALU64 | BPF_NEG: @@ -1892,11 +1686,13 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, break; /* dst = htole(dst) */ case BPF_ALU | BPF_END | BPF_FROM_LE: - emit_ia32_to_le_r64(dst, imm32, dstk, &prog); + emit_ia32_to_le_r64(dst, imm32, dstk, &prog, + bpf_prog->aux); break; /* dst = htobe(dst) */ case BPF_ALU | BPF_END | BPF_FROM_BE: - emit_ia32_to_be_r64(dst, imm32, dstk, &prog); + emit_ia32_to_be_r64(dst, imm32, dstk, &prog, + bpf_prog->aux); break; /* dst = imm64 */ case BPF_LD | BPF_IMM | BPF_DW: { @@ -2051,6 +1847,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, case BPF_B: case BPF_H: case BPF_W: + if (!bpf_prog->aux->verifier_zext) + break; if (dstk) { EMIT3(0xC7, add_1reg(0x40, IA32_EBP), STACK_VAR(dst_hi)); @@ -2475,6 +2273,11 @@ notyet: return proglen; } +bool bpf_jit_needs_zext(void) +{ + return true; +} + struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) { struct bpf_binary_header *header = NULL; diff --git a/arch/x86/platform/pvh/enlighten.c b/arch/x86/platform/pvh/enlighten.c index 1861a2ba0f2b..c0a502f7e3a7 100644 --- a/arch/x86/platform/pvh/enlighten.c +++ b/arch/x86/platform/pvh/enlighten.c @@ -86,7 +86,7 @@ static void __init init_pvh_bootparams(bool xen_guest) } /* - * See Documentation/x86/boot.txt. + * See Documentation/x86/boot.rst. * * Version 2.12 supports Xen entry point but we will use default x86/PC * environment (i.e. hardware_subarch 0). diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig index 6ec1b75eabc5..ebc135bda921 100644 --- a/arch/xtensa/Kconfig +++ b/arch/xtensa/Kconfig @@ -2,6 +2,7 @@ config XTENSA def_bool y select ARCH_32BIT_OFF_T + select ARCH_HAS_BINFMT_FLAT if !MMU select ARCH_HAS_SYNC_DMA_FOR_CPU select ARCH_HAS_SYNC_DMA_FOR_DEVICE select ARCH_NO_COHERENT_DMA_MMAP if !MMU diff --git a/arch/xtensa/include/asm/flat.h b/arch/xtensa/include/asm/flat.h index b8532d7877b3..ed5870c779f9 100644 --- a/arch/xtensa/include/asm/flat.h +++ b/arch/xtensa/include/asm/flat.h @@ -4,11 +4,8 @@ #include <asm/unaligned.h> -#define flat_argvp_envp_on_stack() 0 -#define flat_old_ram_flag(flags) (flags) -#define flat_reloc_valid(reloc, size) ((reloc) <= (size)) static inline int flat_get_addr_from_rp(u32 __user *rp, u32 relval, u32 flags, - u32 *addr, u32 *persistent) + u32 *addr) { *addr = get_unaligned((__force u32 *)rp); return 0; @@ -18,7 +15,5 @@ static inline int flat_put_addr_at_rp(u32 __user *rp, u32 addr, u32 rel) put_unaligned(addr, (__force u32 *)rp); return 0; } -#define flat_get_relocate_addr(rel) (rel) -#define flat_set_persistent(relval, p) 0 #endif /* __ASM_XTENSA_FLAT_H */ diff --git a/arch/xtensa/include/asm/unistd.h b/arch/xtensa/include/asm/unistd.h index 30af4dc3ce7b..b52236245e51 100644 --- a/arch/xtensa/include/asm/unistd.h +++ b/arch/xtensa/include/asm/unistd.h @@ -3,6 +3,7 @@ #define _XTENSA_UNISTD_H #define __ARCH_WANT_SYS_CLONE +#define __ARCH_WANT_SYS_CLONE3 #include <uapi/asm/unistd.h> #define __ARCH_WANT_NEW_STAT diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl index 5fa0ee1c8e00..25f4de729a6d 100644 --- a/arch/xtensa/kernel/syscalls/syscall.tbl +++ b/arch/xtensa/kernel/syscalls/syscall.tbl @@ -404,3 +404,5 @@ 431 common fsconfig sys_fsconfig 432 common fsmount sys_fsmount 433 common fspick sys_fspick +434 common pidfd_open sys_pidfd_open +435 common clone3 sys_clone3 |