diff options
Diffstat (limited to 'tools')
155 files changed, 3647 insertions, 1065 deletions
diff --git a/tools/arch/arm/include/uapi/asm/kvm.h b/tools/arch/arm/include/uapi/asm/kvm.h index 6edd177bb1c7..caae4843cb70 100644 --- a/tools/arch/arm/include/uapi/asm/kvm.h +++ b/tools/arch/arm/include/uapi/asm/kvm.h @@ -135,6 +135,15 @@ struct kvm_arch_memory_slot { #define KVM_REG_ARM_CRM_SHIFT 7 #define KVM_REG_ARM_32_CRN_MASK 0x0000000000007800 #define KVM_REG_ARM_32_CRN_SHIFT 11 +/* + * For KVM currently all guest registers are nonsecure, but we reserve a bit + * in the encoding to distinguish secure from nonsecure for AArch32 system + * registers that are banked by security. This is 1 for the secure banked + * register, and 0 for the nonsecure banked register or if the register is + * not banked by security. + */ +#define KVM_REG_ARM_SECURE_MASK 0x0000000010000000 +#define KVM_REG_ARM_SECURE_SHIFT 28 #define ARM_CP15_REG_SHIFT_MASK(x,n) \ (((x) << KVM_REG_ARM_ ## n ## _SHIFT) & KVM_REG_ARM_ ## n ## _MASK) @@ -186,6 +195,12 @@ struct kvm_arch_memory_slot { #define KVM_REG_ARM_VFP_FPINST 0x1009 #define KVM_REG_ARM_VFP_FPINST2 0x100A +/* KVM-as-firmware specific pseudo-registers */ +#define KVM_REG_ARM_FW (0x0014 << KVM_REG_ARM_COPROC_SHIFT) +#define KVM_REG_ARM_FW_REG(r) (KVM_REG_ARM | KVM_REG_SIZE_U64 | \ + KVM_REG_ARM_FW | ((r) & 0xffff)) +#define KVM_REG_ARM_PSCI_VERSION KVM_REG_ARM_FW_REG(0) + /* Device Control API: ARM VGIC */ #define KVM_DEV_ARM_VGIC_GRP_ADDR 0 #define KVM_DEV_ARM_VGIC_GRP_DIST_REGS 1 diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h index 9abbf3044654..04b3256f8e6d 100644 --- a/tools/arch/arm64/include/uapi/asm/kvm.h +++ b/tools/arch/arm64/include/uapi/asm/kvm.h @@ -206,6 +206,12 @@ struct kvm_arch_memory_slot { #define KVM_REG_ARM_TIMER_CNT ARM64_SYS_REG(3, 3, 14, 3, 2) #define KVM_REG_ARM_TIMER_CVAL ARM64_SYS_REG(3, 3, 14, 0, 2) +/* KVM-as-firmware specific pseudo-registers */ +#define KVM_REG_ARM_FW (0x0014 << KVM_REG_ARM_COPROC_SHIFT) +#define KVM_REG_ARM_FW_REG(r) (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \ + KVM_REG_ARM_FW | ((r) & 0xffff)) +#define KVM_REG_ARM_PSCI_VERSION KVM_REG_ARM_FW_REG(0) + /* Device Control API: ARM VGIC */ #define KVM_DEV_ARM_VGIC_GRP_ADDR 0 #define KVM_DEV_ARM_VGIC_GRP_DIST_REGS 1 diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index f41079da38c5..578793e97431 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -316,9 +316,11 @@ #define X86_FEATURE_VPCLMULQDQ (16*32+10) /* Carry-Less Multiplication Double Quadword */ #define X86_FEATURE_AVX512_VNNI (16*32+11) /* Vector Neural Network Instructions */ #define X86_FEATURE_AVX512_BITALG (16*32+12) /* Support for VPOPCNT[B,W] and VPSHUF-BITQMB instructions */ +#define X86_FEATURE_TME (16*32+13) /* Intel Total Memory Encryption */ #define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */ #define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */ #define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */ +#define X86_FEATURE_CLDEMOTE (16*32+25) /* CLDEMOTE instruction */ /* AMD-defined CPU features, CPUID level 0x80000007 (EBX), word 17 */ #define X86_FEATURE_OVERFLOW_RECOV (17*32+ 0) /* MCA overflow recovery support */ @@ -328,6 +330,7 @@ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */ #define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */ #define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */ +#define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */ #define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ #define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ #define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */ diff --git a/tools/arch/x86/include/asm/required-features.h b/tools/arch/x86/include/asm/required-features.h index fb3a6de7440b..6847d85400a8 100644 --- a/tools/arch/x86/include/asm/required-features.h +++ b/tools/arch/x86/include/asm/required-features.h @@ -53,12 +53,6 @@ # define NEED_MOVBE 0 #endif -#ifdef CONFIG_X86_5LEVEL -# define NEED_LA57 (1<<(X86_FEATURE_LA57 & 31)) -#else -# define NEED_LA57 0 -#endif - #ifdef CONFIG_X86_64 #ifdef CONFIG_PARAVIRT /* Paravirtualized systems may not have PSE or PGE available */ @@ -104,7 +98,7 @@ #define REQUIRED_MASK13 0 #define REQUIRED_MASK14 0 #define REQUIRED_MASK15 0 -#define REQUIRED_MASK16 (NEED_LA57) +#define REQUIRED_MASK16 0 #define REQUIRED_MASK17 0 #define REQUIRED_MASK18 0 #define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19) diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index f3a960488eae..c535c2fdea13 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -354,8 +354,25 @@ struct kvm_xcrs { __u64 padding[16]; }; -/* definition of registers in kvm_run */ +#define KVM_SYNC_X86_REGS (1UL << 0) +#define KVM_SYNC_X86_SREGS (1UL << 1) +#define KVM_SYNC_X86_EVENTS (1UL << 2) + +#define KVM_SYNC_X86_VALID_FIELDS \ + (KVM_SYNC_X86_REGS| \ + KVM_SYNC_X86_SREGS| \ + KVM_SYNC_X86_EVENTS) + +/* kvm_sync_regs struct included by kvm_run struct */ struct kvm_sync_regs { + /* Members of this structure are potentially malicious. + * Care must be taken by code reading, esp. interpreting, + * data fields from them inside KVM to prevent TOCTOU and + * double-fetch types of vulnerabilities. + */ + struct kvm_regs regs; + struct kvm_sregs sregs; + struct kvm_vcpu_events events; }; #define KVM_X86_QUIRK_LINT0_REENABLED (1 << 0) diff --git a/tools/bpf/Makefile b/tools/bpf/Makefile index 1ea545965ee3..53b60ad452f5 100644 --- a/tools/bpf/Makefile +++ b/tools/bpf/Makefile @@ -76,6 +76,8 @@ $(OUTPUT)bpf_asm: $(OUTPUT)bpf_asm.o $(OUTPUT)bpf_exp.yacc.o $(OUTPUT)bpf_exp.le $(QUIET_LINK)$(CC) $(CFLAGS) -o $@ $^ $(OUTPUT)bpf_exp.lex.c: $(OUTPUT)bpf_exp.yacc.c +$(OUTPUT)bpf_exp.yacc.o: $(OUTPUT)bpf_exp.yacc.c +$(OUTPUT)bpf_exp.lex.o: $(OUTPUT)bpf_exp.lex.c clean: bpftool_clean $(call QUIET_CLEAN, bpf-progs) diff --git a/tools/bpf/bpf_dbg.c b/tools/bpf/bpf_dbg.c index 4f254bcc4423..61b9aa5d6415 100644 --- a/tools/bpf/bpf_dbg.c +++ b/tools/bpf/bpf_dbg.c @@ -1063,7 +1063,7 @@ static int cmd_load_pcap(char *file) static int cmd_load(char *arg) { - char *subcmd, *cont, *tmp = strdup(arg); + char *subcmd, *cont = NULL, *tmp = strdup(arg); int ret = CMD_OK; subcmd = strtok_r(tmp, " ", &cont); @@ -1073,7 +1073,10 @@ static int cmd_load(char *arg) bpf_reset(); bpf_reset_breakpoints(); - ret = cmd_load_bpf(cont); + if (!cont) + ret = CMD_ERR; + else + ret = cmd_load_bpf(cont); } else if (matches(subcmd, "pcap") == 0) { ret = cmd_load_pcap(cont); } else { diff --git a/tools/build/Build.include b/tools/build/Build.include index 418871d02ebf..a4bbb984941d 100644 --- a/tools/build/Build.include +++ b/tools/build/Build.include @@ -12,6 +12,7 @@ # Convenient variables comma := , squote := ' +pound := \# ### # Name of target with a '.' as filename prefix. foo/bar.o => foo/.bar.o @@ -43,11 +44,11 @@ echo-cmd = $(if $($(quiet)cmd_$(1)),\ ### # Replace >$< with >$$< to preserve $ when reloading the .cmd file # (needed for make) -# Replace >#< with >\#< to avoid starting a comment in the .cmd file +# Replace >#< with >$(pound)< to avoid starting a comment in the .cmd file # (needed for make) # Replace >'< with >'\''< to be able to enclose the whole string in '...' # (needed for the shell) -make-cmd = $(call escsq,$(subst \#,\\\#,$(subst $$,$$$$,$(cmd_$(1))))) +make-cmd = $(call escsq,$(subst $(pound),$$(pound),$(subst $$,$$$$,$(cmd_$(1))))) ### # Find any prerequisites that is newer than target or that does not exist. diff --git a/tools/include/linux/compiler.h b/tools/include/linux/compiler.h index 04e32f965ad7..1827c2f973f9 100644 --- a/tools/include/linux/compiler.h +++ b/tools/include/linux/compiler.h @@ -151,11 +151,21 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s * required ordering. */ -#define READ_ONCE(x) \ - ({ union { typeof(x) __val; char __c[1]; } __u; __read_once_size(&(x), __u.__c, sizeof(x)); __u.__val; }) - -#define WRITE_ONCE(x, val) \ - ({ union { typeof(x) __val; char __c[1]; } __u = { .__val = (val) }; __write_once_size(&(x), __u.__c, sizeof(x)); __u.__val; }) +#define READ_ONCE(x) \ +({ \ + union { typeof(x) __val; char __c[1]; } __u = \ + { .__c = { 0 } }; \ + __read_once_size(&(x), __u.__c, sizeof(x)); \ + __u.__val; \ +}) + +#define WRITE_ONCE(x, val) \ +({ \ + union { typeof(x) __val; char __c[1]; } __u = \ + { .__val = (val) }; \ + __write_once_size(&(x), __u.__c, sizeof(x)); \ + __u.__val; \ +}) #ifndef __fallthrough diff --git a/tools/include/linux/coresight-pmu.h b/tools/include/linux/coresight-pmu.h index edfeaba95429..a1a959ba24ff 100644 --- a/tools/include/linux/coresight-pmu.h +++ b/tools/include/linux/coresight-pmu.h @@ -1,18 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright(C) 2015 Linaro Limited. All rights reserved. * Author: Mathieu Poirier <mathieu.poirier@linaro.org> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program. If not, see <http://www.gnu.org/licenses/>. */ #ifndef _LINUX_CORESIGHT_PMU_H diff --git a/tools/include/linux/spinlock.h b/tools/include/linux/spinlock.h index b21b586b9854..1738c0391da4 100644 --- a/tools/include/linux/spinlock.h +++ b/tools/include/linux/spinlock.h @@ -6,8 +6,9 @@ #include <stdbool.h> #define spinlock_t pthread_mutex_t -#define DEFINE_SPINLOCK(x) pthread_mutex_t x = PTHREAD_MUTEX_INITIALIZER; +#define DEFINE_SPINLOCK(x) pthread_mutex_t x = PTHREAD_MUTEX_INITIALIZER #define __SPIN_LOCK_UNLOCKED(x) (pthread_mutex_t)PTHREAD_MUTEX_INITIALIZER +#define spin_lock_init(x) pthread_mutex_init(x, NULL) #define spin_lock_irqsave(x, f) (void)f, pthread_mutex_lock(x) #define spin_unlock_irqrestore(x, f) (void)f, pthread_mutex_unlock(x) diff --git a/tools/include/tools/config.h b/tools/include/tools/config.h new file mode 100644 index 000000000000..08ade7df8132 --- /dev/null +++ b/tools/include/tools/config.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _TOOLS_CONFIG_H +#define _TOOLS_CONFIG_H + +/* Subset of include/linux/kconfig.h */ + +#define __ARG_PLACEHOLDER_1 0, +#define __take_second_arg(__ignored, val, ...) val + +/* + * Helper macros to use CONFIG_ options in C/CPP expressions. Note that + * these only work with boolean and tristate options. + */ + +/* + * Getting something that works in C and CPP for an arg that may or may + * not be defined is tricky. Here, if we have "#define CONFIG_BOOGER 1" + * we match on the placeholder define, insert the "0," for arg1 and generate + * the triplet (0, 1, 0). Then the last step cherry picks the 2nd arg (a one). + * When CONFIG_BOOGER is not defined, we generate a (... 1, 0) pair, and when + * the last step cherry picks the 2nd arg, we get a zero. + */ +#define __is_defined(x) ___is_defined(x) +#define ___is_defined(val) ____is_defined(__ARG_PLACEHOLDER_##val) +#define ____is_defined(arg1_or_junk) __take_second_arg(arg1_or_junk 1, 0) + +/* + * IS_BUILTIN(CONFIG_FOO) evaluates to 1 if CONFIG_FOO is set to 'y', 0 + * otherwise. For boolean options, this is equivalent to + * IS_ENABLED(CONFIG_FOO). + */ +#define IS_BUILTIN(option) __is_defined(option) + +#endif /* _TOOLS_CONFIG_H */ diff --git a/tools/include/uapi/asm-generic/mman-common.h b/tools/include/uapi/asm-generic/mman-common.h index f8b134f5608f..e7ee32861d51 100644 --- a/tools/include/uapi/asm-generic/mman-common.h +++ b/tools/include/uapi/asm-generic/mman-common.h @@ -27,6 +27,9 @@ # define MAP_UNINITIALIZED 0x0 /* Don't support this flag */ #endif +/* 0x0100 - 0x80000 flags are defined in asm-generic/mman.h */ +#define MAP_FIXED_NOREPLACE 0x100000 /* MAP_FIXED which doesn't unmap underlying mapping */ + /* * Flags for mlock */ diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h index 536ee4febd74..7f5634ce8e88 100644 --- a/tools/include/uapi/drm/i915_drm.h +++ b/tools/include/uapi/drm/i915_drm.h @@ -318,6 +318,7 @@ typedef struct _drm_i915_sarea { #define DRM_I915_PERF_OPEN 0x36 #define DRM_I915_PERF_ADD_CONFIG 0x37 #define DRM_I915_PERF_REMOVE_CONFIG 0x38 +#define DRM_I915_QUERY 0x39 #define DRM_IOCTL_I915_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t) #define DRM_IOCTL_I915_FLUSH DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH) @@ -375,6 +376,7 @@ typedef struct _drm_i915_sarea { #define DRM_IOCTL_I915_PERF_OPEN DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_OPEN, struct drm_i915_perf_open_param) #define DRM_IOCTL_I915_PERF_ADD_CONFIG DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_ADD_CONFIG, struct drm_i915_perf_oa_config) #define DRM_IOCTL_I915_PERF_REMOVE_CONFIG DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_REMOVE_CONFIG, __u64) +#define DRM_IOCTL_I915_QUERY DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_QUERY, struct drm_i915_query) /* Allow drivers to submit batchbuffers directly to hardware, relying * on the security mechanisms provided by hardware. @@ -1358,7 +1360,9 @@ struct drm_intel_overlay_attrs { * active on a given plane. */ -#define I915_SET_COLORKEY_NONE (1<<0) /* disable color key matching */ +#define I915_SET_COLORKEY_NONE (1<<0) /* Deprecated. Instead set + * flags==0 to disable colorkeying. + */ #define I915_SET_COLORKEY_DESTINATION (1<<1) #define I915_SET_COLORKEY_SOURCE (1<<2) struct drm_intel_sprite_colorkey { @@ -1604,15 +1608,115 @@ struct drm_i915_perf_oa_config { __u32 n_flex_regs; /* - * These fields are pointers to tuples of u32 values (register - * address, value). For example the expected length of the buffer - * pointed by mux_regs_ptr is (2 * sizeof(u32) * n_mux_regs). + * These fields are pointers to tuples of u32 values (register address, + * value). For example the expected length of the buffer pointed by + * mux_regs_ptr is (2 * sizeof(u32) * n_mux_regs). */ __u64 mux_regs_ptr; __u64 boolean_regs_ptr; __u64 flex_regs_ptr; }; +struct drm_i915_query_item { + __u64 query_id; +#define DRM_I915_QUERY_TOPOLOGY_INFO 1 + + /* + * When set to zero by userspace, this is filled with the size of the + * data to be written at the data_ptr pointer. The kernel sets this + * value to a negative value to signal an error on a particular query + * item. + */ + __s32 length; + + /* + * Unused for now. Must be cleared to zero. + */ + __u32 flags; + + /* + * Data will be written at the location pointed by data_ptr when the + * value of length matches the length of the data to be written by the + * kernel. + */ + __u64 data_ptr; +}; + +struct drm_i915_query { + __u32 num_items; + + /* + * Unused for now. Must be cleared to zero. + */ + __u32 flags; + + /* + * This points to an array of num_items drm_i915_query_item structures. + */ + __u64 items_ptr; +}; + +/* + * Data written by the kernel with query DRM_I915_QUERY_TOPOLOGY_INFO : + * + * data: contains the 3 pieces of information : + * + * - the slice mask with one bit per slice telling whether a slice is + * available. The availability of slice X can be queried with the following + * formula : + * + * (data[X / 8] >> (X % 8)) & 1 + * + * - the subslice mask for each slice with one bit per subslice telling + * whether a subslice is available. The availability of subslice Y in slice + * X can be queried with the following formula : + * + * (data[subslice_offset + + * X * subslice_stride + + * Y / 8] >> (Y % 8)) & 1 + * + * - the EU mask for each subslice in each slice with one bit per EU telling + * whether an EU is available. The availability of EU Z in subslice Y in + * slice X can be queried with the following formula : + * + * (data[eu_offset + + * (X * max_subslices + Y) * eu_stride + + * Z / 8] >> (Z % 8)) & 1 + */ +struct drm_i915_query_topology_info { + /* + * Unused for now. Must be cleared to zero. + */ + __u16 flags; + + __u16 max_slices; + __u16 max_subslices; + __u16 max_eus_per_subslice; + + /* + * Offset in data[] at which the subslice masks are stored. + */ + __u16 subslice_offset; + + /* + * Stride at which each of the subslice masks for each slice are + * stored. + */ + __u16 subslice_stride; + + /* + * Offset in data[] at which the EU masks are stored. + */ + __u16 eu_offset; + + /* + * Stride at which each of the EU masks for each subslice are stored. + */ + __u16 eu_stride; + + __u8 data[]; +}; + #if defined(__cplusplus) } #endif diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 9d07465023a2..8c317737ba3f 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -864,6 +864,7 @@ enum bpf_func_id { /* BPF_FUNC_skb_set_tunnel_key flags. */ #define BPF_F_ZERO_CSUM_TX (1ULL << 1) #define BPF_F_DONT_FRAGMENT (1ULL << 2) +#define BPF_F_SEQ_NUMBER (1ULL << 3) /* BPF_FUNC_perf_event_output, BPF_FUNC_perf_event_read and * BPF_FUNC_perf_event_read_value flags. @@ -1016,6 +1017,7 @@ struct bpf_prog_info { __aligned_u64 map_ids; char name[BPF_OBJ_NAME_LEN]; __u32 ifindex; + __u32 :32; __u64 netns_dev; __u64 netns_ino; } __attribute__((aligned(8))); @@ -1029,6 +1031,7 @@ struct bpf_map_info { __u32 map_flags; char name[BPF_OBJ_NAME_LEN]; __u32 ifindex; + __u32 :32; __u64 netns_dev; __u64 netns_ino; } __attribute__((aligned(8))); diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h index 6d9447700e18..68699f654118 100644 --- a/tools/include/uapi/linux/if_link.h +++ b/tools/include/uapi/linux/if_link.h @@ -941,4 +941,43 @@ enum { IFLA_EVENT_BONDING_OPTIONS, /* change in bonding options */ }; +/* tun section */ + +enum { + IFLA_TUN_UNSPEC, + IFLA_TUN_OWNER, + IFLA_TUN_GROUP, + IFLA_TUN_TYPE, + IFLA_TUN_PI, + IFLA_TUN_VNET_HDR, + IFLA_TUN_PERSIST, + IFLA_TUN_MULTI_QUEUE, + IFLA_TUN_NUM_QUEUES, + IFLA_TUN_NUM_DISABLED_QUEUES, + __IFLA_TUN_MAX, +}; + +#define IFLA_TUN_MAX (__IFLA_TUN_MAX - 1) + +/* rmnet section */ + +#define RMNET_FLAGS_INGRESS_DEAGGREGATION (1U << 0) +#define RMNET_FLAGS_INGRESS_MAP_COMMANDS (1U << 1) +#define RMNET_FLAGS_INGRESS_MAP_CKSUMV4 (1U << 2) +#define RMNET_FLAGS_EGRESS_MAP_CKSUMV4 (1U << 3) + +enum { + IFLA_RMNET_UNSPEC, + IFLA_RMNET_MUX_ID, + IFLA_RMNET_FLAGS, + __IFLA_RMNET_MAX, +}; + +#define IFLA_RMNET_MAX (__IFLA_RMNET_MAX - 1) + +struct ifla_rmnet_flags { + __u32 flags; + __u32 mask; +}; + #endif /* _UAPI_LINUX_IF_LINK_H */ diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 6b89f87db200..b02c41e53d56 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -396,6 +396,10 @@ struct kvm_run { char padding[256]; }; + /* 2048 is the size of the char array used to bound/pad the size + * of the union that holds sync regs. + */ + #define SYNC_REGS_SIZE_BYTES 2048 /* * shared registers between kvm and userspace. * kvm_valid_regs specifies the register classes set by the host @@ -407,7 +411,7 @@ struct kvm_run { __u64 kvm_dirty_regs; union { struct kvm_sync_regs regs; - char padding[2048]; + char padding[SYNC_REGS_SIZE_BYTES]; } s; }; @@ -672,6 +676,13 @@ struct kvm_ioeventfd { __u8 pad[36]; }; +#define KVM_X86_DISABLE_EXITS_MWAIT (1 << 0) +#define KVM_X86_DISABLE_EXITS_HTL (1 << 1) +#define KVM_X86_DISABLE_EXITS_PAUSE (1 << 2) +#define KVM_X86_DISABLE_VALID_EXITS (KVM_X86_DISABLE_EXITS_MWAIT | \ + KVM_X86_DISABLE_EXITS_HTL | \ + KVM_X86_DISABLE_EXITS_PAUSE) + /* for KVM_ENABLE_CAP */ struct kvm_enable_cap { /* in */ @@ -936,6 +947,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_PPC_GET_CPU_CHAR 151 #define KVM_CAP_S390_BPB 152 #define KVM_CAP_GET_MSR_FEATURES 153 +#define KVM_CAP_HYPERV_EVENTFD 154 #ifdef KVM_CAP_IRQ_ROUTING @@ -1375,6 +1387,10 @@ struct kvm_enc_region { #define KVM_MEMORY_ENCRYPT_REG_REGION _IOR(KVMIO, 0xbb, struct kvm_enc_region) #define KVM_MEMORY_ENCRYPT_UNREG_REGION _IOR(KVMIO, 0xbc, struct kvm_enc_region) +/* Available with KVM_CAP_HYPERV_EVENTFD */ +#define KVM_HYPERV_EVENTFD _IOW(KVMIO, 0xbd, struct kvm_hyperv_eventfd) + + /* Secure Encrypted Virtualization command */ enum sev_cmd_id { /* Guest initialization commands */ @@ -1515,4 +1531,14 @@ struct kvm_assigned_msix_entry { #define KVM_ARM_DEV_EL1_PTIMER (1 << 1) #define KVM_ARM_DEV_PMU (1 << 2) +struct kvm_hyperv_eventfd { + __u32 conn_id; + __s32 fd; + __u32 flags; + __u32 padding[3]; +}; + +#define KVM_HYPERV_CONN_ID_MASK 0x00ffffff +#define KVM_HYPERV_EVENTFD_DEASSIGN (1 << 0) + #endif /* __LINUX_KVM_H */ diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index 912b85b52344..b8e288a1f740 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -650,11 +650,23 @@ struct perf_event_mmap_page { #define PERF_RECORD_MISC_COMM_EXEC (1 << 13) #define PERF_RECORD_MISC_SWITCH_OUT (1 << 13) /* - * Indicates that the content of PERF_SAMPLE_IP points to - * the actual instruction that triggered the event. See also - * perf_event_attr::precise_ip. + * These PERF_RECORD_MISC_* flags below are safely reused + * for the following events: + * + * PERF_RECORD_MISC_EXACT_IP - PERF_RECORD_SAMPLE of precise events + * PERF_RECORD_MISC_SWITCH_OUT_PREEMPT - PERF_RECORD_SWITCH* events + * + * + * PERF_RECORD_MISC_EXACT_IP: + * Indicates that the content of PERF_SAMPLE_IP points to + * the actual instruction that triggered the event. See also + * perf_event_attr::precise_ip. + * + * PERF_RECORD_MISC_SWITCH_OUT_PREEMPT: + * Indicates that thread was preempted in TASK_RUNNING state. */ #define PERF_RECORD_MISC_EXACT_IP (1 << 14) +#define PERF_RECORD_MISC_SWITCH_OUT_PREEMPT (1 << 14) /* * Reserve the last bit to indicate some extended misc field */ diff --git a/tools/include/uapi/sound/asound.h b/tools/include/uapi/sound/asound.h index 07d61583fd02..ed0a120d4f08 100644 --- a/tools/include/uapi/sound/asound.h +++ b/tools/include/uapi/sound/asound.h @@ -242,6 +242,7 @@ typedef int __bitwise snd_pcm_format_t; #define SNDRV_PCM_FORMAT_DSD_U16_BE ((__force snd_pcm_format_t) 51) /* DSD, 2-byte samples DSD (x16), big endian */ #define SNDRV_PCM_FORMAT_DSD_U32_BE ((__force snd_pcm_format_t) 52) /* DSD, 4-byte samples DSD (x32), big endian */ #define SNDRV_PCM_FORMAT_LAST SNDRV_PCM_FORMAT_DSD_U32_BE +#define SNDRV_PCM_FORMAT_FIRST SNDRV_PCM_FORMAT_S8 #ifdef SNDRV_LITTLE_ENDIAN #define SNDRV_PCM_FORMAT_S16 SNDRV_PCM_FORMAT_S16_LE diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 5922443063f0..0f9f06df49bc 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -2035,7 +2035,7 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, return -EINVAL; obj = bpf_object__open(attr->file); - if (IS_ERR(obj)) + if (IS_ERR_OR_NULL(obj)) return -ENOENT; bpf_object__for_each_program(prog, obj) { diff --git a/tools/lib/subcmd/parse-options.c b/tools/lib/subcmd/parse-options.c index f6a1babcbac4..cb7154eccbdc 100644 --- a/tools/lib/subcmd/parse-options.c +++ b/tools/lib/subcmd/parse-options.c @@ -433,7 +433,7 @@ match: if (ambiguous_option) { fprintf(stderr, - " Error: Ambiguous option: %s (could be --%s%s or --%s%s)", + " Error: Ambiguous option: %s (could be --%s%s or --%s%s)\n", arg, (ambiguous_flags & OPT_UNSET) ? "no-" : "", ambiguous_option->long_name, @@ -458,7 +458,7 @@ static void check_typos(const char *arg, const struct option *options) return; if (strstarts(arg, "no-")) { - fprintf(stderr, " Error: did you mean `--%s` (with two dashes ?)", arg); + fprintf(stderr, " Error: did you mean `--%s` (with two dashes ?)\n", arg); exit(129); } @@ -466,7 +466,7 @@ static void check_typos(const char *arg, const struct option *options) if (!options->long_name) continue; if (strstarts(options->long_name, arg)) { - fprintf(stderr, " Error: did you mean `--%s` (with two dashes ?)", arg); + fprintf(stderr, " Error: did you mean `--%s` (with two dashes ?)\n", arg); exit(129); } } diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile index e6acc281dd37..f76d9914686a 100644 --- a/tools/objtool/Makefile +++ b/tools/objtool/Makefile @@ -31,11 +31,11 @@ INCLUDES := -I$(srctree)/tools/include \ -I$(srctree)/tools/arch/$(HOSTARCH)/include/uapi \ -I$(srctree)/tools/objtool/arch/$(ARCH)/include WARNINGS := $(EXTRA_WARNINGS) -Wno-switch-default -Wno-switch-enum -Wno-packed -CFLAGS += -Wall -Werror $(WARNINGS) -fomit-frame-pointer -O2 -g $(INCLUDES) -LDFLAGS += -lelf $(LIBSUBCMD) +CFLAGS += -Werror $(WARNINGS) $(HOSTCFLAGS) -g $(INCLUDES) +LDFLAGS += -lelf $(LIBSUBCMD) $(HOSTLDFLAGS) # Allow old libelf to be used: -elfshdr := $(shell echo '\#include <libelf.h>' | $(CC) $(CFLAGS) -x c -E - | grep elf_getshdr) +elfshdr := $(shell echo '$(pound)include <libelf.h>' | $(CC) $(CFLAGS) -x c -E - | grep elf_getshdr) CFLAGS += $(if $(elfshdr),,-DLIBELF_USE_DEPRECATED) AWK = awk diff --git a/tools/objtool/arch/x86/include/asm/insn.h b/tools/objtool/arch/x86/include/asm/insn.h index b3e32b010ab1..c2c01f84df75 100644 --- a/tools/objtool/arch/x86/include/asm/insn.h +++ b/tools/objtool/arch/x86/include/asm/insn.h @@ -208,4 +208,22 @@ static inline int insn_offset_immediate(struct insn *insn) return insn_offset_displacement(insn) + insn->displacement.nbytes; } +#define POP_SS_OPCODE 0x1f +#define MOV_SREG_OPCODE 0x8e + +/* + * Intel SDM Vol.3A 6.8.3 states; + * "Any single-step trap that would be delivered following the MOV to SS + * instruction or POP to SS instruction (because EFLAGS.TF is 1) is + * suppressed." + * This function returns true if @insn is MOV SS or POP SS. On these + * instructions, single stepping is suppressed. + */ +static inline int insn_masking_exception(struct insn *insn) +{ + return insn->opcode.bytes[0] == POP_SS_OPCODE || + (insn->opcode.bytes[0] == MOV_SREG_OPCODE && + X86_MODRM_REG(insn->modrm.bytes[0]) == 2); +} + #endif /* _ASM_X86_INSN_H */ diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 5409f6f6c48d..3a31b238f885 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -59,6 +59,31 @@ static struct instruction *next_insn_same_sec(struct objtool_file *file, return next; } +static struct instruction *next_insn_same_func(struct objtool_file *file, + struct instruction *insn) +{ + struct instruction *next = list_next_entry(insn, list); + struct symbol *func = insn->func; + + if (!func) + return NULL; + + if (&next->list != &file->insn_list && next->func == func) + return next; + + /* Check if we're already in the subfunction: */ + if (func == func->cfunc) + return NULL; + + /* Move to the subfunction: */ + return find_insn(file, func->cfunc->sec, func->cfunc->offset); +} + +#define func_for_each_insn_all(file, func, insn) \ + for (insn = find_insn(file, func->sec, func->offset); \ + insn; \ + insn = next_insn_same_func(file, insn)) + #define func_for_each_insn(file, func, insn) \ for (insn = find_insn(file, func->sec, func->offset); \ insn && &insn->list != &file->insn_list && \ @@ -149,10 +174,14 @@ static int __dead_end_function(struct objtool_file *file, struct symbol *func, if (!strcmp(func->name, global_noreturns[i])) return 1; - if (!func->sec) + if (!func->len) return 0; - func_for_each_insn(file, func, insn) { + insn = find_insn(file, func->sec, func->offset); + if (!insn->func) + return 0; + + func_for_each_insn_all(file, func, insn) { empty = false; if (insn->type == INSN_RETURN) @@ -167,35 +196,28 @@ static int __dead_end_function(struct objtool_file *file, struct symbol *func, * case, the function's dead-end status depends on whether the target * of the sibling call returns. */ - func_for_each_insn(file, func, insn) { - if (insn->sec != func->sec || - insn->offset >= func->offset + func->len) - break; - + func_for_each_insn_all(file, func, insn) { if (insn->type == INSN_JUMP_UNCONDITIONAL) { struct instruction *dest = insn->jump_dest; - struct symbol *dest_func; if (!dest) /* sibling call to another file */ return 0; - if (dest->sec != func->sec || - dest->offset < func->offset || - dest->offset >= func->offset + func->len) { - /* local sibling call */ - dest_func = find_symbol_by_offset(dest->sec, - dest->offset); - if (!dest_func) - continue; + if (dest->func && dest->func->pfunc != insn->func->pfunc) { + /* local sibling call */ if (recursion == 5) { - WARN_FUNC("infinite recursion (objtool bug!)", - dest->sec, dest->offset); - return -1; + /* + * Infinite recursion: two functions + * have sibling calls to each other. + * This is a very rare case. It means + * they aren't dead ends. + */ + return 0; } - return __dead_end_function(file, dest_func, + return __dead_end_function(file, dest->func, recursion + 1); } } @@ -422,7 +444,7 @@ static void add_ignores(struct objtool_file *file) if (!ignore_func(file, func)) continue; - func_for_each_insn(file, func, insn) + func_for_each_insn_all(file, func, insn) insn->ignore = true; } } @@ -782,30 +804,35 @@ out: return ret; } -static int add_switch_table(struct objtool_file *file, struct symbol *func, - struct instruction *insn, struct rela *table, - struct rela *next_table) +static int add_switch_table(struct objtool_file *file, struct instruction *insn, + struct rela *table, struct rela *next_table) { struct rela *rela = table; struct instruction *alt_insn; struct alternative *alt; + struct symbol *pfunc = insn->func->pfunc; + unsigned int prev_offset = 0; list_for_each_entry_from(rela, &file->rodata->rela->rela_list, list) { if (rela == next_table) break; - if (rela->sym->sec != insn->sec || - rela->addend <= func->offset || - rela->addend >= func->offset + func->len) + /* Make sure the switch table entries are consecutive: */ + if (prev_offset && rela->offset != prev_offset + 8) break; - alt_insn = find_insn(file, insn->sec, rela->addend); - if (!alt_insn) { - WARN("%s: can't find instruction at %s+0x%x", - file->rodata->rela->name, insn->sec->name, - rela->addend); - return -1; - } + /* Detect function pointers from contiguous objects: */ + if (rela->sym->sec == pfunc->sec && + rela->addend == pfunc->offset) + break; + + alt_insn = find_insn(file, rela->sym->sec, rela->addend); + if (!alt_insn) + break; + + /* Make sure the jmp dest is in the function or subfunction: */ + if (alt_insn->func->pfunc != pfunc) + break; alt = malloc(sizeof(*alt)); if (!alt) { @@ -815,6 +842,13 @@ static int add_switch_table(struct objtool_file *file, struct symbol *func, alt->insn = alt_insn; list_add_tail(&alt->list, &insn->alts); + prev_offset = rela->offset; + } + + if (!prev_offset) { + WARN_FUNC("can't find switch jump table", + insn->sec, insn->offset); + return -1; } return 0; @@ -869,40 +903,21 @@ static struct rela *find_switch_table(struct objtool_file *file, { struct rela *text_rela, *rodata_rela; struct instruction *orig_insn = insn; + unsigned long table_offset; - text_rela = find_rela_by_dest_range(insn->sec, insn->offset, insn->len); - if (text_rela && text_rela->sym == file->rodata->sym) { - /* case 1 */ - rodata_rela = find_rela_by_dest(file->rodata, - text_rela->addend); - if (rodata_rela) - return rodata_rela; - - /* case 2 */ - rodata_rela = find_rela_by_dest(file->rodata, - text_rela->addend + 4); - if (!rodata_rela) - return NULL; - - file->ignore_unreachables = true; - return rodata_rela; - } - - /* case 3 */ /* * Backward search using the @first_jump_src links, these help avoid * much of the 'in between' code. Which avoids us getting confused by * it. */ - for (insn = list_prev_entry(insn, list); - + for (; &insn->list != &file->insn_list && insn->sec == func->sec && insn->offset >= func->offset; insn = insn->first_jump_src ?: list_prev_entry(insn, list)) { - if (insn->type == INSN_JUMP_DYNAMIC) + if (insn != orig_insn && insn->type == INSN_JUMP_DYNAMIC) break; /* allow small jumps within the range */ @@ -918,18 +933,29 @@ static struct rela *find_switch_table(struct objtool_file *file, if (!text_rela || text_rela->sym != file->rodata->sym) continue; + table_offset = text_rela->addend; + if (text_rela->type == R_X86_64_PC32) + table_offset += 4; + /* * Make sure the .rodata address isn't associated with a * symbol. gcc jump tables are anonymous data. */ - if (find_symbol_containing(file->rodata, text_rela->addend)) + if (find_symbol_containing(file->rodata, table_offset)) continue; - rodata_rela = find_rela_by_dest(file->rodata, text_rela->addend); - if (!rodata_rela) - continue; + rodata_rela = find_rela_by_dest(file->rodata, table_offset); + if (rodata_rela) { + /* + * Use of RIP-relative switch jumps is quite rare, and + * indicates a rare GCC quirk/bug which can leave dead + * code behind. + */ + if (text_rela->type == R_X86_64_PC32) + file->ignore_unreachables = true; - return rodata_rela; + return rodata_rela; + } } return NULL; @@ -943,7 +969,7 @@ static int add_func_switch_tables(struct objtool_file *file, struct rela *rela, *prev_rela = NULL; int ret; - func_for_each_insn(file, func, insn) { + func_for_each_insn_all(file, func, insn) { if (!last) last = insn; @@ -974,8 +1000,7 @@ static int add_func_switch_tables(struct objtool_file *file, * the beginning of another switch table in the same function. */ if (prev_jump) { - ret = add_switch_table(file, func, prev_jump, prev_rela, - rela); + ret = add_switch_table(file, prev_jump, prev_rela, rela); if (ret) return ret; } @@ -985,7 +1010,7 @@ static int add_func_switch_tables(struct objtool_file *file, } if (prev_jump) { - ret = add_switch_table(file, func, prev_jump, prev_rela, NULL); + ret = add_switch_table(file, prev_jump, prev_rela, NULL); if (ret) return ret; } @@ -1749,15 +1774,13 @@ static int validate_branch(struct objtool_file *file, struct instruction *first, while (1) { next_insn = next_insn_same_sec(file, insn); - - if (file->c_file && func && insn->func && func != insn->func) { + if (file->c_file && func && insn->func && func != insn->func->pfunc) { WARN("%s() falls through to next function %s()", func->name, insn->func->name); return 1; } - if (insn->func) - func = insn->func; + func = insn->func ? insn->func->pfunc : NULL; if (func && insn->ignore) { WARN_FUNC("BUG: why am I validating an ignored function?", @@ -1778,7 +1801,7 @@ static int validate_branch(struct objtool_file *file, struct instruction *first, i = insn; save_insn = NULL; - func_for_each_insn_continue_reverse(file, func, i) { + func_for_each_insn_continue_reverse(file, insn->func, i) { if (i->save) { save_insn = i; break; @@ -1865,7 +1888,7 @@ static int validate_branch(struct objtool_file *file, struct instruction *first, case INSN_JUMP_UNCONDITIONAL: if (insn->jump_dest && (!func || !insn->jump_dest->func || - func == insn->jump_dest->func)) { + insn->jump_dest->func->pfunc == func)) { ret = validate_branch(file, insn->jump_dest, state); if (ret) @@ -2060,7 +2083,7 @@ static int validate_functions(struct objtool_file *file) for_each_sec(file, sec) { list_for_each_entry(func, &sec->symbol_list, list) { - if (func->type != STT_FUNC) + if (func->type != STT_FUNC || func->pfunc != func) continue; insn = find_insn(file, sec, func->offset); diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index c1c338661699..4e60e105583e 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -79,6 +79,19 @@ struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset) return NULL; } +struct symbol *find_symbol_by_name(struct elf *elf, const char *name) +{ + struct section *sec; + struct symbol *sym; + + list_for_each_entry(sec, &elf->sections, list) + list_for_each_entry(sym, &sec->symbol_list, list) + if (!strcmp(sym->name, name)) + return sym; + + return NULL; +} + struct symbol *find_symbol_containing(struct section *sec, unsigned long offset) { struct symbol *sym; @@ -203,10 +216,11 @@ static int read_sections(struct elf *elf) static int read_symbols(struct elf *elf) { - struct section *symtab; - struct symbol *sym; + struct section *symtab, *sec; + struct symbol *sym, *pfunc; struct list_head *entry, *tmp; int symbols_nr, i; + char *coldstr; symtab = find_section_by_name(elf, ".symtab"); if (!symtab) { @@ -281,6 +295,30 @@ static int read_symbols(struct elf *elf) hash_add(sym->sec->symbol_hash, &sym->hash, sym->idx); } + /* Create parent/child links for any cold subfunctions */ + list_for_each_entry(sec, &elf->sections, list) { + list_for_each_entry(sym, &sec->symbol_list, list) { + if (sym->type != STT_FUNC) + continue; + sym->pfunc = sym->cfunc = sym; + coldstr = strstr(sym->name, ".cold."); + if (coldstr) { + coldstr[0] = '\0'; + pfunc = find_symbol_by_name(elf, sym->name); + coldstr[0] = '.'; + + if (!pfunc) { + WARN("%s(): can't find parent function", + sym->name); + goto err; + } + + sym->pfunc = pfunc; + pfunc->cfunc = sym; + } + } + } + return 0; err: diff --git a/tools/objtool/elf.h b/tools/objtool/elf.h index d86e2ff14466..de5cd2ddded9 100644 --- a/tools/objtool/elf.h +++ b/tools/objtool/elf.h @@ -61,6 +61,7 @@ struct symbol { unsigned char bind, type; unsigned long offset; unsigned int len; + struct symbol *pfunc, *cfunc; }; struct rela { @@ -86,6 +87,7 @@ struct elf { struct elf *elf_open(const char *name, int flags); struct section *find_section_by_name(struct elf *elf, const char *name); struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset); +struct symbol *find_symbol_by_name(struct elf *elf, const char *name); struct symbol *find_symbol_containing(struct section *sec, unsigned long offset); struct rela *find_rela_by_dest(struct section *sec, unsigned long offset); struct rela *find_rela_by_dest_range(struct section *sec, unsigned long offset, diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt index 5b4fff3adc4b..32f4a898e3f2 100644 --- a/tools/perf/Documentation/perf-config.txt +++ b/tools/perf/Documentation/perf-config.txt @@ -334,6 +334,11 @@ annotate.*:: 99.93 │ mov %eax,%eax + annotate.offset_level:: + Default is '1', meaning just jump targets will have offsets show right beside + the instruction. When set to '2' 'call' instructions will also have its offsets + shown, 3 or higher will show offsets for all instructions. + hist.*:: hist.percentage:: This option control the way to calculate overhead of filtered entries - diff --git a/tools/perf/Documentation/perf-mem.txt b/tools/perf/Documentation/perf-mem.txt index b0211410969b..f8d2167cf3e7 100644 --- a/tools/perf/Documentation/perf-mem.txt +++ b/tools/perf/Documentation/perf-mem.txt @@ -28,29 +28,46 @@ OPTIONS <command>...:: Any command you can specify in a shell. +-i:: +--input=<file>:: + Input file name. + -f:: --force:: Don't do ownership validation -t:: ---type=:: +--type=<type>:: Select the memory operation type: load or store (default: load,store) -D:: ---dump-raw-samples=:: +--dump-raw-samples:: Dump the raw decoded samples on the screen in a format that is easy to parse with one sample per line. -x:: ---field-separator:: +--field-separator=<separator>:: Specify the field separator used when dump raw samples (-D option). By default, The separator is the space character. -C:: ---cpu-list:: - Restrict dump of raw samples to those provided via this option. Note that the same - option can be passed in record mode. It will be interpreted the same way as perf - record. +--cpu=<cpu>:: + Monitor only on the list of CPUs provided. Multiple CPUs can be provided as a + comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2. Default + is to monitor all CPUS. +-U:: +--hide-unresolved:: + Only display entries resolved to a symbol. + +-p:: +--phys-data:: + Record/Report sample physical addresses + +RECORD OPTIONS +-------------- +-e:: +--event <event>:: + Event selector. Use 'perf mem record -e list' to list available events. -K:: --all-kernel:: @@ -60,12 +77,15 @@ OPTIONS --all-user:: Configure all used events to run in user space. ---ldload:: +-v:: +--verbose:: + Be more verbose (show counter open errors, etc) + +--ldlat <n>:: Specify desired latency for loads event. --p:: ---phys-data:: - Record/Report sample physical addresses +In addition, for report all perf report options are valid, and for record +all perf record options. SEE ALSO -------- diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index e1a660e60849..917e36fde6d8 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -80,6 +80,7 @@ OPTIONS - comm: command (name) of the task which can be read via /proc/<pid>/comm - pid: command and tid of the task - dso: name of library or module executed at the time of sample + - dso_size: size of library or module executed at the time of sample - symbol: name of function executed at the time of sample - symbol_size: size of function executed at the time of sample - parent: name of function matched to the parent regex filter. Unmatched diff --git a/tools/perf/Documentation/perf-sched.txt b/tools/perf/Documentation/perf-sched.txt index bb33601a823b..63f938b887dd 100644 --- a/tools/perf/Documentation/perf-sched.txt +++ b/tools/perf/Documentation/perf-sched.txt @@ -104,8 +104,8 @@ OPTIONS for 'perf sched timehist' kallsyms pathname -g:: ---no-call-graph:: - Do not display call chains if present. +--call-graph:: + Display call chains if present (default on). --max-stack:: Maximum number of functions to display in backtrace, default 5. diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index 36ec0257f8d3..afdafe2110a1 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -228,14 +228,15 @@ OPTIONS For sample events it's possible to display misc field with -F +misc option, following letters are displayed for each bit: - PERF_RECORD_MISC_KERNEL K - PERF_RECORD_MISC_USER U - PERF_RECORD_MISC_HYPERVISOR H - PERF_RECORD_MISC_GUEST_KERNEL G - PERF_RECORD_MISC_GUEST_USER g - PERF_RECORD_MISC_MMAP_DATA* M - PERF_RECORD_MISC_COMM_EXEC E - PERF_RECORD_MISC_SWITCH_OUT S + PERF_RECORD_MISC_KERNEL K + PERF_RECORD_MISC_USER U + PERF_RECORD_MISC_HYPERVISOR H + PERF_RECORD_MISC_GUEST_KERNEL G + PERF_RECORD_MISC_GUEST_USER g + PERF_RECORD_MISC_MMAP_DATA* M + PERF_RECORD_MISC_COMM_EXEC E + PERF_RECORD_MISC_SWITCH_OUT S + PERF_RECORD_MISC_SWITCH_OUT_PREEMPT Sp $ perf script -F +misc ... sched-messaging 1414 K 28690.636582: 4590 cycles ... diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index f15b306be183..e6c3b4e555c2 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -153,7 +153,7 @@ perf stat --repeat 10 --null --sync --pre 'make -s O=defconfig-build/clean' -- m -I msecs:: --interval-print msecs:: -Print count deltas every N milliseconds (minimum: 10ms) +Print count deltas every N milliseconds (minimum: 1ms) The overhead percentage could be high in some cases, for instance with small, sub 100ms intervals. Use with caution. example: 'perf stat -I 1000 -e cycles -a sleep 5' diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt index 5a7035c5c523..115db9e06ecd 100644 --- a/tools/perf/Documentation/perf-trace.txt +++ b/tools/perf/Documentation/perf-trace.txt @@ -117,6 +117,9 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs. --sched:: Accrue thread runtime and provide a summary at the end of the session. +--failure:: + Show only syscalls that failed, i.e. that returned < 0. + -i:: --input:: Process events from a given perf data file. diff --git a/tools/perf/Documentation/perf-version.txt b/tools/perf/Documentation/perf-version.txt new file mode 100644 index 000000000000..e207b7cfca26 --- /dev/null +++ b/tools/perf/Documentation/perf-version.txt @@ -0,0 +1,24 @@ +perf-version(1) +=============== + +NAME +---- +perf-version - display the version of perf binary + +SYNOPSIS +-------- +'perf version' [--build-options] + +DESCRIPTION +----------- +With no options given, the 'perf version' prints the perf version +on the standard output. + +If the option '--build-options' is given, then the status of +compiled-in libraries are printed on the standard output. + +OPTIONS +------- +--build-options:: + Prints the status of compiled-in libraries on the + standard output. diff --git a/tools/perf/Documentation/perf.data-file-format.txt b/tools/perf/Documentation/perf.data-file-format.txt index d00f0d51cab8..dfb218feaad9 100644 --- a/tools/perf/Documentation/perf.data-file-format.txt +++ b/tools/perf/Documentation/perf.data-file-format.txt @@ -111,8 +111,8 @@ A perf_header_string with the CPU architecture (uname -m) A structure defining the number of CPUs. struct nr_cpus { - uint32_t nr_cpus_online; uint32_t nr_cpus_available; /* CPUs not yet onlined */ + uint32_t nr_cpus_online; }; HEADER_CPUDESC = 8, @@ -153,10 +153,18 @@ struct { HEADER_CPU_TOPOLOGY = 13, String lists defining the core and CPU threads topology. +The string lists are followed by a variable length array +which contains core_id and socket_id of each cpu. +The number of entries can be determined by the size of the +section minus the sizes of both string lists. struct { struct perf_header_string_list cores; /* Variable length */ struct perf_header_string_list threads; /* Variable length */ + struct { + uint32_t core_id; + uint32_t socket_id; + } cpus[nr]; /* Variable length records */ }; Example: diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 98ff73648b51..ae7dc46e8f8a 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -68,7 +68,7 @@ ifeq ($(NO_PERF_REGS),0) endif ifneq ($(NO_SYSCALL_TABLE),1) - CFLAGS += -DHAVE_SYSCALL_TABLE + CFLAGS += -DHAVE_SYSCALL_TABLE_SUPPORT endif # So far there's only x86 and arm libdw unwind support merged in perf. @@ -346,12 +346,16 @@ else ifneq ($(feature-dwarf_getlocations), 1) msg := $(warning Old libdw.h, finding variables at given 'perf probe' point will not work, install elfutils-devel/libdw-dev >= 0.157); else - CFLAGS += -DHAVE_DWARF_GETLOCATIONS + CFLAGS += -DHAVE_DWARF_GETLOCATIONS_SUPPORT endif # dwarf_getlocations endif # Dwarf support endif # libelf support endif # NO_LIBELF +ifeq ($(feature-glibc), 1) + CFLAGS += -DHAVE_GLIBC_SUPPORT +endif + ifdef NO_DWARF NO_LIBDW_DWARF_UNWIND := 1 endif @@ -635,6 +639,7 @@ else else LDFLAGS += $(PERL_EMBED_LDFLAGS) EXTLIBS += $(PERL_EMBED_LIBADD) + CFLAGS += -DHAVE_LIBPERL_SUPPORT $(call detected,CONFIG_LIBPERL) endif endif @@ -671,6 +676,7 @@ else LDFLAGS += $(PYTHON_EMBED_LDFLAGS) EXTLIBS += $(PYTHON_EMBED_LIBADD) LANG_BINDINGS += $(obj-perf)python/perf.so + CFLAGS += -DHAVE_LIBPYTHON_SUPPORT $(call detected,CONFIG_LIBPYTHON) endif endif @@ -841,7 +847,7 @@ ifndef NO_JVMTI ifeq ($(feature-jvmti), 1) $(call detected_var,JDIR) else - $(warning No openjdk development package found, please install JDK package) + $(warning No openjdk development package found, please install JDK package, e.g. openjdk-8-jdk, java-1.8.0-openjdk-devel) NO_JVMTI := 1 endif endif diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index f7517e1b73f8..83e453de36f8 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -364,7 +364,8 @@ LIBS = -Wl,--whole-archive $(PERFLIBS) $(EXTRA_PERFLIBS) -Wl,--no-whole-archive ifeq ($(USE_CLANG), 1) CLANGLIBS_LIST = AST Basic CodeGen Driver Frontend Lex Tooling Edit Sema Analysis Parse Serialization - LIBCLANG = $(foreach l,$(CLANGLIBS_LIST),$(wildcard $(shell $(LLVM_CONFIG) --libdir)/libclang$(l).a)) + CLANGLIBS_NOEXT_LIST = $(foreach l,$(CLANGLIBS_LIST),$(shell $(LLVM_CONFIG) --libdir)/libclang$(l)) + LIBCLANG = $(foreach l,$(CLANGLIBS_NOEXT_LIST),$(wildcard $(l).a $(l).so)) LIBS += -Wl,--start-group $(LIBCLANG) -Wl,--end-group endif diff --git a/tools/perf/arch/arm/include/arch-tests.h b/tools/perf/arch/arm/include/arch-tests.h new file mode 100644 index 000000000000..90ec4c8cb880 --- /dev/null +++ b/tools/perf/arch/arm/include/arch-tests.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef ARCH_TESTS_H +#define ARCH_TESTS_H + +#ifdef HAVE_DWARF_UNWIND_SUPPORT +struct thread; +struct perf_sample; +#endif + +extern struct test arch_tests[]; + +#endif diff --git a/tools/perf/arch/arm/tests/Build b/tools/perf/arch/arm/tests/Build index b30eff9bcc83..883c57ff0c08 100644 --- a/tools/perf/arch/arm/tests/Build +++ b/tools/perf/arch/arm/tests/Build @@ -1,2 +1,4 @@ libperf-y += regs_load.o libperf-y += dwarf-unwind.o + +libperf-y += arch-tests.o diff --git a/tools/perf/arch/arm/tests/arch-tests.c b/tools/perf/arch/arm/tests/arch-tests.c new file mode 100644 index 000000000000..5b1543c98022 --- /dev/null +++ b/tools/perf/arch/arm/tests/arch-tests.c @@ -0,0 +1,16 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <string.h> +#include "tests/tests.h" +#include "arch-tests.h" + +struct test arch_tests[] = { +#ifdef HAVE_DWARF_UNWIND_SUPPORT + { + .desc = "DWARF unwind", + .func = test__dwarf_unwind, + }, +#endif + { + .func = NULL, + }, +}; diff --git a/tools/perf/arch/arm/util/auxtrace.c b/tools/perf/arch/arm/util/auxtrace.c index fa639e3e52ac..1ce6bdbda561 100644 --- a/tools/perf/arch/arm/util/auxtrace.c +++ b/tools/perf/arch/arm/util/auxtrace.c @@ -1,18 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright(C) 2015 Linaro Limited. All rights reserved. * Author: Mathieu Poirier <mathieu.poirier@linaro.org> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program. If not, see <http://www.gnu.org/licenses/>. */ #include <stdbool.h> diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c index 5c655ad4621e..2f595cd73da6 100644 --- a/tools/perf/arch/arm/util/cs-etm.c +++ b/tools/perf/arch/arm/util/cs-etm.c @@ -1,18 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright(C) 2015 Linaro Limited. All rights reserved. * Author: Mathieu Poirier <mathieu.poirier@linaro.org> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program. If not, see <http://www.gnu.org/licenses/>. */ #include <api/fs/fs.h> diff --git a/tools/perf/arch/arm/util/cs-etm.h b/tools/perf/arch/arm/util/cs-etm.h index 5256741be549..1a12e64f5127 100644 --- a/tools/perf/arch/arm/util/cs-etm.h +++ b/tools/perf/arch/arm/util/cs-etm.h @@ -1,18 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright(C) 2015 Linaro Limited. All rights reserved. * Author: Mathieu Poirier <mathieu.poirier@linaro.org> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program. If not, see <http://www.gnu.org/licenses/>. */ #ifndef INCLUDE__PERF_CS_ETM_H__ diff --git a/tools/perf/arch/arm/util/pmu.c b/tools/perf/arch/arm/util/pmu.c index ac4dffc807b8..e047571e6080 100644 --- a/tools/perf/arch/arm/util/pmu.c +++ b/tools/perf/arch/arm/util/pmu.c @@ -1,18 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright(C) 2015 Linaro Limited. All rights reserved. * Author: Mathieu Poirier <mathieu.poirier@linaro.org> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program. If not, see <http://www.gnu.org/licenses/>. */ #include <string.h> diff --git a/tools/perf/arch/s390/util/auxtrace.c b/tools/perf/arch/s390/util/auxtrace.c index 6cb48e4cffd9..3afe8256eff2 100644 --- a/tools/perf/arch/s390/util/auxtrace.c +++ b/tools/perf/arch/s390/util/auxtrace.c @@ -87,6 +87,7 @@ struct auxtrace_record *auxtrace_record__init(struct perf_evlist *evlist, struct perf_evsel *pos; int diagnose = 0; + *err = 0; if (evlist->nr_entries == 0) return NULL; diff --git a/tools/perf/arch/s390/util/header.c b/tools/perf/arch/s390/util/header.c index a4c30f1c70be..163b92f33998 100644 --- a/tools/perf/arch/s390/util/header.c +++ b/tools/perf/arch/s390/util/header.c @@ -146,21 +146,3 @@ char *get_cpuid_str(struct perf_pmu *pmu __maybe_unused) zfree(&buf); return buf; } - -/* - * Compare the cpuid string returned by get_cpuid() function - * with the name generated by the jevents file read from - * pmu-events/arch/s390/mapfile.csv. - * - * Parameter mapcpuid is the cpuid as stored in the - * pmu-events/arch/s390/mapfile.csv. This is just the type number. - * Parameter cpuid is the cpuid returned by function get_cpuid(). - */ -int strcmp_cpuid_str(const char *mapcpuid, const char *cpuid) -{ - char *cp = strchr(cpuid, ','); - - if (cp == NULL) - return -1; - return strncmp(cp + 1, mapcpuid, strlen(mapcpuid)); -} diff --git a/tools/perf/arch/x86/Makefile b/tools/perf/arch/x86/Makefile index d74eaa7aa927..1a38e78117ce 100644 --- a/tools/perf/arch/x86/Makefile +++ b/tools/perf/arch/x86/Makefile @@ -21,7 +21,7 @@ _dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)') $(header): $(sys)/syscall_64.tbl $(systbl) @(test -d ../../kernel -a -d ../../tools -a -d ../perf && ( \ (diff -B arch/x86/entry/syscalls/syscall_64.tbl ../../arch/x86/entry/syscalls/syscall_64.tbl >/dev/null) \ - || echo "Warning: Kernel ABI header at 'tools/arch/x86/entry/syscalls/syscall_64.tbl' differs from latest version at 'arch/x86/entry/syscalls/syscall_64.tbl'" >&2 )) || true + || echo "Warning: Kernel ABI header at 'tools/perf/arch/x86/entry/syscalls/syscall_64.tbl' differs from latest version at 'arch/x86/entry/syscalls/syscall_64.tbl'" >&2 )) || true $(Q)$(SHELL) '$(systbl)' $(sys)/syscall_64.tbl 'x86_64' > $@ clean:: diff --git a/tools/perf/arch/x86/annotate/instructions.c b/tools/perf/arch/x86/annotate/instructions.c index 5bd1ba8c0282..44f5aba78210 100644 --- a/tools/perf/arch/x86/annotate/instructions.c +++ b/tools/perf/arch/x86/annotate/instructions.c @@ -1,21 +1,43 @@ // SPDX-License-Identifier: GPL-2.0 static struct ins x86__instructions[] = { + { .name = "adc", .ops = &mov_ops, }, + { .name = "adcb", .ops = &mov_ops, }, + { .name = "adcl", .ops = &mov_ops, }, { .name = "add", .ops = &mov_ops, }, { .name = "addl", .ops = &mov_ops, }, { .name = "addq", .ops = &mov_ops, }, + { .name = "addsd", .ops = &mov_ops, }, { .name = "addw", .ops = &mov_ops, }, { .name = "and", .ops = &mov_ops, }, + { .name = "andb", .ops = &mov_ops, }, + { .name = "andl", .ops = &mov_ops, }, + { .name = "andpd", .ops = &mov_ops, }, + { .name = "andps", .ops = &mov_ops, }, + { .name = "andq", .ops = &mov_ops, }, + { .name = "andw", .ops = &mov_ops, }, + { .name = "bsr", .ops = &mov_ops, }, + { .name = "bt", .ops = &mov_ops, }, + { .name = "btr", .ops = &mov_ops, }, { .name = "bts", .ops = &mov_ops, }, + { .name = "btsq", .ops = &mov_ops, }, { .name = "call", .ops = &call_ops, }, { .name = "callq", .ops = &call_ops, }, + { .name = "cmovbe", .ops = &mov_ops, }, + { .name = "cmove", .ops = &mov_ops, }, + { .name = "cmovae", .ops = &mov_ops, }, { .name = "cmp", .ops = &mov_ops, }, { .name = "cmpb", .ops = &mov_ops, }, { .name = "cmpl", .ops = &mov_ops, }, { .name = "cmpq", .ops = &mov_ops, }, { .name = "cmpw", .ops = &mov_ops, }, { .name = "cmpxch", .ops = &mov_ops, }, + { .name = "cmpxchg", .ops = &mov_ops, }, + { .name = "cs", .ops = &mov_ops, }, { .name = "dec", .ops = &dec_ops, }, { .name = "decl", .ops = &dec_ops, }, + { .name = "divsd", .ops = &mov_ops, }, + { .name = "divss", .ops = &mov_ops, }, + { .name = "gs", .ops = &mov_ops, }, { .name = "imul", .ops = &mov_ops, }, { .name = "inc", .ops = &dec_ops, }, { .name = "incl", .ops = &dec_ops, }, @@ -57,25 +79,68 @@ static struct ins x86__instructions[] = { { .name = "lea", .ops = &mov_ops, }, { .name = "lock", .ops = &lock_ops, }, { .name = "mov", .ops = &mov_ops, }, + { .name = "movapd", .ops = &mov_ops, }, + { .name = "movaps", .ops = &mov_ops, }, { .name = "movb", .ops = &mov_ops, }, { .name = "movdqa", .ops = &mov_ops, }, + { .name = "movdqu", .ops = &mov_ops, }, { .name = "movl", .ops = &mov_ops, }, { .name = "movq", .ops = &mov_ops, }, + { .name = "movsd", .ops = &mov_ops, }, { .name = "movslq", .ops = &mov_ops, }, + { .name = "movss", .ops = &mov_ops, }, + { .name = "movupd", .ops = &mov_ops, }, + { .name = "movups", .ops = &mov_ops, }, + { .name = "movw", .ops = &mov_ops, }, { .name = "movzbl", .ops = &mov_ops, }, { .name = "movzwl", .ops = &mov_ops, }, + { .name = "mulsd", .ops = &mov_ops, }, + { .name = "mulss", .ops = &mov_ops, }, { .name = "nop", .ops = &nop_ops, }, { .name = "nopl", .ops = &nop_ops, }, { .name = "nopw", .ops = &nop_ops, }, { .name = "or", .ops = &mov_ops, }, + { .name = "orb", .ops = &mov_ops, }, { .name = "orl", .ops = &mov_ops, }, + { .name = "orps", .ops = &mov_ops, }, + { .name = "orq", .ops = &mov_ops, }, + { .name = "pand", .ops = &mov_ops, }, + { .name = "paddq", .ops = &mov_ops, }, + { .name = "pcmpeqb", .ops = &mov_ops, }, + { .name = "por", .ops = &mov_ops, }, + { .name = "rclb", .ops = &mov_ops, }, + { .name = "rcll", .ops = &mov_ops, }, + { .name = "retq", .ops = &ret_ops, }, + { .name = "sbb", .ops = &mov_ops, }, + { .name = "sbbl", .ops = &mov_ops, }, + { .name = "sete", .ops = &mov_ops, }, + { .name = "sub", .ops = &mov_ops, }, + { .name = "subl", .ops = &mov_ops, }, + { .name = "subq", .ops = &mov_ops, }, + { .name = "subsd", .ops = &mov_ops, }, + { .name = "subw", .ops = &mov_ops, }, { .name = "test", .ops = &mov_ops, }, { .name = "testb", .ops = &mov_ops, }, { .name = "testl", .ops = &mov_ops, }, + { .name = "ucomisd", .ops = &mov_ops, }, + { .name = "ucomiss", .ops = &mov_ops, }, + { .name = "vaddsd", .ops = &mov_ops, }, + { .name = "vandpd", .ops = &mov_ops, }, + { .name = "vmovdqa", .ops = &mov_ops, }, + { .name = "vmovq", .ops = &mov_ops, }, + { .name = "vmovsd", .ops = &mov_ops, }, + { .name = "vmulsd", .ops = &mov_ops, }, + { .name = "vorpd", .ops = &mov_ops, }, + { .name = "vsubsd", .ops = &mov_ops, }, + { .name = "vucomisd", .ops = &mov_ops, }, { .name = "xadd", .ops = &mov_ops, }, { .name = "xbeginl", .ops = &jump_ops, }, { .name = "xbeginq", .ops = &jump_ops, }, - { .name = "retq", .ops = &ret_ops, }, + { .name = "xchg", .ops = &mov_ops, }, + { .name = "xor", .ops = &mov_ops, }, + { .name = "xorb", .ops = &mov_ops, }, + { .name = "xorpd", .ops = &mov_ops, }, + { .name = "xorps", .ops = &mov_ops, }, }; static bool x86__ins_is_fused(struct arch *arch, const char *ins1, diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl index 5aef183e2f85..4dfe42666d0c 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl @@ -4,379 +4,383 @@ # The format is: # <number> <abi> <name> <entry point> # +# The __x64_sys_*() stubs are created on-the-fly for sys_*() system calls +# # The abi is "common", "64" or "x32" for this file. # -0 common read sys_read -1 common write sys_write -2 common open sys_open -3 common close sys_close -4 common stat sys_newstat -5 common fstat sys_newfstat -6 common lstat sys_newlstat -7 common poll sys_poll -8 common lseek sys_lseek -9 common mmap sys_mmap -10 common mprotect sys_mprotect -11 common munmap sys_munmap -12 common brk sys_brk -13 64 rt_sigaction sys_rt_sigaction -14 common rt_sigprocmask sys_rt_sigprocmask -15 64 rt_sigreturn sys_rt_sigreturn/ptregs -16 64 ioctl sys_ioctl -17 common pread64 sys_pread64 -18 common pwrite64 sys_pwrite64 -19 64 readv sys_readv -20 64 writev sys_writev -21 common access sys_access -22 common pipe sys_pipe -23 common select sys_select -24 common sched_yield sys_sched_yield -25 common mremap sys_mremap -26 common msync sys_msync -27 common mincore sys_mincore -28 common madvise sys_madvise -29 common shmget sys_shmget -30 common shmat sys_shmat -31 common shmctl sys_shmctl -32 common dup sys_dup -33 common dup2 sys_dup2 -34 common pause sys_pause -35 common nanosleep sys_nanosleep -36 common getitimer sys_getitimer -37 common alarm sys_alarm -38 common setitimer sys_setitimer -39 common getpid sys_getpid -40 common sendfile sys_sendfile64 -41 common socket sys_socket -42 common connect sys_connect -43 common accept sys_accept -44 common sendto sys_sendto -45 64 recvfrom sys_recvfrom -46 64 sendmsg sys_sendmsg -47 64 recvmsg sys_recvmsg -48 common shutdown sys_shutdown -49 common bind sys_bind -50 common listen sys_listen -51 common getsockname sys_getsockname -52 common getpeername sys_getpeername -53 common socketpair sys_socketpair -54 64 setsockopt sys_setsockopt -55 64 getsockopt sys_getsockopt -56 common clone sys_clone/ptregs -57 common fork sys_fork/ptregs -58 common vfork sys_vfork/ptregs -59 64 execve sys_execve/ptregs -60 common exit sys_exit -61 common wait4 sys_wait4 -62 common kill sys_kill -63 common uname sys_newuname -64 common semget sys_semget -65 common semop sys_semop -66 common semctl sys_semctl -67 common shmdt sys_shmdt -68 common msgget sys_msgget -69 common msgsnd sys_msgsnd -70 common msgrcv sys_msgrcv -71 common msgctl sys_msgctl -72 common fcntl sys_fcntl -73 common flock sys_flock -74 common fsync sys_fsync -75 common fdatasync sys_fdatasync -76 common truncate sys_truncate -77 common ftruncate sys_ftruncate -78 common getdents sys_getdents -79 common getcwd sys_getcwd -80 common chdir sys_chdir -81 common fchdir sys_fchdir -82 common rename sys_rename -83 common mkdir sys_mkdir -84 common rmdir sys_rmdir -85 common creat sys_creat -86 common link sys_link -87 common unlink sys_unlink -88 common symlink sys_symlink -89 common readlink sys_readlink -90 common chmod sys_chmod -91 common fchmod sys_fchmod -92 common chown sys_chown -93 common fchown sys_fchown -94 common lchown sys_lchown -95 common umask sys_umask -96 common gettimeofday sys_gettimeofday -97 common getrlimit sys_getrlimit -98 common getrusage sys_getrusage -99 common sysinfo sys_sysinfo -100 common times sys_times -101 64 ptrace sys_ptrace -102 common getuid sys_getuid -103 common syslog sys_syslog -104 common getgid sys_getgid -105 common setuid sys_setuid -106 common setgid sys_setgid -107 common geteuid sys_geteuid -108 common getegid sys_getegid -109 common setpgid sys_setpgid -110 common getppid sys_getppid -111 common getpgrp sys_getpgrp -112 common setsid sys_setsid -113 common setreuid sys_setreuid -114 common setregid sys_setregid -115 common getgroups sys_getgroups -116 common setgroups sys_setgroups -117 common setresuid sys_setresuid -118 common getresuid sys_getresuid -119 common setresgid sys_setresgid -120 common getresgid sys_getresgid -121 common getpgid sys_getpgid -122 common setfsuid sys_setfsuid -123 common setfsgid sys_setfsgid -124 common getsid sys_getsid -125 common capget sys_capget -126 common capset sys_capset -127 64 rt_sigpending sys_rt_sigpending -128 64 rt_sigtimedwait sys_rt_sigtimedwait -129 64 rt_sigqueueinfo sys_rt_sigqueueinfo -130 common rt_sigsuspend sys_rt_sigsuspend -131 64 sigaltstack sys_sigaltstack -132 common utime sys_utime -133 common mknod sys_mknod +0 common read __x64_sys_read +1 common write __x64_sys_write +2 common open __x64_sys_open +3 common close __x64_sys_close +4 common stat __x64_sys_newstat +5 common fstat __x64_sys_newfstat +6 common lstat __x64_sys_newlstat +7 common poll __x64_sys_poll +8 common lseek __x64_sys_lseek +9 common mmap __x64_sys_mmap +10 common mprotect __x64_sys_mprotect +11 common munmap __x64_sys_munmap +12 common brk __x64_sys_brk +13 64 rt_sigaction __x64_sys_rt_sigaction +14 common rt_sigprocmask __x64_sys_rt_sigprocmask +15 64 rt_sigreturn __x64_sys_rt_sigreturn/ptregs +16 64 ioctl __x64_sys_ioctl +17 common pread64 __x64_sys_pread64 +18 common pwrite64 __x64_sys_pwrite64 +19 64 readv __x64_sys_readv +20 64 writev __x64_sys_writev +21 common access __x64_sys_access +22 common pipe __x64_sys_pipe +23 common select __x64_sys_select +24 common sched_yield __x64_sys_sched_yield +25 common mremap __x64_sys_mremap +26 common msync __x64_sys_msync +27 common mincore __x64_sys_mincore +28 common madvise __x64_sys_madvise +29 common shmget __x64_sys_shmget +30 common shmat __x64_sys_shmat +31 common shmctl __x64_sys_shmctl +32 common dup __x64_sys_dup +33 common dup2 __x64_sys_dup2 +34 common pause __x64_sys_pause +35 common nanosleep __x64_sys_nanosleep +36 common getitimer __x64_sys_getitimer +37 common alarm __x64_sys_alarm +38 common setitimer __x64_sys_setitimer +39 common getpid __x64_sys_getpid +40 common sendfile __x64_sys_sendfile64 +41 common socket __x64_sys_socket +42 common connect __x64_sys_connect +43 common accept __x64_sys_accept +44 common sendto __x64_sys_sendto +45 64 recvfrom __x64_sys_recvfrom +46 64 sendmsg __x64_sys_sendmsg +47 64 recvmsg __x64_sys_recvmsg +48 common shutdown __x64_sys_shutdown +49 common bind __x64_sys_bind +50 common listen __x64_sys_listen +51 common getsockname __x64_sys_getsockname +52 common getpeername __x64_sys_getpeername +53 common socketpair __x64_sys_socketpair +54 64 setsockopt __x64_sys_setsockopt +55 64 getsockopt __x64_sys_getsockopt +56 common clone __x64_sys_clone/ptregs +57 common fork __x64_sys_fork/ptregs +58 common vfork __x64_sys_vfork/ptregs +59 64 execve __x64_sys_execve/ptregs +60 common exit __x64_sys_exit +61 common wait4 __x64_sys_wait4 +62 common kill __x64_sys_kill +63 common uname __x64_sys_newuname +64 common semget __x64_sys_semget +65 common semop __x64_sys_semop +66 common semctl __x64_sys_semctl +67 common shmdt __x64_sys_shmdt +68 common msgget __x64_sys_msgget +69 common msgsnd __x64_sys_msgsnd +70 common msgrcv __x64_sys_msgrcv +71 common msgctl __x64_sys_msgctl +72 common fcntl __x64_sys_fcntl +73 common flock __x64_sys_flock +74 common fsync __x64_sys_fsync +75 common fdatasync __x64_sys_fdatasync +76 common truncate __x64_sys_truncate +77 common ftruncate __x64_sys_ftruncate +78 common getdents __x64_sys_getdents +79 common getcwd __x64_sys_getcwd +80 common chdir __x64_sys_chdir +81 common fchdir __x64_sys_fchdir +82 common rename __x64_sys_rename +83 common mkdir __x64_sys_mkdir +84 common rmdir __x64_sys_rmdir +85 common creat __x64_sys_creat +86 common link __x64_sys_link +87 common unlink __x64_sys_unlink +88 common symlink __x64_sys_symlink +89 common readlink __x64_sys_readlink +90 common chmod __x64_sys_chmod +91 common fchmod __x64_sys_fchmod +92 common chown __x64_sys_chown +93 common fchown __x64_sys_fchown +94 common lchown __x64_sys_lchown +95 common umask __x64_sys_umask +96 common gettimeofday __x64_sys_gettimeofday +97 common getrlimit __x64_sys_getrlimit +98 common getrusage __x64_sys_getrusage +99 common sysinfo __x64_sys_sysinfo +100 common times __x64_sys_times +101 64 ptrace __x64_sys_ptrace +102 common getuid __x64_sys_getuid +103 common syslog __x64_sys_syslog +104 common getgid __x64_sys_getgid +105 common setuid __x64_sys_setuid +106 common setgid __x64_sys_setgid +107 common geteuid __x64_sys_geteuid +108 common getegid __x64_sys_getegid +109 common setpgid __x64_sys_setpgid +110 common getppid __x64_sys_getppid +111 common getpgrp __x64_sys_getpgrp +112 common setsid __x64_sys_setsid +113 common setreuid __x64_sys_setreuid +114 common setregid __x64_sys_setregid +115 common getgroups __x64_sys_getgroups +116 common setgroups __x64_sys_setgroups +117 common setresuid __x64_sys_setresuid +118 common getresuid __x64_sys_getresuid +119 common setresgid __x64_sys_setresgid +120 common getresgid __x64_sys_getresgid +121 common getpgid __x64_sys_getpgid +122 common setfsuid __x64_sys_setfsuid +123 common setfsgid __x64_sys_setfsgid +124 common getsid __x64_sys_getsid +125 common capget __x64_sys_capget +126 common capset __x64_sys_capset +127 64 rt_sigpending __x64_sys_rt_sigpending +128 64 rt_sigtimedwait __x64_sys_rt_sigtimedwait +129 64 rt_sigqueueinfo __x64_sys_rt_sigqueueinfo +130 common rt_sigsuspend __x64_sys_rt_sigsuspend +131 64 sigaltstack __x64_sys_sigaltstack +132 common utime __x64_sys_utime +133 common mknod __x64_sys_mknod 134 64 uselib -135 common personality sys_personality -136 common ustat sys_ustat -137 common statfs sys_statfs -138 common fstatfs sys_fstatfs -139 common sysfs sys_sysfs -140 common getpriority sys_getpriority -141 common setpriority sys_setpriority -142 common sched_setparam sys_sched_setparam -143 common sched_getparam sys_sched_getparam -144 common sched_setscheduler sys_sched_setscheduler -145 common sched_getscheduler sys_sched_getscheduler -146 common sched_get_priority_max sys_sched_get_priority_max -147 common sched_get_priority_min sys_sched_get_priority_min -148 common sched_rr_get_interval sys_sched_rr_get_interval -149 common mlock sys_mlock -150 common munlock sys_munlock -151 common mlockall sys_mlockall -152 common munlockall sys_munlockall -153 common vhangup sys_vhangup -154 common modify_ldt sys_modify_ldt -155 common pivot_root sys_pivot_root -156 64 _sysctl sys_sysctl -157 common prctl sys_prctl -158 common arch_prctl sys_arch_prctl -159 common adjtimex sys_adjtimex -160 common setrlimit sys_setrlimit -161 common chroot sys_chroot -162 common sync sys_sync -163 common acct sys_acct -164 common settimeofday sys_settimeofday -165 common mount sys_mount -166 common umount2 sys_umount -167 common swapon sys_swapon -168 common swapoff sys_swapoff -169 common reboot sys_reboot -170 common sethostname sys_sethostname -171 common setdomainname sys_setdomainname -172 common iopl sys_iopl/ptregs -173 common ioperm sys_ioperm +135 common personality __x64_sys_personality +136 common ustat __x64_sys_ustat +137 common statfs __x64_sys_statfs +138 common fstatfs __x64_sys_fstatfs +139 common sysfs __x64_sys_sysfs +140 common getpriority __x64_sys_getpriority +141 common setpriority __x64_sys_setpriority +142 common sched_setparam __x64_sys_sched_setparam +143 common sched_getparam __x64_sys_sched_getparam +144 common sched_setscheduler __x64_sys_sched_setscheduler +145 common sched_getscheduler __x64_sys_sched_getscheduler +146 common sched_get_priority_max __x64_sys_sched_get_priority_max +147 common sched_get_priority_min __x64_sys_sched_get_priority_min +148 common sched_rr_get_interval __x64_sys_sched_rr_get_interval +149 common mlock __x64_sys_mlock +150 common munlock __x64_sys_munlock +151 common mlockall __x64_sys_mlockall +152 common munlockall __x64_sys_munlockall +153 common vhangup __x64_sys_vhangup +154 common modify_ldt __x64_sys_modify_ldt +155 common pivot_root __x64_sys_pivot_root +156 64 _sysctl __x64_sys_sysctl +157 common prctl __x64_sys_prctl +158 common arch_prctl __x64_sys_arch_prctl +159 common adjtimex __x64_sys_adjtimex +160 common setrlimit __x64_sys_setrlimit +161 common chroot __x64_sys_chroot +162 common sync __x64_sys_sync +163 common acct __x64_sys_acct +164 common settimeofday __x64_sys_settimeofday +165 common mount __x64_sys_mount +166 common umount2 __x64_sys_umount +167 common swapon __x64_sys_swapon +168 common swapoff __x64_sys_swapoff +169 common reboot __x64_sys_reboot +170 common sethostname __x64_sys_sethostname +171 common setdomainname __x64_sys_setdomainname +172 common iopl __x64_sys_iopl/ptregs +173 common ioperm __x64_sys_ioperm 174 64 create_module -175 common init_module sys_init_module -176 common delete_module sys_delete_module +175 common init_module __x64_sys_init_module +176 common delete_module __x64_sys_delete_module 177 64 get_kernel_syms 178 64 query_module -179 common quotactl sys_quotactl +179 common quotactl __x64_sys_quotactl 180 64 nfsservctl 181 common getpmsg 182 common putpmsg 183 common afs_syscall 184 common tuxcall 185 common security -186 common gettid sys_gettid -187 common readahead sys_readahead -188 common setxattr sys_setxattr -189 common lsetxattr sys_lsetxattr -190 common fsetxattr sys_fsetxattr -191 common getxattr sys_getxattr -192 common lgetxattr sys_lgetxattr -193 common fgetxattr sys_fgetxattr -194 common listxattr sys_listxattr -195 common llistxattr sys_llistxattr -196 common flistxattr sys_flistxattr -197 common removexattr sys_removexattr -198 common lremovexattr sys_lremovexattr -199 common fremovexattr sys_fremovexattr -200 common tkill sys_tkill -201 common time sys_time -202 common futex sys_futex -203 common sched_setaffinity sys_sched_setaffinity -204 common sched_getaffinity sys_sched_getaffinity +186 common gettid __x64_sys_gettid +187 common readahead __x64_sys_readahead +188 common setxattr __x64_sys_setxattr +189 common lsetxattr __x64_sys_lsetxattr +190 common fsetxattr __x64_sys_fsetxattr +191 common getxattr __x64_sys_getxattr +192 common lgetxattr __x64_sys_lgetxattr +193 common fgetxattr __x64_sys_fgetxattr +194 common listxattr __x64_sys_listxattr +195 common llistxattr __x64_sys_llistxattr +196 common flistxattr __x64_sys_flistxattr +197 common removexattr __x64_sys_removexattr +198 common lremovexattr __x64_sys_lremovexattr +199 common fremovexattr __x64_sys_fremovexattr +200 common tkill __x64_sys_tkill +201 common time __x64_sys_time +202 common futex __x64_sys_futex +203 common sched_setaffinity __x64_sys_sched_setaffinity +204 common sched_getaffinity __x64_sys_sched_getaffinity 205 64 set_thread_area -206 64 io_setup sys_io_setup -207 common io_destroy sys_io_destroy -208 common io_getevents sys_io_getevents -209 64 io_submit sys_io_submit -210 common io_cancel sys_io_cancel +206 64 io_setup __x64_sys_io_setup +207 common io_destroy __x64_sys_io_destroy +208 common io_getevents __x64_sys_io_getevents +209 64 io_submit __x64_sys_io_submit +210 common io_cancel __x64_sys_io_cancel 211 64 get_thread_area -212 common lookup_dcookie sys_lookup_dcookie -213 common epoll_create sys_epoll_create +212 common lookup_dcookie __x64_sys_lookup_dcookie +213 common epoll_create __x64_sys_epoll_create 214 64 epoll_ctl_old 215 64 epoll_wait_old -216 common remap_file_pages sys_remap_file_pages -217 common getdents64 sys_getdents64 -218 common set_tid_address sys_set_tid_address -219 common restart_syscall sys_restart_syscall -220 common semtimedop sys_semtimedop -221 common fadvise64 sys_fadvise64 -222 64 timer_create sys_timer_create -223 common timer_settime sys_timer_settime -224 common timer_gettime sys_timer_gettime -225 common timer_getoverrun sys_timer_getoverrun -226 common timer_delete sys_timer_delete -227 common clock_settime sys_clock_settime -228 common clock_gettime sys_clock_gettime -229 common clock_getres sys_clock_getres -230 common clock_nanosleep sys_clock_nanosleep -231 common exit_group sys_exit_group -232 common epoll_wait sys_epoll_wait -233 common epoll_ctl sys_epoll_ctl -234 common tgkill sys_tgkill -235 common utimes sys_utimes +216 common remap_file_pages __x64_sys_remap_file_pages +217 common getdents64 __x64_sys_getdents64 +218 common set_tid_address __x64_sys_set_tid_address +219 common restart_syscall __x64_sys_restart_syscall +220 common semtimedop __x64_sys_semtimedop +221 common fadvise64 __x64_sys_fadvise64 +222 64 timer_create __x64_sys_timer_create +223 common timer_settime __x64_sys_timer_settime +224 common timer_gettime __x64_sys_timer_gettime +225 common timer_getoverrun __x64_sys_timer_getoverrun +226 common timer_delete __x64_sys_timer_delete +227 common clock_settime __x64_sys_clock_settime +228 common clock_gettime __x64_sys_clock_gettime +229 common clock_getres __x64_sys_clock_getres +230 common clock_nanosleep __x64_sys_clock_nanosleep +231 common exit_group __x64_sys_exit_group +232 common epoll_wait __x64_sys_epoll_wait +233 common epoll_ctl __x64_sys_epoll_ctl +234 common tgkill __x64_sys_tgkill +235 common utimes __x64_sys_utimes 236 64 vserver -237 common mbind sys_mbind -238 common set_mempolicy sys_set_mempolicy -239 common get_mempolicy sys_get_mempolicy -240 common mq_open sys_mq_open -241 common mq_unlink sys_mq_unlink -242 common mq_timedsend sys_mq_timedsend -243 common mq_timedreceive sys_mq_timedreceive -244 64 mq_notify sys_mq_notify -245 common mq_getsetattr sys_mq_getsetattr -246 64 kexec_load sys_kexec_load -247 64 waitid sys_waitid -248 common add_key sys_add_key -249 common request_key sys_request_key -250 common keyctl sys_keyctl -251 common ioprio_set sys_ioprio_set -252 common ioprio_get sys_ioprio_get -253 common inotify_init sys_inotify_init -254 common inotify_add_watch sys_inotify_add_watch -255 common inotify_rm_watch sys_inotify_rm_watch -256 common migrate_pages sys_migrate_pages -257 common openat sys_openat -258 common mkdirat sys_mkdirat -259 common mknodat sys_mknodat -260 common fchownat sys_fchownat -261 common futimesat sys_futimesat -262 common newfstatat sys_newfstatat -263 common unlinkat sys_unlinkat -264 common renameat sys_renameat -265 common linkat sys_linkat -266 common symlinkat sys_symlinkat -267 common readlinkat sys_readlinkat -268 common fchmodat sys_fchmodat -269 common faccessat sys_faccessat -270 common pselect6 sys_pselect6 -271 common ppoll sys_ppoll -272 common unshare sys_unshare -273 64 set_robust_list sys_set_robust_list -274 64 get_robust_list sys_get_robust_list -275 common splice sys_splice -276 common tee sys_tee -277 common sync_file_range sys_sync_file_range -278 64 vmsplice sys_vmsplice -279 64 move_pages sys_move_pages -280 common utimensat sys_utimensat -281 common epoll_pwait sys_epoll_pwait -282 common signalfd sys_signalfd -283 common timerfd_create sys_timerfd_create -284 common eventfd sys_eventfd -285 common fallocate sys_fallocate -286 common timerfd_settime sys_timerfd_settime -287 common timerfd_gettime sys_timerfd_gettime -288 common accept4 sys_accept4 -289 common signalfd4 sys_signalfd4 -290 common eventfd2 sys_eventfd2 -291 common epoll_create1 sys_epoll_create1 -292 common dup3 sys_dup3 -293 common pipe2 sys_pipe2 -294 common inotify_init1 sys_inotify_init1 -295 64 preadv sys_preadv -296 64 pwritev sys_pwritev -297 64 rt_tgsigqueueinfo sys_rt_tgsigqueueinfo -298 common perf_event_open sys_perf_event_open -299 64 recvmmsg sys_recvmmsg -300 common fanotify_init sys_fanotify_init -301 common fanotify_mark sys_fanotify_mark -302 common prlimit64 sys_prlimit64 -303 common name_to_handle_at sys_name_to_handle_at -304 common open_by_handle_at sys_open_by_handle_at -305 common clock_adjtime sys_clock_adjtime -306 common syncfs sys_syncfs -307 64 sendmmsg sys_sendmmsg -308 common setns sys_setns -309 common getcpu sys_getcpu -310 64 process_vm_readv sys_process_vm_readv -311 64 process_vm_writev sys_process_vm_writev -312 common kcmp sys_kcmp -313 common finit_module sys_finit_module -314 common sched_setattr sys_sched_setattr -315 common sched_getattr sys_sched_getattr -316 common renameat2 sys_renameat2 -317 common seccomp sys_seccomp -318 common getrandom sys_getrandom -319 common memfd_create sys_memfd_create -320 common kexec_file_load sys_kexec_file_load -321 common bpf sys_bpf -322 64 execveat sys_execveat/ptregs -323 common userfaultfd sys_userfaultfd -324 common membarrier sys_membarrier -325 common mlock2 sys_mlock2 -326 common copy_file_range sys_copy_file_range -327 64 preadv2 sys_preadv2 -328 64 pwritev2 sys_pwritev2 -329 common pkey_mprotect sys_pkey_mprotect -330 common pkey_alloc sys_pkey_alloc -331 common pkey_free sys_pkey_free -332 common statx sys_statx +237 common mbind __x64_sys_mbind +238 common set_mempolicy __x64_sys_set_mempolicy +239 common get_mempolicy __x64_sys_get_mempolicy +240 common mq_open __x64_sys_mq_open +241 common mq_unlink __x64_sys_mq_unlink +242 common mq_timedsend __x64_sys_mq_timedsend +243 common mq_timedreceive __x64_sys_mq_timedreceive +244 64 mq_notify __x64_sys_mq_notify +245 common mq_getsetattr __x64_sys_mq_getsetattr +246 64 kexec_load __x64_sys_kexec_load +247 64 waitid __x64_sys_waitid +248 common add_key __x64_sys_add_key +249 common request_key __x64_sys_request_key +250 common keyctl __x64_sys_keyctl +251 common ioprio_set __x64_sys_ioprio_set +252 common ioprio_get __x64_sys_ioprio_get +253 common inotify_init __x64_sys_inotify_init +254 common inotify_add_watch __x64_sys_inotify_add_watch +255 common inotify_rm_watch __x64_sys_inotify_rm_watch +256 common migrate_pages __x64_sys_migrate_pages +257 common openat __x64_sys_openat +258 common mkdirat __x64_sys_mkdirat +259 common mknodat __x64_sys_mknodat +260 common fchownat __x64_sys_fchownat +261 common futimesat __x64_sys_futimesat +262 common newfstatat __x64_sys_newfstatat +263 common unlinkat __x64_sys_unlinkat +264 common renameat __x64_sys_renameat +265 common linkat __x64_sys_linkat +266 common symlinkat __x64_sys_symlinkat +267 common readlinkat __x64_sys_readlinkat +268 common fchmodat __x64_sys_fchmodat +269 common faccessat __x64_sys_faccessat +270 common pselect6 __x64_sys_pselect6 +271 common ppoll __x64_sys_ppoll +272 common unshare __x64_sys_unshare +273 64 set_robust_list __x64_sys_set_robust_list +274 64 get_robust_list __x64_sys_get_robust_list +275 common splice __x64_sys_splice +276 common tee __x64_sys_tee +277 common sync_file_range __x64_sys_sync_file_range +278 64 vmsplice __x64_sys_vmsplice +279 64 move_pages __x64_sys_move_pages +280 common utimensat __x64_sys_utimensat +281 common epoll_pwait __x64_sys_epoll_pwait +282 common signalfd __x64_sys_signalfd +283 common timerfd_create __x64_sys_timerfd_create +284 common eventfd __x64_sys_eventfd +285 common fallocate __x64_sys_fallocate +286 common timerfd_settime __x64_sys_timerfd_settime +287 common timerfd_gettime __x64_sys_timerfd_gettime +288 common accept4 __x64_sys_accept4 +289 common signalfd4 __x64_sys_signalfd4 +290 common eventfd2 __x64_sys_eventfd2 +291 common epoll_create1 __x64_sys_epoll_create1 +292 common dup3 __x64_sys_dup3 +293 common pipe2 __x64_sys_pipe2 +294 common inotify_init1 __x64_sys_inotify_init1 +295 64 preadv __x64_sys_preadv +296 64 pwritev __x64_sys_pwritev +297 64 rt_tgsigqueueinfo __x64_sys_rt_tgsigqueueinfo +298 common perf_event_open __x64_sys_perf_event_open +299 64 recvmmsg __x64_sys_recvmmsg +300 common fanotify_init __x64_sys_fanotify_init +301 common fanotify_mark __x64_sys_fanotify_mark +302 common prlimit64 __x64_sys_prlimit64 +303 common name_to_handle_at __x64_sys_name_to_handle_at +304 common open_by_handle_at __x64_sys_open_by_handle_at +305 common clock_adjtime __x64_sys_clock_adjtime +306 common syncfs __x64_sys_syncfs +307 64 sendmmsg __x64_sys_sendmmsg +308 common setns __x64_sys_setns +309 common getcpu __x64_sys_getcpu +310 64 process_vm_readv __x64_sys_process_vm_readv +311 64 process_vm_writev __x64_sys_process_vm_writev +312 common kcmp __x64_sys_kcmp +313 common finit_module __x64_sys_finit_module +314 common sched_setattr __x64_sys_sched_setattr +315 common sched_getattr __x64_sys_sched_getattr +316 common renameat2 __x64_sys_renameat2 +317 common seccomp __x64_sys_seccomp +318 common getrandom __x64_sys_getrandom +319 common memfd_create __x64_sys_memfd_create +320 common kexec_file_load __x64_sys_kexec_file_load +321 common bpf __x64_sys_bpf +322 64 execveat __x64_sys_execveat/ptregs +323 common userfaultfd __x64_sys_userfaultfd +324 common membarrier __x64_sys_membarrier +325 common mlock2 __x64_sys_mlock2 +326 common copy_file_range __x64_sys_copy_file_range +327 64 preadv2 __x64_sys_preadv2 +328 64 pwritev2 __x64_sys_pwritev2 +329 common pkey_mprotect __x64_sys_pkey_mprotect +330 common pkey_alloc __x64_sys_pkey_alloc +331 common pkey_free __x64_sys_pkey_free +332 common statx __x64_sys_statx # # x32-specific system call numbers start at 512 to avoid cache impact -# for native 64-bit operation. +# for native 64-bit operation. The __x32_compat_sys stubs are created +# on-the-fly for compat_sys_*() compatibility system calls if X86_X32 +# is defined. # -512 x32 rt_sigaction compat_sys_rt_sigaction +512 x32 rt_sigaction __x32_compat_sys_rt_sigaction 513 x32 rt_sigreturn sys32_x32_rt_sigreturn -514 x32 ioctl compat_sys_ioctl -515 x32 readv compat_sys_readv -516 x32 writev compat_sys_writev -517 x32 recvfrom compat_sys_recvfrom -518 x32 sendmsg compat_sys_sendmsg -519 x32 recvmsg compat_sys_recvmsg -520 x32 execve compat_sys_execve/ptregs -521 x32 ptrace compat_sys_ptrace -522 x32 rt_sigpending compat_sys_rt_sigpending -523 x32 rt_sigtimedwait compat_sys_rt_sigtimedwait -524 x32 rt_sigqueueinfo compat_sys_rt_sigqueueinfo -525 x32 sigaltstack compat_sys_sigaltstack -526 x32 timer_create compat_sys_timer_create -527 x32 mq_notify compat_sys_mq_notify -528 x32 kexec_load compat_sys_kexec_load -529 x32 waitid compat_sys_waitid -530 x32 set_robust_list compat_sys_set_robust_list -531 x32 get_robust_list compat_sys_get_robust_list -532 x32 vmsplice compat_sys_vmsplice -533 x32 move_pages compat_sys_move_pages -534 x32 preadv compat_sys_preadv64 -535 x32 pwritev compat_sys_pwritev64 -536 x32 rt_tgsigqueueinfo compat_sys_rt_tgsigqueueinfo -537 x32 recvmmsg compat_sys_recvmmsg -538 x32 sendmmsg compat_sys_sendmmsg -539 x32 process_vm_readv compat_sys_process_vm_readv -540 x32 process_vm_writev compat_sys_process_vm_writev -541 x32 setsockopt compat_sys_setsockopt -542 x32 getsockopt compat_sys_getsockopt -543 x32 io_setup compat_sys_io_setup -544 x32 io_submit compat_sys_io_submit -545 x32 execveat compat_sys_execveat/ptregs -546 x32 preadv2 compat_sys_preadv64v2 -547 x32 pwritev2 compat_sys_pwritev64v2 +514 x32 ioctl __x32_compat_sys_ioctl +515 x32 readv __x32_compat_sys_readv +516 x32 writev __x32_compat_sys_writev +517 x32 recvfrom __x32_compat_sys_recvfrom +518 x32 sendmsg __x32_compat_sys_sendmsg +519 x32 recvmsg __x32_compat_sys_recvmsg +520 x32 execve __x32_compat_sys_execve/ptregs +521 x32 ptrace __x32_compat_sys_ptrace +522 x32 rt_sigpending __x32_compat_sys_rt_sigpending +523 x32 rt_sigtimedwait __x32_compat_sys_rt_sigtimedwait +524 x32 rt_sigqueueinfo __x32_compat_sys_rt_sigqueueinfo +525 x32 sigaltstack __x32_compat_sys_sigaltstack +526 x32 timer_create __x32_compat_sys_timer_create +527 x32 mq_notify __x32_compat_sys_mq_notify +528 x32 kexec_load __x32_compat_sys_kexec_load +529 x32 waitid __x32_compat_sys_waitid +530 x32 set_robust_list __x32_compat_sys_set_robust_list +531 x32 get_robust_list __x32_compat_sys_get_robust_list +532 x32 vmsplice __x32_compat_sys_vmsplice +533 x32 move_pages __x32_compat_sys_move_pages +534 x32 preadv __x32_compat_sys_preadv64 +535 x32 pwritev __x32_compat_sys_pwritev64 +536 x32 rt_tgsigqueueinfo __x32_compat_sys_rt_tgsigqueueinfo +537 x32 recvmmsg __x32_compat_sys_recvmmsg +538 x32 sendmmsg __x32_compat_sys_sendmmsg +539 x32 process_vm_readv __x32_compat_sys_process_vm_readv +540 x32 process_vm_writev __x32_compat_sys_process_vm_writev +541 x32 setsockopt __x32_compat_sys_setsockopt +542 x32 getsockopt __x32_compat_sys_getsockopt +543 x32 io_setup __x32_compat_sys_io_setup +544 x32 io_submit __x32_compat_sys_io_submit +545 x32 execveat __x32_compat_sys_execveat/ptregs +546 x32 preadv2 __x32_compat_sys_preadv64v2 +547 x32 pwritev2 __x32_compat_sys_pwritev64v2 diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c index 944070e98a2c..63eb49082774 100644 --- a/tools/perf/bench/numa.c +++ b/tools/perf/bench/numa.c @@ -175,7 +175,7 @@ static const struct option options[] = { OPT_UINTEGER('s', "nr_secs" , &p0.nr_secs, "max number of seconds to run (default: 5 secs)"), OPT_UINTEGER('u', "usleep" , &p0.sleep_usecs, "usecs to sleep per loop iteration"), - OPT_BOOLEAN('R', "data_reads" , &p0.data_reads, "access the data via writes (can be mixed with -W)"), + OPT_BOOLEAN('R', "data_reads" , &p0.data_reads, "access the data via reads (can be mixed with -W)"), OPT_BOOLEAN('W', "data_writes" , &p0.data_writes, "access the data via writes (can be mixed with -R)"), OPT_BOOLEAN('B', "data_backwards", &p0.data_backwards, "access the data backwards as well"), OPT_BOOLEAN('Z', "data_zero_memset", &p0.data_zero_memset,"access the data via glibc bzero only"), diff --git a/tools/perf/builtin-help.c b/tools/perf/builtin-help.c index 4aca13f23b9d..1c41b4eaf73c 100644 --- a/tools/perf/builtin-help.c +++ b/tools/perf/builtin-help.c @@ -439,7 +439,7 @@ int cmd_help(int argc, const char **argv) #ifdef HAVE_LIBELF_SUPPORT "probe", #endif -#if defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE) +#if defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE_SUPPORT) "trace", #endif NULL }; diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index 506564651cda..57393e94d156 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -83,7 +83,7 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem) }; argc = parse_options(argc, argv, options, record_mem_usage, - PARSE_OPT_STOP_AT_NON_OPTION); + PARSE_OPT_KEEP_UNKNOWN); rec_argc = argc + 9; /* max number of arguments */ rec_argv = calloc(rec_argc + 1, sizeof(char *)); @@ -436,7 +436,7 @@ int cmd_mem(int argc, const char **argv) } argc = parse_options_subcommand(argc, argv, mem_options, mem_subcommands, - mem_usage, PARSE_OPT_STOP_AT_NON_OPTION); + mem_usage, PARSE_OPT_KEEP_UNKNOWN); if (!argc || !(strncmp(argv[0], "rec", 3) || mem.operation)) usage_with_options(mem_usage, mem_options); diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 313c42423393..e0a9845b6cbc 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -657,8 +657,11 @@ static int perf_sample__fprintf_start(struct perf_sample *sample, break; case PERF_RECORD_SWITCH: case PERF_RECORD_SWITCH_CPU_WIDE: - if (has(SWITCH_OUT)) + if (has(SWITCH_OUT)) { ret += fprintf(fp, "S"); + if (sample->misc & PERF_RECORD_MISC_SWITCH_OUT_PREEMPT) + ret += fprintf(fp, "p"); + } default: break; } @@ -2801,11 +2804,11 @@ int find_scripts(char **scripts_array, char **scripts_path_array) for_each_lang(scripts_path, scripts_dir, lang_dirent) { scnprintf(lang_path, MAXPATHLEN, "%s/%s", scripts_path, lang_dirent->d_name); -#ifdef NO_LIBPERL +#ifndef HAVE_LIBPERL_SUPPORT if (strstr(lang_path, "perl")) continue; #endif -#ifdef NO_LIBPYTHON +#ifndef HAVE_LIBPYTHON_SUPPORT if (strstr(lang_path, "python")) continue; #endif diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index f5c454855908..f17dc601b0f3 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -172,6 +172,7 @@ static bool interval_count; static const char *output_name; static int output_fd; static int print_free_counters_hint; +static int print_mixed_hw_group_error; struct perf_stat { bool record; @@ -1126,6 +1127,30 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); } +static bool is_mixed_hw_group(struct perf_evsel *counter) +{ + struct perf_evlist *evlist = counter->evlist; + u32 pmu_type = counter->attr.type; + struct perf_evsel *pos; + + if (counter->nr_members < 2) + return false; + + evlist__for_each_entry(evlist, pos) { + /* software events can be part of any hardware group */ + if (pos->attr.type == PERF_TYPE_SOFTWARE) + continue; + if (pmu_type == PERF_TYPE_SOFTWARE) { + pmu_type = pos->attr.type; + continue; + } + if (pmu_type != pos->attr.type) + return true; + } + + return false; +} + static void printout(int id, int nr, struct perf_evsel *counter, double uval, char *prefix, u64 run, u64 ena, double noise, struct runtime_stat *st) @@ -1178,8 +1203,11 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval, counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, csv_sep); - if (counter->supported) + if (counter->supported) { print_free_counters_hint = 1; + if (is_mixed_hw_group(counter)) + print_mixed_hw_group_error = 1; + } fprintf(stat_config.output, "%-*s%s", csv_output ? 0 : unit_width, @@ -1256,7 +1284,8 @@ static void uniquify_event_name(struct perf_evsel *counter) char *new_name; char *config; - if (!counter->pmu_name || !strncmp(counter->name, counter->pmu_name, + if (counter->uniquified_name || + !counter->pmu_name || !strncmp(counter->name, counter->pmu_name, strlen(counter->pmu_name))) return; @@ -1274,6 +1303,8 @@ static void uniquify_event_name(struct perf_evsel *counter) counter->name = new_name; } } + + counter->uniquified_name = true; } static void collect_all_aliases(struct perf_evsel *counter, @@ -1757,6 +1788,11 @@ static void print_footer(void) " echo 0 > /proc/sys/kernel/nmi_watchdog\n" " perf stat ...\n" " echo 1 > /proc/sys/kernel/nmi_watchdog\n"); + + if (print_mixed_hw_group_error) + fprintf(output, + "The events in group usually have to be from " + "the same PMU. Try reorganizing the group.\n"); } static void print_counters(struct timespec *ts, int argc, const char **argv) @@ -1943,7 +1979,8 @@ static const struct option stat_options[] = { OPT_STRING(0, "post", &post_cmd, "command", "command to run after to the measured command"), OPT_UINTEGER('I', "interval-print", &stat_config.interval, - "print counts at regular interval in ms (>= 10)"), + "print counts at regular interval in ms " + "(overhead is possible for values <= 100ms)"), OPT_INTEGER(0, "interval-count", &stat_config.times, "print counts for fixed number of times"), OPT_UINTEGER(0, "timeout", &stat_config.timeout, @@ -2923,17 +2960,6 @@ int cmd_stat(int argc, const char **argv) } } - if (interval && interval < 100) { - if (interval < 10) { - pr_err("print interval must be >= 10ms\n"); - parse_options_usage(stat_usage, stat_options, "I", 1); - goto out; - } else - pr_warning("print interval < 100ms. " - "The overhead percentage could be high in some cases. " - "Please proceed with caution.\n"); - } - if (stat_config.times && interval) interval_count = true; else if (stat_config.times && !interval) { diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 87b95c9410b4..3ad17ee89403 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -112,6 +112,7 @@ struct trace { bool multiple_threads; bool summary; bool summary_only; + bool failure_only; bool show_comm; bool print_sample; bool show_tool_stats; @@ -1565,7 +1566,7 @@ static int trace__printf_interrupted_entry(struct trace *trace) struct thread_trace *ttrace; size_t printed; - if (trace->current == NULL) + if (trace->failure_only || trace->current == NULL) return 0; ttrace = thread__priv(trace->current); @@ -1638,7 +1639,7 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, args, trace, thread); if (sc->is_exit) { - if (!(trace->duration_filter || trace->summary_only || trace->min_stack)) { + if (!(trace->duration_filter || trace->summary_only || trace->failure_only || trace->min_stack)) { trace__fprintf_entry_head(trace, thread, 0, false, ttrace->entry_time, trace->output); fprintf(trace->output, "%-70s)\n", ttrace->entry_str); } @@ -1742,7 +1743,7 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel, } } - if (trace->summary_only) + if (trace->summary_only || (ret >= 0 && trace->failure_only)) goto out; trace__fprintf_entry_head(trace, thread, duration, duration_calculated, ttrace->entry_time, trace->output); @@ -1961,7 +1962,7 @@ static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel, trace->output); } - fprintf(trace->output, ")\n"); + fprintf(trace->output, "\n"); if (callchain_ret > 0) trace__fprintf_callchain(trace, sample); @@ -3087,6 +3088,8 @@ int cmd_trace(int argc, const char **argv) OPT_INCR('v', "verbose", &verbose, "be more verbose"), OPT_BOOLEAN('T', "time", &trace.full_time, "Show full timestamp, not time relative to first start"), + OPT_BOOLEAN(0, "failure", &trace.failure_only, + "Show only syscalls that failed"), OPT_BOOLEAN('s', "summary", &trace.summary_only, "Show only syscall summary with statistics"), OPT_BOOLEAN('S', "with-summary", &trace.summary, diff --git a/tools/perf/builtin-version.c b/tools/perf/builtin-version.c index 37019c5d675f..50df168be326 100644 --- a/tools/perf/builtin-version.c +++ b/tools/perf/builtin-version.c @@ -1,11 +1,94 @@ // SPDX-License-Identifier: GPL-2.0 #include "builtin.h" #include "perf.h" +#include "color.h" #include <linux/compiler.h> +#include <tools/config.h> #include <stdio.h> +#include <string.h> +#include <subcmd/parse-options.h> -int cmd_version(int argc __maybe_unused, const char **argv __maybe_unused) +int version_verbose; + +struct version { + bool build_options; +}; + +static struct version version; + +static struct option version_options[] = { + OPT_BOOLEAN(0, "build-options", &version.build_options, + "display the build options"), +}; + +static const char * const version_usage[] = { + "perf version [<options>]", + NULL +}; + +static void on_off_print(const char *status) +{ + printf("[ "); + + if (!strcmp(status, "OFF")) + color_fprintf(stdout, PERF_COLOR_RED, "%-3s", status); + else + color_fprintf(stdout, PERF_COLOR_GREEN, "%-3s", status); + + printf(" ]"); +} + +static void status_print(const char *name, const char *macro, + const char *status) { + printf("%22s: ", name); + on_off_print(status); + printf(" # %s\n", macro); +} + +#define STATUS(__d, __m) \ +do { \ + if (IS_BUILTIN(__d)) \ + status_print(#__m, #__d, "on"); \ + else \ + status_print(#__m, #__d, "OFF"); \ +} while (0) + +static void library_status(void) +{ + STATUS(HAVE_DWARF_SUPPORT, dwarf); + STATUS(HAVE_DWARF_GETLOCATIONS_SUPPORT, dwarf_getlocations); + STATUS(HAVE_GLIBC_SUPPORT, glibc); + STATUS(HAVE_GTK2_SUPPORT, gtk2); +#ifndef HAVE_SYSCALL_TABLE_SUPPORT + STATUS(HAVE_LIBAUDIT_SUPPORT, libaudit); +#endif + STATUS(HAVE_SYSCALL_TABLE_SUPPORT, syscall_table); + STATUS(HAVE_LIBBFD_SUPPORT, libbfd); + STATUS(HAVE_LIBELF_SUPPORT, libelf); + STATUS(HAVE_LIBNUMA_SUPPORT, libnuma); + STATUS(HAVE_LIBNUMA_SUPPORT, numa_num_possible_cpus); + STATUS(HAVE_LIBPERL_SUPPORT, libperl); + STATUS(HAVE_LIBPYTHON_SUPPORT, libpython); + STATUS(HAVE_SLANG_SUPPORT, libslang); + STATUS(HAVE_LIBCRYPTO_SUPPORT, libcrypto); + STATUS(HAVE_LIBUNWIND_SUPPORT, libunwind); + STATUS(HAVE_DWARF_SUPPORT, libdw-dwarf-unwind); + STATUS(HAVE_ZLIB_SUPPORT, zlib); + STATUS(HAVE_LZMA_SUPPORT, lzma); + STATUS(HAVE_AUXTRACE_SUPPORT, get_cpuid); + STATUS(HAVE_LIBBPF_SUPPORT, bpf); +} + +int cmd_version(int argc, const char **argv) +{ + argc = parse_options(argc, argv, version_options, version_usage, + PARSE_OPT_STOP_AT_NON_OPTION); + printf("perf version %s\n", perf_version_string); + + if (version.build_options || version_verbose == 1) + library_status(); + return 0; } diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 1b3fc8ec0fa2..20a08cb32332 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -73,7 +73,7 @@ static struct cmd_struct commands[] = { { "lock", cmd_lock, 0 }, { "kvm", cmd_kvm, 0 }, { "test", cmd_test, 0 }, -#if defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE) +#if defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE_SUPPORT) { "trace", cmd_trace, 0 }, #endif { "inject", cmd_inject, 0 }, @@ -190,6 +190,12 @@ static int handle_options(const char ***argv, int *argc, int *envchanged) break; } + if (!strcmp(cmd, "-vv")) { + (*argv)[0] = "version"; + version_verbose = 1; + break; + } + /* * Check remaining flags. */ @@ -485,7 +491,7 @@ int main(int argc, const char **argv) argv[0] = cmd; } if (strstarts(cmd, "trace")) { -#if defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE) +#if defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE_SUPPORT) setup_path(); argv[0] = "trace"; return cmd_trace(argc, argv); diff --git a/tools/perf/perf.h b/tools/perf/perf.h index 8fec1abd0f1f..a1a97956136f 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -84,6 +84,7 @@ struct record_opts { struct option; extern const char * const *record_usage; extern struct option *record_options; +extern int version_verbose; int record__parse_freq(const struct option *opt, const char *str, int unset); #endif diff --git a/tools/perf/pmu-events/arch/s390/mapfile.csv b/tools/perf/pmu-events/arch/s390/mapfile.csv index ca7682748a4b..78bcf7f8e206 100644 --- a/tools/perf/pmu-events/arch/s390/mapfile.csv +++ b/tools/perf/pmu-events/arch/s390/mapfile.csv @@ -1,6 +1,6 @@ Family-model,Version,Filename,EventType -209[78],1,cf_z10,core -281[78],1,cf_z196,core -282[78],1,cf_zec12,core -296[45],1,cf_z13,core -3906,3,cf_z14,core +^IBM.209[78].*[13]\.[1-5].[[:xdigit:]]+$,1,cf_z10,core +^IBM.281[78].*[13]\.[1-5].[[:xdigit:]]+$,1,cf_z196,core +^IBM.282[78].*[13]\.[1-5].[[:xdigit:]]+$,1,cf_zec12,core +^IBM.296[45].*[13]\.[1-5].[[:xdigit:]]+$,1,cf_z13,core +^IBM.390[67].*[13]\.[1-5].[[:xdigit:]]+$,3,cf_z14,core diff --git a/tools/perf/pmu-events/arch/x86/mapfile.csv b/tools/perf/pmu-events/arch/x86/mapfile.csv index 93656f2fd53a..7e3cce3bcf3b 100644 --- a/tools/perf/pmu-events/arch/x86/mapfile.csv +++ b/tools/perf/pmu-events/arch/x86/mapfile.csv @@ -29,7 +29,6 @@ GenuineIntel-6-4D,v13,silvermont,core GenuineIntel-6-4C,v13,silvermont,core GenuineIntel-6-2A,v15,sandybridge,core GenuineIntel-6-2C,v2,westmereep-dp,core -GenuineIntel-6-2C,v2,westmereep-dp,core GenuineIntel-6-25,v2,westmereep-sp,core GenuineIntel-6-2F,v2,westmereex,core GenuineIntel-6-55,v1,skylakex,core diff --git a/tools/perf/tests/attr/test-record-group-sampling b/tools/perf/tests/attr/test-record-group-sampling index f906b793196f..8a33ca4f9e1f 100644 --- a/tools/perf/tests/attr/test-record-group-sampling +++ b/tools/perf/tests/attr/test-record-group-sampling @@ -35,3 +35,6 @@ inherit=0 # sampling disabled sample_freq=0 sample_period=0 +freq=0 +write_backward=0 +sample_id_all=0 diff --git a/tools/perf/tests/bpf-script-example.c b/tools/perf/tests/bpf-script-example.c index e4123c1b0e88..1ca5106df5f1 100644 --- a/tools/perf/tests/bpf-script-example.c +++ b/tools/perf/tests/bpf-script-example.c @@ -31,7 +31,7 @@ struct bpf_map_def SEC("maps") flip_table = { .max_entries = 1, }; -SEC("func=SyS_epoll_pwait") +SEC("func=do_epoll_wait") int bpf_func__SyS_epoll_pwait(void *ctx) { int ind =0; diff --git a/tools/perf/tests/bpf-script-test-kbuild.c b/tools/perf/tests/bpf-script-test-kbuild.c index 3626924740d8..ff3ec8337f0a 100644 --- a/tools/perf/tests/bpf-script-test-kbuild.c +++ b/tools/perf/tests/bpf-script-test-kbuild.c @@ -9,7 +9,6 @@ #define SEC(NAME) __attribute__((section(NAME), used)) #include <uapi/linux/fs.h> -#include <uapi/asm/ptrace.h> SEC("func=vfs_llseek") int bpf_func__vfs_llseek(void *ctx) diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 625f5a6772af..cac8f8889bc3 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -118,6 +118,7 @@ static struct test generic_tests[] = { { .desc = "Breakpoint accounting", .func = test__bp_accounting, + .is_supported = test__bp_signal_is_supported, }, { .desc = "Number of exit events of a simple workload", diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c index bb8e6bcb0d96..0919b0793e5b 100644 --- a/tools/perf/tests/mmap-basic.c +++ b/tools/perf/tests/mmap-basic.c @@ -75,7 +75,7 @@ int test__basic_mmap(struct test *test __maybe_unused, int subtest __maybe_unuse snprintf(name, sizeof(name), "sys_enter_%s", syscall_names[i]); evsels[i] = perf_evsel__newtp("syscalls", name); if (IS_ERR(evsels[i])) { - pr_debug("perf_evsel__new\n"); + pr_debug("perf_evsel__new(%s)\n", name); goto out_delete_evlist; } diff --git a/tools/perf/tests/shell/record+probe_libc_inet_pton.sh b/tools/perf/tests/shell/record+probe_libc_inet_pton.sh index 1ecc1f0ff84a..ee86473643be 100755 --- a/tools/perf/tests/shell/record+probe_libc_inet_pton.sh +++ b/tools/perf/tests/shell/record+probe_libc_inet_pton.sh @@ -16,15 +16,13 @@ nm -g $libc 2>/dev/null | fgrep -q inet_pton || exit 254 trace_libc_inet_pton_backtrace() { idx=0 expected[0]="ping[][0-9 \.:]+probe_libc:inet_pton: \([[:xdigit:]]+\)" - expected[1]=".*inet_pton[[:space:]]\($libc\)$" + expected[1]=".*inet_pton[[:space:]]\($libc|inlined\)$" case "$(uname -m)" in s390x) - eventattr='call-graph=dwarf' + eventattr='call-graph=dwarf,max-stack=4' expected[2]="gaih_inet.*[[:space:]]\($libc|inlined\)$" - expected[3]="__GI_getaddrinfo[[:space:]]\($libc|inlined\)$" + expected[3]="(__GI_)?getaddrinfo[[:space:]]\($libc|inlined\)$" expected[4]="main[[:space:]]\(.*/bin/ping.*\)$" - expected[5]="__libc_start_main[[:space:]]\($libc\)$" - expected[6]="_start[[:space:]]\(.*/bin/ping.*\)$" ;; *) eventattr='max-stack=3' diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c index 17cb1bb3448c..40e30a26b23c 100644 --- a/tools/perf/tests/topology.c +++ b/tools/perf/tests/topology.c @@ -70,6 +70,27 @@ static int check_cpu_topology(char *path, struct cpu_map *map) session = perf_session__new(&data, false, NULL); TEST_ASSERT_VAL("can't get session", session); + /* On platforms with large numbers of CPUs process_cpu_topology() + * might issue an error while reading the perf.data file section + * HEADER_CPU_TOPOLOGY and the cpu_topology_map pointed to by member + * cpu is a NULL pointer. + * Example: On s390 + * CPU 0 is on core_id 0 and physical_package_id 6 + * CPU 1 is on core_id 1 and physical_package_id 3 + * + * Core_id and physical_package_id are platform and architecture + * dependend and might have higher numbers than the CPU id. + * This actually depends on the configuration. + * + * In this case process_cpu_topology() prints error message: + * "socket_id number is too big. You may need to upgrade the + * perf tool." + * + * This is the reason why this test might be skipped. + */ + if (!session->header.env.cpu) + return TEST_SKIP; + for (i = 0; i < session->header.env.nr_cpus_avail; i++) { if (!cpu_map__has(map, i)) continue; @@ -95,7 +116,7 @@ int test__session_topology(struct test *test __maybe_unused, int subtest __maybe { char path[PATH_MAX]; struct cpu_map *map; - int ret = -1; + int ret = TEST_FAIL; TEST_ASSERT_VAL("can't get templ file", !get_temp(path)); @@ -110,12 +131,9 @@ int test__session_topology(struct test *test __maybe_unused, int subtest __maybe goto free_path; } - if (check_cpu_topology(path, map)) - goto free_map; - ret = 0; - -free_map: + ret = check_cpu_topology(path, map); cpu_map__put(map); + free_path: unlink(path); return ret; diff --git a/tools/perf/trace/beauty/mmap.c b/tools/perf/trace/beauty/mmap.c index 417e3ecfe9d7..9f68077b241b 100644 --- a/tools/perf/trace/beauty/mmap.c +++ b/tools/perf/trace/beauty/mmap.c @@ -54,6 +54,9 @@ static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size, P_MMAP_FLAG(EXECUTABLE); P_MMAP_FLAG(FILE); P_MMAP_FLAG(FIXED); +#ifdef MAP_FIXED_NOREPLACE + P_MMAP_FLAG(FIXED_NOREPLACE); +#endif P_MMAP_FLAG(GROWSDOWN); P_MMAP_FLAG(HUGETLB); P_MMAP_FLAG(LOCKED); diff --git a/tools/perf/ui/browser.c b/tools/perf/ui/browser.c index 9f6ce29b83b4..4f75561424ed 100644 --- a/tools/perf/ui/browser.c +++ b/tools/perf/ui/browser.c @@ -45,11 +45,16 @@ void ui_browser__set_percent_color(struct ui_browser *browser, ui_browser__set_color(browser, color); } -void ui_browser__gotorc(struct ui_browser *browser, int y, int x) +void ui_browser__gotorc_title(struct ui_browser *browser, int y, int x) { SLsmg_gotorc(browser->y + y, browser->x + x); } +void ui_browser__gotorc(struct ui_browser *browser, int y, int x) +{ + SLsmg_gotorc(browser->y + y + browser->extra_title_lines, browser->x + x); +} + void ui_browser__write_nstring(struct ui_browser *browser __maybe_unused, const char *msg, unsigned int width) { @@ -191,6 +196,7 @@ void ui_browser__refresh_dimensions(struct ui_browser *browser) { browser->width = SLtt_Screen_Cols - 1; browser->height = browser->rows = SLtt_Screen_Rows - 2; + browser->rows -= browser->extra_title_lines; browser->y = 1; browser->x = 0; } @@ -337,8 +343,8 @@ static int __ui_browser__refresh(struct ui_browser *browser) else width += 1; - SLsmg_fill_region(browser->y + row, browser->x, - browser->height - row, width, ' '); + SLsmg_fill_region(browser->y + row + browser->extra_title_lines, browser->x, + browser->rows - row, width, ' '); return 0; } diff --git a/tools/perf/ui/browser.h b/tools/perf/ui/browser.h index 70057178ee34..aa5932e1d62e 100644 --- a/tools/perf/ui/browser.h +++ b/tools/perf/ui/browser.h @@ -17,6 +17,7 @@ struct ui_browser { u64 index, top_idx; void *top, *entries; u16 y, x, width, height, rows, columns, horiz_scroll; + u8 extra_title_lines; int current_color; void *priv; const char *title; @@ -38,6 +39,7 @@ bool ui_browser__is_current_entry(struct ui_browser *browser, unsigned row); void ui_browser__refresh_dimensions(struct ui_browser *browser); void ui_browser__reset_index(struct ui_browser *browser); +void ui_browser__gotorc_title(struct ui_browser *browser, int y, int x); void ui_browser__gotorc(struct ui_browser *browser, int y, int x); void ui_browser__write_nstring(struct ui_browser *browser, const char *msg, unsigned int width); diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index c02fb437ac8e..3781d74088a7 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -218,7 +218,7 @@ static unsigned int annotate_browser__refresh(struct ui_browser *browser) annotate_browser__draw_current_jump(browser); ui_browser__set_color(browser, HE_COLORSET_NORMAL); - __ui_browser__vline(browser, pcnt_width, 0, browser->height - 1); + __ui_browser__vline(browser, pcnt_width, 0, browser->rows - 1); return ret; } @@ -592,21 +592,40 @@ bool annotate_browser__continue_search_reverse(struct annotate_browser *browser, return __annotate_browser__search_reverse(browser); } +static int annotate_browser__show(struct ui_browser *browser, char *title, const char *help) +{ + struct map_symbol *ms = browser->priv; + struct symbol *sym = ms->sym; + char symbol_dso[SYM_TITLE_MAX_SIZE]; + + if (ui_browser__show(browser, title, help) < 0) + return -1; + + sym_title(sym, ms->map, symbol_dso, sizeof(symbol_dso)); + + ui_browser__gotorc_title(browser, 0, 0); + ui_browser__set_color(browser, HE_COLORSET_ROOT); + ui_browser__write_nstring(browser, symbol_dso, browser->width + 1); + return 0; +} + static int annotate_browser__run(struct annotate_browser *browser, struct perf_evsel *evsel, struct hist_browser_timer *hbt) { struct rb_node *nd = NULL; + struct hists *hists = evsel__hists(evsel); struct map_symbol *ms = browser->b.priv; struct symbol *sym = ms->sym; struct annotation *notes = symbol__annotation(ms->sym); const char *help = "Press 'h' for help on key bindings"; int delay_secs = hbt ? hbt->refresh : 0; + char title[256]; int key; - char title[SYM_TITLE_MAX_SIZE]; - sym_title(sym, ms->map, title, sizeof(title)); - if (ui_browser__show(&browser->b, title, help) < 0) + annotation__scnprintf_samples_period(notes, title, sizeof(title), evsel); + + if (annotate_browser__show(&browser->b, title, help) < 0) return -1; annotate_browser__calc_percent(browser, evsel); @@ -637,8 +656,11 @@ static int annotate_browser__run(struct annotate_browser *browser, if (hbt) hbt->timer(hbt->arg); - if (delay_secs != 0) + if (delay_secs != 0) { symbol__annotate_decay_histogram(sym, evsel->idx); + hists__scnprintf_title(hists, title, sizeof(title)); + annotate_browser__show(&browser->b, title, help); + } continue; case K_TAB: if (nd != NULL) { @@ -670,6 +692,7 @@ static int annotate_browser__run(struct annotate_browser *browser, "J Toggle showing number of jump sources on targets\n" "n Search next string\n" "o Toggle disassembler output/simplified view\n" + "O Bump offset level (jump targets -> +call -> all -> cycle thru)\n" "s Toggle source code view\n" "t Circulate percent, total period, samples view\n" "/ Search string\n" @@ -697,6 +720,10 @@ static int annotate_browser__run(struct annotate_browser *browser, notes->options->use_offset = !notes->options->use_offset; annotation__update_column_widths(notes); continue; + case 'O': + if (++notes->options->offset_level > ANNOTATION__MAX_OFFSET_LEVEL) + notes->options->offset_level = ANNOTATION__MIN_OFFSET_LEVEL; + continue; case 'j': notes->options->jump_arrows = !notes->options->jump_arrows; continue; @@ -812,6 +839,7 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, .seek = ui_browser__list_head_seek, .write = annotate_browser__write, .filter = disasm_line__filter, + .extra_title_lines = 1, /* for hists__scnprintf_title() */ .priv = &ms, .use_navkeypressed = true, }, diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 8b4e82548f8e..e5f247247daa 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -32,8 +32,7 @@ extern void hist_browser__init_hpp(void); -static int perf_evsel_browser_title(struct hist_browser *browser, - char *bf, size_t size); +static int hists_browser__scnprintf_title(struct hist_browser *browser, char *bf, size_t size); static void hist_browser__update_nr_entries(struct hist_browser *hb); static struct rb_node *hists__filter_entries(struct rb_node *nd, @@ -62,6 +61,15 @@ static int hist_browser__get_folding(struct hist_browser *browser) return unfolded_rows; } +static void hist_browser__set_title_space(struct hist_browser *hb) +{ + struct ui_browser *browser = &hb->b; + struct hists *hists = hb->hists; + struct perf_hpp_list *hpp_list = hists->hpp_list; + + browser->extra_title_lines = hb->show_headers ? hpp_list->nr_header_lines : 0; +} + static u32 hist_browser__nr_entries(struct hist_browser *hb) { u32 nr_entries; @@ -82,10 +90,16 @@ static void hist_browser__update_rows(struct hist_browser *hb) struct ui_browser *browser = &hb->b; struct hists *hists = hb->hists; struct perf_hpp_list *hpp_list = hists->hpp_list; - u16 header_offset, index_row; + u16 index_row; - header_offset = hb->show_headers ? hpp_list->nr_header_lines : 0; - browser->rows = browser->height - header_offset; + if (!hb->show_headers) { + browser->rows += browser->extra_title_lines; + browser->extra_title_lines = 0; + return; + } + + browser->extra_title_lines = hpp_list->nr_header_lines; + browser->rows -= browser->extra_title_lines; /* * Verify if we were at the last line and that line isn't * visibe because we now show the header line(s). @@ -108,17 +122,6 @@ static void hist_browser__refresh_dimensions(struct ui_browser *browser) * changeset. */ ui_browser__refresh_dimensions(browser); - hist_browser__update_rows(hb); -} - -static void hist_browser__gotorc(struct hist_browser *browser, int row, int column) -{ - struct hists *hists = browser->hists; - struct perf_hpp_list *hpp_list = hists->hpp_list; - u16 header_offset; - - header_offset = browser->show_headers ? hpp_list->nr_header_lines : 0; - ui_browser__gotorc(&browser->b, row + header_offset, column); } static void hist_browser__reset(struct hist_browser *browser) @@ -656,9 +659,10 @@ int hist_browser__run(struct hist_browser *browser, const char *help, struct hist_entry *h = rb_entry(browser->b.top, struct hist_entry, rb_node); ui_helpline__pop(); - ui_helpline__fpush("%d: nr_ent=(%d,%d), rows=%d, idx=%d, fve: idx=%d, row_off=%d, nrows=%d", + ui_helpline__fpush("%d: nr_ent=(%d,%d), etl: %d, rows=%d, idx=%d, fve: idx=%d, row_off=%d, nrows=%d", seq++, browser->b.nr_entries, browser->hists->nr_entries, + browser->b.extra_title_lines, browser->b.rows, browser->b.index, browser->b.top_idx, @@ -733,7 +737,7 @@ static void hist_browser__show_callchain_entry(struct hist_browser *browser, } ui_browser__set_color(&browser->b, color); - hist_browser__gotorc(browser, row, 0); + ui_browser__gotorc(&browser->b, row, 0); ui_browser__write_nstring(&browser->b, " ", offset); ui_browser__printf(&browser->b, "%c", folded_sign); ui_browser__write_graph(&browser->b, show_annotated ? SLSMG_RARROW_CHAR : ' '); @@ -1249,7 +1253,7 @@ static int hist_browser__show_entry(struct hist_browser *browser, }; int column = 0; - hist_browser__gotorc(browser, row, 0); + ui_browser__gotorc(&browser->b, row, 0); hists__for_each_format(browser->hists, fmt) { char s[2048]; @@ -1358,7 +1362,7 @@ static int hist_browser__show_hierarchy_entry(struct hist_browser *browser, goto show_callchain; } - hist_browser__gotorc(browser, row, 0); + ui_browser__gotorc(&browser->b, row, 0); if (current_entry && browser->b.navkeypressed) ui_browser__set_color(&browser->b, HE_COLORSET_SELECTED); @@ -1507,7 +1511,7 @@ static int hist_browser__show_no_entry(struct hist_browser *browser, browser->selection = NULL; } - hist_browser__gotorc(browser, row, 0); + ui_browser__gotorc(&browser->b, row, 0); if (current_entry && browser->b.navkeypressed) ui_browser__set_color(&browser->b, HE_COLORSET_SELECTED); @@ -1713,7 +1717,7 @@ static void hists_browser__headers(struct hist_browser *browser) hists_browser__scnprintf_headers(browser, headers, sizeof(headers), line); - ui_browser__gotorc(&browser->b, line, 0); + ui_browser__gotorc_title(&browser->b, line, 0); ui_browser__set_color(&browser->b, HE_COLORSET_ROOT); ui_browser__write_nstring(&browser->b, headers, browser->b.width + 1); } @@ -1740,17 +1744,11 @@ static void ui_browser__hists_init_top(struct ui_browser *browser) static unsigned int hist_browser__refresh(struct ui_browser *browser) { unsigned row = 0; - u16 header_offset = 0; struct rb_node *nd; struct hist_browser *hb = container_of(browser, struct hist_browser, b); - struct hists *hists = hb->hists; - - if (hb->show_headers) { - struct perf_hpp_list *hpp_list = hists->hpp_list; + if (hb->show_headers) hist_browser__show_headers(hb); - header_offset = hpp_list->nr_header_lines; - } ui_browser__hists_init_top(browser); hb->he_selection = NULL; @@ -1788,7 +1786,7 @@ static unsigned int hist_browser__refresh(struct ui_browser *browser) break; } - return row + header_offset; + return row; } static struct rb_node *hists__filter_entries(struct rb_node *nd, @@ -2143,6 +2141,7 @@ void hist_browser__init(struct hist_browser *browser, browser->b.seek = ui_browser__hists_seek; browser->b.use_navkeypressed = true; browser->show_headers = symbol_conf.show_hist_headers; + hist_browser__set_title_space(browser); if (symbol_conf.report_hierarchy) { struct perf_hpp_list_node *fmt_node; @@ -2183,7 +2182,7 @@ perf_evsel_browser__new(struct perf_evsel *evsel, if (browser) { browser->hbt = hbt; browser->env = env; - browser->title = perf_evsel_browser_title; + browser->title = hists_browser__scnprintf_title; } return browser; } @@ -2209,84 +2208,11 @@ static inline bool is_report_browser(void *timer) return timer == NULL; } -static int perf_evsel_browser_title(struct hist_browser *browser, - char *bf, size_t size) +static int hists_browser__scnprintf_title(struct hist_browser *browser, char *bf, size_t size) { struct hist_browser_timer *hbt = browser->hbt; - struct hists *hists = browser->hists; - char unit; - int printed; - const struct dso *dso = hists->dso_filter; - const struct thread *thread = hists->thread_filter; - int socket_id = hists->socket_filter; - unsigned long nr_samples = hists->stats.nr_events[PERF_RECORD_SAMPLE]; - u64 nr_events = hists->stats.total_period; - struct perf_evsel *evsel = hists_to_evsel(hists); - const char *ev_name = perf_evsel__name(evsel); - char buf[512], sample_freq_str[64] = ""; - size_t buflen = sizeof(buf); - char ref[30] = " show reference callgraph, "; - bool enable_ref = false; + int printed = __hists__scnprintf_title(browser->hists, bf, size, !is_report_browser(hbt)); - if (symbol_conf.filter_relative) { - nr_samples = hists->stats.nr_non_filtered_samples; - nr_events = hists->stats.total_non_filtered_period; - } - - if (perf_evsel__is_group_event(evsel)) { - struct perf_evsel *pos; - - perf_evsel__group_desc(evsel, buf, buflen); - ev_name = buf; - - for_each_group_member(pos, evsel) { - struct hists *pos_hists = evsel__hists(pos); - - if (symbol_conf.filter_relative) { - nr_samples += pos_hists->stats.nr_non_filtered_samples; - nr_events += pos_hists->stats.total_non_filtered_period; - } else { - nr_samples += pos_hists->stats.nr_events[PERF_RECORD_SAMPLE]; - nr_events += pos_hists->stats.total_period; - } - } - } - - if (symbol_conf.show_ref_callgraph && - strstr(ev_name, "call-graph=no")) - enable_ref = true; - - if (!is_report_browser(hbt)) - scnprintf(sample_freq_str, sizeof(sample_freq_str), " %d Hz,", evsel->attr.sample_freq); - - nr_samples = convert_unit(nr_samples, &unit); - printed = scnprintf(bf, size, - "Samples: %lu%c of event%s '%s',%s%sEvent count (approx.): %" PRIu64, - nr_samples, unit, evsel->nr_members > 1 ? "s" : "", - ev_name, sample_freq_str, enable_ref ? ref : " ", nr_events); - - - if (hists->uid_filter_str) - printed += snprintf(bf + printed, size - printed, - ", UID: %s", hists->uid_filter_str); - if (thread) { - if (hists__has(hists, thread)) { - printed += scnprintf(bf + printed, size - printed, - ", Thread: %s(%d)", - (thread->comm_set ? thread__comm_str(thread) : ""), - thread->tid); - } else { - printed += scnprintf(bf + printed, size - printed, - ", Thread: %s", - (thread->comm_set ? thread__comm_str(thread) : "")); - } - } - if (dso) - printed += scnprintf(bf + printed, size - printed, - ", DSO: %s", dso->short_name); - if (socket_id > -1) - printed += scnprintf(bf + printed, size - printed, - ", Processor Socket: %d", socket_id); if (!is_report_browser(hbt)) { struct perf_top *top = hbt->arg; @@ -2788,7 +2714,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, "h/?/F1 Show this window\n" \ "UP/DOWN/PGUP\n" \ "PGDN/SPACE Navigate\n" \ - "q/ESC/CTRL+C Exit browser\n\n" \ + "q/ESC/CTRL+C Exit browser or go back to previous screen\n\n" \ "For multiple event sessions:\n\n" \ "TAB/UNTAB Switch events\n\n" \ "For symbolic views (--sort has sym):\n\n" \ diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 3a428d7c59b9..5d74a30fe00f 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -17,6 +17,7 @@ #include "config.h" #include "cache.h" #include "symbol.h" +#include "units.h" #include "debug.h" #include "annotate.h" #include "evsel.h" @@ -45,6 +46,7 @@ struct annotation_options annotation__default_options = { .use_offset = true, .jump_arrows = true, + .offset_level = ANNOTATION__OFFSET_JUMP_TARGETS, }; const char *disassembler_style; @@ -1261,6 +1263,9 @@ annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start max_percent = sample->percent; } + if (al->samples_nr > nr_percent) + nr_percent = al->samples_nr; + if (max_percent < min_pcnt) return -1; @@ -2324,7 +2329,7 @@ int symbol__tty_annotate2(struct symbol *sym, struct map *map, struct dso *dso = map->dso; struct rb_root source_line = RB_ROOT; struct annotation_options opts = annotation__default_options; - const char *ev_name = perf_evsel__name(evsel); + struct annotation *notes = symbol__annotation(sym); char buf[1024]; if (symbol__annotate2(sym, map, evsel, &opts, NULL) < 0) @@ -2336,12 +2341,8 @@ int symbol__tty_annotate2(struct symbol *sym, struct map *map, print_summary(&source_line, dso->long_name); } - if (perf_evsel__is_group_event(evsel)) { - perf_evsel__group_desc(evsel, buf, sizeof(buf)); - ev_name = buf; - } - - fprintf(stdout, "%s() %s\nEvent: %s\n\n", sym->name, dso->long_name, ev_name); + annotation__scnprintf_samples_period(notes, buf, sizeof(buf), evsel); + fprintf(stdout, "%s\n%s() %s\n", buf, sym->name, dso->long_name); symbol__annotate_fprintf2(sym, stdout); annotated_source__purge(symbol__annotation(sym)->src); @@ -2515,7 +2516,8 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati if (!notes->options->use_offset) { printed = scnprintf(bf, sizeof(bf), "%" PRIx64 ": ", addr); } else { - if (al->jump_sources) { + if (al->jump_sources && + notes->options->offset_level >= ANNOTATION__OFFSET_JUMP_TARGETS) { if (notes->options->show_nr_jumps) { int prev; printed = scnprintf(bf, sizeof(bf), "%*d ", @@ -2526,9 +2528,14 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati obj__printf(obj, bf); obj__set_color(obj, prev); } - +print_addr: printed = scnprintf(bf, sizeof(bf), "%*" PRIx64 ": ", notes->widths.target, addr); + } else if (ins__is_call(&disasm_line(al)->ins) && + notes->options->offset_level >= ANNOTATION__OFFSET_CALL) { + goto print_addr; + } else if (notes->options->offset_level == ANNOTATION__MAX_OFFSET_LEVEL) { + goto print_addr; } else { printed = scnprintf(bf, sizeof(bf), "%-*s ", notes->widths.addr, " "); @@ -2597,6 +2604,46 @@ out_free_offsets: return -1; } +int __annotation__scnprintf_samples_period(struct annotation *notes, + char *bf, size_t size, + struct perf_evsel *evsel, + bool show_freq) +{ + const char *ev_name = perf_evsel__name(evsel); + char buf[1024], ref[30] = " show reference callgraph, "; + char sample_freq_str[64] = ""; + unsigned long nr_samples = 0; + int nr_members = 1; + bool enable_ref = false; + u64 nr_events = 0; + char unit; + int i; + + if (perf_evsel__is_group_event(evsel)) { + perf_evsel__group_desc(evsel, buf, sizeof(buf)); + ev_name = buf; + nr_members = evsel->nr_members; + } + + for (i = 0; i < nr_members; i++) { + struct sym_hist *ah = annotation__histogram(notes, evsel->idx + i); + + nr_samples += ah->nr_samples; + nr_events += ah->period; + } + + if (symbol_conf.show_ref_callgraph && strstr(ev_name, "call-graph=no")) + enable_ref = true; + + if (show_freq) + scnprintf(sample_freq_str, sizeof(sample_freq_str), " %d Hz,", evsel->attr.sample_freq); + + nr_samples = convert_unit(nr_samples, &unit); + return scnprintf(bf, size, "Samples: %lu%c of event%s '%s',%s%sEvent count (approx.): %" PRIu64, + nr_samples, unit, evsel->nr_members > 1 ? "s" : "", + ev_name, sample_freq_str, enable_ref ? ref : " ", nr_events); +} + #define ANNOTATION__CFG(n) \ { .name = #n, .value = &annotation__default_options.n, } @@ -2605,10 +2652,11 @@ out_free_offsets: */ static struct annotation_config { const char *name; - bool *value; + void *value; } annotation__configs[] = { ANNOTATION__CFG(hide_src_code), ANNOTATION__CFG(jump_arrows), + ANNOTATION__CFG(offset_level), ANNOTATION__CFG(show_linenr), ANNOTATION__CFG(show_nr_jumps), ANNOTATION__CFG(show_nr_samples), @@ -2640,8 +2688,16 @@ static int annotation__config(const char *var, const char *value, if (cfg == NULL) pr_debug("%s variable unknown, ignoring...", var); - else - *cfg->value = perf_config_bool(name, value); + else if (strcmp(var, "annotate.offset_level") == 0) { + perf_config_int(cfg->value, name, value); + + if (*(int *)cfg->value > ANNOTATION__MAX_OFFSET_LEVEL) + *(int *)cfg->value = ANNOTATION__MAX_OFFSET_LEVEL; + else if (*(int *)cfg->value < ANNOTATION__MIN_OFFSET_LEVEL) + *(int *)cfg->value = ANNOTATION__MIN_OFFSET_LEVEL; + } else { + *(bool *)cfg->value = perf_config_bool(name, value); + } return 0; } diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index ff7e3df31efa..f28a9e43421d 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -70,8 +70,17 @@ struct annotation_options { show_nr_jumps, show_nr_samples, show_total_period; + u8 offset_level; }; +enum { + ANNOTATION__OFFSET_JUMP_TARGETS = 1, + ANNOTATION__OFFSET_CALL, + ANNOTATION__MAX_OFFSET_LEVEL, +}; + +#define ANNOTATION__MIN_OFFSET_LEVEL ANNOTATION__OFFSET_JUMP_TARGETS + extern struct annotation_options annotation__default_options; struct annotation; @@ -151,6 +160,18 @@ double annotation_line__max_percent(struct annotation_line *al, struct annotatio void annotation_line__write(struct annotation_line *al, struct annotation *notes, struct annotation_write_ops *ops); +int __annotation__scnprintf_samples_period(struct annotation *notes, + char *bf, size_t size, + struct perf_evsel *evsel, + bool show_freq); + +static inline int annotation__scnprintf_samples_period(struct annotation *notes, + char *bf, size_t size, + struct perf_evsel *evsel) +{ + return __annotation__scnprintf_samples_period(notes, bf, size, evsel, true); +} + int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool raw); size_t disasm__fprintf(struct list_head *head, FILE *fp); void symbol__calc_percent(struct symbol *sym, struct perf_evsel *evsel); diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index fb357a00dd86..857de69a5361 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -302,13 +302,27 @@ static int auxtrace_queues__split_buffer(struct auxtrace_queues *queues, return 0; } +static bool filter_cpu(struct perf_session *session, int cpu) +{ + unsigned long *cpu_bitmap = session->itrace_synth_opts->cpu_bitmap; + + return cpu_bitmap && cpu != -1 && !test_bit(cpu, cpu_bitmap); +} + static int auxtrace_queues__add_buffer(struct auxtrace_queues *queues, struct perf_session *session, unsigned int idx, struct auxtrace_buffer *buffer, struct auxtrace_buffer **buffer_ptr) { - int err; + int err = -ENOMEM; + + if (filter_cpu(session, buffer->cpu)) + return 0; + + buffer = memdup(buffer, sizeof(*buffer)); + if (!buffer) + return -ENOMEM; if (session->one_mmap) { buffer->data = buffer->data_offset - session->one_mmap_offset + @@ -316,31 +330,28 @@ static int auxtrace_queues__add_buffer(struct auxtrace_queues *queues, } else if (perf_data__is_pipe(session->data)) { buffer->data = auxtrace_copy_data(buffer->size, session); if (!buffer->data) - return -ENOMEM; + goto out_free; buffer->data_needs_freeing = true; } else if (BITS_PER_LONG == 32 && buffer->size > BUFFER_LIMIT_FOR_32_BIT) { err = auxtrace_queues__split_buffer(queues, idx, buffer); if (err) - return err; + goto out_free; } err = auxtrace_queues__queue_buffer(queues, idx, buffer); if (err) - return err; + goto out_free; /* FIXME: Doesn't work for split buffer */ if (buffer_ptr) *buffer_ptr = buffer; return 0; -} -static bool filter_cpu(struct perf_session *session, int cpu) -{ - unsigned long *cpu_bitmap = session->itrace_synth_opts->cpu_bitmap; - - return cpu_bitmap && cpu != -1 && !test_bit(cpu, cpu_bitmap); +out_free: + auxtrace_buffer__free(buffer); + return err; } int auxtrace_queues__add_event(struct auxtrace_queues *queues, @@ -348,36 +359,19 @@ int auxtrace_queues__add_event(struct auxtrace_queues *queues, union perf_event *event, off_t data_offset, struct auxtrace_buffer **buffer_ptr) { - struct auxtrace_buffer *buffer; - unsigned int idx; - int err; - - if (filter_cpu(session, event->auxtrace.cpu)) - return 0; - - buffer = zalloc(sizeof(struct auxtrace_buffer)); - if (!buffer) - return -ENOMEM; - - buffer->pid = -1; - buffer->tid = event->auxtrace.tid; - buffer->cpu = event->auxtrace.cpu; - buffer->data_offset = data_offset; - buffer->offset = event->auxtrace.offset; - buffer->reference = event->auxtrace.reference; - buffer->size = event->auxtrace.size; - idx = event->auxtrace.idx; - - err = auxtrace_queues__add_buffer(queues, session, idx, buffer, - buffer_ptr); - if (err) - goto out_err; - - return 0; + struct auxtrace_buffer buffer = { + .pid = -1, + .tid = event->auxtrace.tid, + .cpu = event->auxtrace.cpu, + .data_offset = data_offset, + .offset = event->auxtrace.offset, + .reference = event->auxtrace.reference, + .size = event->auxtrace.size, + }; + unsigned int idx = event->auxtrace.idx; -out_err: - auxtrace_buffer__free(buffer); - return err; + return auxtrace_queues__add_buffer(queues, session, idx, &buffer, + buffer_ptr); } static int auxtrace_queues__add_indexed_event(struct auxtrace_queues *queues, diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c index af7ad814b2c3..cee658733e2c 100644 --- a/tools/perf/util/bpf-loader.c +++ b/tools/perf/util/bpf-loader.c @@ -66,7 +66,7 @@ bpf__prepare_load_buffer(void *obj_buf, size_t obj_buf_sz, const char *name) } obj = bpf_object__open_buffer(obj_buf, obj_buf_sz, name); - if (IS_ERR(obj)) { + if (IS_ERR_OR_NULL(obj)) { pr_debug("bpf: failed to load buffer\n"); return ERR_PTR(-EINVAL); } @@ -102,14 +102,14 @@ struct bpf_object *bpf__prepare_load(const char *filename, bool source) pr_debug("bpf: successfull builtin compilation\n"); obj = bpf_object__open_buffer(obj_buf, obj_buf_sz, filename); - if (!IS_ERR(obj) && llvm_param.dump_obj) + if (!IS_ERR_OR_NULL(obj) && llvm_param.dump_obj) llvm__dump_obj(filename, obj_buf, obj_buf_sz); free(obj_buf); } else obj = bpf_object__open(filename); - if (IS_ERR(obj)) { + if (IS_ERR_OR_NULL(obj)) { pr_debug("bpf: failed to load %s\n", filename); return obj; } diff --git a/tools/perf/util/c++/clang-test.cpp b/tools/perf/util/c++/clang-test.cpp index a4014d786676..7b042a5ebc68 100644 --- a/tools/perf/util/c++/clang-test.cpp +++ b/tools/perf/util/c++/clang-test.cpp @@ -41,7 +41,7 @@ int test__clang_to_IR(void) if (!M) return -1; for (llvm::Function& F : *M) - if (F.getName() == "bpf_func__SyS_epoll_wait") + if (F.getName() == "bpf_func__SyS_epoll_pwait") return 0; return -1; } diff --git a/tools/perf/util/c++/clang.cpp b/tools/perf/util/c++/clang.cpp index 1bfc946e37dc..bf31ceab33bd 100644 --- a/tools/perf/util/c++/clang.cpp +++ b/tools/perf/util/c++/clang.cpp @@ -9,6 +9,7 @@ * Copyright (C) 2016 Huawei Inc. */ +#include "clang/Basic/Version.h" #include "clang/CodeGen/CodeGenAction.h" #include "clang/Frontend/CompilerInvocation.h" #include "clang/Frontend/CompilerInstance.h" @@ -58,7 +59,8 @@ createCompilerInvocation(llvm::opt::ArgStringList CFlags, StringRef& Path, FrontendOptions& Opts = CI->getFrontendOpts(); Opts.Inputs.clear(); - Opts.Inputs.emplace_back(Path, IK_C); + Opts.Inputs.emplace_back(Path, + FrontendOptions::getInputKindForExtension("c")); return CI; } @@ -71,10 +73,17 @@ getModuleFromSource(llvm::opt::ArgStringList CFlags, Clang.setVirtualFileSystem(&*VFS); +#if CLANG_VERSION_MAJOR < 4 IntrusiveRefCntPtr<CompilerInvocation> CI = createCompilerInvocation(std::move(CFlags), Path, Clang.getDiagnostics()); Clang.setInvocation(&*CI); +#else + std::shared_ptr<CompilerInvocation> CI( + createCompilerInvocation(std::move(CFlags), Path, + Clang.getDiagnostics())); + Clang.setInvocation(CI); +#endif std::unique_ptr<CodeGenAction> Act(new EmitLLVMOnlyAction(&*LLVMCtx)); if (!Clang.ExecuteAction(*Act)) diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c index 640af88331b4..4d5fc374e730 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c @@ -1,6 +1,5 @@ +// SPDX-License-Identifier: GPL-2.0 /* - * SPDX-License-Identifier: GPL-2.0 - * * Copyright(C) 2015-2018 Linaro Limited. * * Author: Tor Jeremiassen <tor@ti.com> @@ -97,11 +96,19 @@ int cs_etm_decoder__get_packet(struct cs_etm_decoder *decoder, /* Nothing to do, might as well just return */ if (decoder->packet_count == 0) return 0; + /* + * The queueing process in function cs_etm_decoder__buffer_packet() + * increments the tail *before* using it. This is somewhat counter + * intuitive but it has the advantage of centralizing tail management + * at a single location. Because of that we need to follow the same + * heuristic with the head, i.e we increment it before using its + * value. Otherwise the first element of the packet queue is not + * used. + */ + decoder->head = (decoder->head + 1) & (MAX_BUFFER - 1); *packet = decoder->packet_buffer[decoder->head]; - decoder->head = (decoder->head + 1) & (MAX_BUFFER - 1); - decoder->packet_count--; return 1; diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 1b0d422373be..bf16dc9ee507 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -1,6 +1,5 @@ +// SPDX-License-Identifier: GPL-2.0 /* - * SPDX-License-Identifier: GPL-2.0 - * * Copyright(C) 2015-2018 Linaro Limited. * * Author: Tor Jeremiassen <tor@ti.com> @@ -240,6 +239,7 @@ static void cs_etm__free(struct perf_session *session) for (i = 0; i < aux->num_cpu; i++) zfree(&aux->metadata[i]); + thread__zput(aux->unknown_thread); zfree(&aux->metadata); zfree(&aux); } @@ -613,8 +613,8 @@ cs_etm__get_trace(struct cs_etm_buffer *buff, struct cs_etm_queue *etmq) return buff->len; } -static void cs_etm__set_pid_tid_cpu(struct cs_etm_auxtrace *etm, - struct auxtrace_queue *queue) +static void cs_etm__set_pid_tid_cpu(struct cs_etm_auxtrace *etm, + struct auxtrace_queue *queue) { struct cs_etm_queue *etmq = queue->priv; @@ -1358,6 +1358,23 @@ int cs_etm__process_auxtrace_info(union perf_event *event, etm->auxtrace.free = cs_etm__free; session->auxtrace = &etm->auxtrace; + etm->unknown_thread = thread__new(999999999, 999999999); + if (!etm->unknown_thread) + goto err_free_queues; + + /* + * Initialize list node so that at thread__zput() we can avoid + * segmentation fault at list_del_init(). + */ + INIT_LIST_HEAD(&etm->unknown_thread->node); + + err = thread__set_comm(etm->unknown_thread, "unknown", 0); + if (err) + goto err_delete_thread; + + if (thread__init_map_groups(etm->unknown_thread, etm->machine)) + goto err_delete_thread; + if (dump_trace) { cs_etm__print_auxtrace_info(auxtrace_info->priv, num_cpu); return 0; @@ -1372,16 +1389,18 @@ int cs_etm__process_auxtrace_info(union perf_event *event, err = cs_etm__synth_events(etm, session); if (err) - goto err_free_queues; + goto err_delete_thread; err = auxtrace_queues__process_index(&etm->queues, session); if (err) - goto err_free_queues; + goto err_delete_thread; etm->data_queued = etm->queues.populated; return 0; +err_delete_thread: + thread__zput(etm->unknown_thread); err_free_queues: auxtrace_queues__free(&etm->queues); session->auxtrace = NULL; diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h index 5864d5dca616..37f8d48179ca 100644 --- a/tools/perf/util/cs-etm.h +++ b/tools/perf/util/cs-etm.h @@ -1,18 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright(C) 2015 Linaro Limited. All rights reserved. * Author: Mathieu Poirier <mathieu.poirier@linaro.org> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program. If not, see <http://www.gnu.org/licenses/>. */ #ifndef INCLUDE__UTIL_PERF_CS_ETM_H__ diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index f5acda13dcfa..7eb7de5aee44 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -979,7 +979,7 @@ int die_get_varname(Dwarf_Die *vr_die, struct strbuf *buf) return ret < 0 ? ret : strbuf_addf(buf, "\t%s", dwarf_diename(vr_die)); } -#ifdef HAVE_DWARF_GETLOCATIONS +#ifdef HAVE_DWARF_GETLOCATIONS_SUPPORT /** * die_get_var_innermost_scope - Get innermost scope range of given variable DIE * @sp_die: a subprogram DIE diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index f0a6cbd033cc..98ff3a6a3d50 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -1421,7 +1421,9 @@ size_t perf_event__fprintf_itrace_start(union perf_event *event, FILE *fp) size_t perf_event__fprintf_switch(union perf_event *event, FILE *fp) { bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT; - const char *in_out = out ? "OUT" : "IN "; + const char *in_out = !out ? "IN " : + !(event->header.misc & PERF_RECORD_MISC_SWITCH_OUT_PREEMPT) ? + "OUT " : "OUT preempt"; if (event->header.type == PERF_RECORD_SWITCH) return fprintf(fp, " %s\n", in_out); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 1ac8d9236efd..4cd2cf93f726 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -930,8 +930,11 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts, * than leader in case leader 'leads' the sampling. */ if ((leader != evsel) && leader->sample_read) { - attr->sample_freq = 0; - attr->sample_period = 0; + attr->freq = 0; + attr->sample_freq = 0; + attr->sample_period = 0; + attr->write_backward = 0; + attr->sample_id_all = 0; } if (opts->no_samples) @@ -1922,7 +1925,8 @@ try_fallback: goto fallback_missing_features; } else if (!perf_missing_features.group_read && evsel->attr.inherit && - (evsel->attr.read_format & PERF_FORMAT_GROUP)) { + (evsel->attr.read_format & PERF_FORMAT_GROUP) && + perf_evsel__is_group_leader(evsel)) { perf_missing_features.group_read = true; pr_debug2("switching off group read\n"); goto fallback_missing_features; @@ -2754,8 +2758,14 @@ bool perf_evsel__fallback(struct perf_evsel *evsel, int err, (paranoid = perf_event_paranoid()) > 1) { const char *name = perf_evsel__name(evsel); char *new_name; + const char *sep = ":"; - if (asprintf(&new_name, "%s%su", name, strchr(name, ':') ? "" : ":") < 0) + /* Is there already the separator in the name. */ + if (strchr(name, '/') || + strchr(name, ':')) + sep = ""; + + if (asprintf(&new_name, "%s%su", name, sep) < 0) return false; if (evsel->name) @@ -2870,8 +2880,7 @@ int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target, #if defined(__i386__) || defined(__x86_64__) if (evsel->attr.type == PERF_TYPE_HARDWARE) return scnprintf(msg, size, "%s", - "No hardware sampling interrupt available.\n" - "No APIC? If so then you can boot the kernel with the \"lapic\" boot parameter to force-enable it."); + "No hardware sampling interrupt available.\n"); #endif break; case EBUSY: @@ -2894,8 +2903,7 @@ int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target, return scnprintf(msg, size, "The sys_perf_event_open() syscall returned with %d (%s) for event (%s).\n" - "/bin/dmesg may provide additional information.\n" - "No CONFIG_PERF_EVENTS=y kernel support configured?", + "/bin/dmesg | grep -i perf may provide additional information.\n", err, str_error_r(err, sbuf, sizeof(sbuf)), perf_evsel__name(evsel)); } diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index d3ee3af618ef..b13f5f234c8f 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -115,6 +115,7 @@ struct perf_evsel { unsigned int sample_size; int id_pos; int is_pos; + bool uniquified_name; bool snapshot; bool supported; bool needs_swap; @@ -126,6 +127,7 @@ struct perf_evsel { bool precise_max; bool ignore_missing_thread; bool forced_leader; + bool use_uncore_alias; /* parse modifier helper */ int exclude_GH; int nr_members; diff --git a/tools/perf/util/generate-cmdlist.sh b/tools/perf/util/generate-cmdlist.sh index ff17920a5ebc..c3cef36d4176 100755 --- a/tools/perf/util/generate-cmdlist.sh +++ b/tools/perf/util/generate-cmdlist.sh @@ -38,7 +38,7 @@ do done echo "#endif /* HAVE_LIBELF_SUPPORT */" -echo "#if defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE)" +echo "#if defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE_SUPPORT)" sed -n -e 's/^perf-\([^ ]*\)[ ].* audit*/\1/p' command-list.txt | sort | while read cmd diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 121df1683c36..a8bff2178fbc 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -1320,7 +1320,8 @@ static int build_mem_topology(struct memory_node *nodes, u64 size, u64 *cntp) dir = opendir(path); if (!dir) { - pr_warning("failed: can't open node sysfs data\n"); + pr_debug2("%s: could't read %s, does this arch have topology information?\n", + __func__, path); return -1; } diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 7d968892ee39..4d602fba40b2 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -6,6 +6,7 @@ #include "session.h" #include "namespaces.h" #include "sort.h" +#include "units.h" #include "evlist.h" #include "evsel.h" #include "annotate.h" @@ -14,6 +15,7 @@ #include "ui/progress.h" #include <errno.h> #include <math.h> +#include <inttypes.h> #include <sys/param.h> static bool hists__filter_entry_by_dso(struct hists *hists, @@ -2454,6 +2456,85 @@ u64 hists__total_period(struct hists *hists) hists->stats.total_period; } +int __hists__scnprintf_title(struct hists *hists, char *bf, size_t size, bool show_freq) +{ + char unit; + int printed; + const struct dso *dso = hists->dso_filter; + const struct thread *thread = hists->thread_filter; + int socket_id = hists->socket_filter; + unsigned long nr_samples = hists->stats.nr_events[PERF_RECORD_SAMPLE]; + u64 nr_events = hists->stats.total_period; + struct perf_evsel *evsel = hists_to_evsel(hists); + const char *ev_name = perf_evsel__name(evsel); + char buf[512], sample_freq_str[64] = ""; + size_t buflen = sizeof(buf); + char ref[30] = " show reference callgraph, "; + bool enable_ref = false; + + if (symbol_conf.filter_relative) { + nr_samples = hists->stats.nr_non_filtered_samples; + nr_events = hists->stats.total_non_filtered_period; + } + + if (perf_evsel__is_group_event(evsel)) { + struct perf_evsel *pos; + + perf_evsel__group_desc(evsel, buf, buflen); + ev_name = buf; + + for_each_group_member(pos, evsel) { + struct hists *pos_hists = evsel__hists(pos); + + if (symbol_conf.filter_relative) { + nr_samples += pos_hists->stats.nr_non_filtered_samples; + nr_events += pos_hists->stats.total_non_filtered_period; + } else { + nr_samples += pos_hists->stats.nr_events[PERF_RECORD_SAMPLE]; + nr_events += pos_hists->stats.total_period; + } + } + } + + if (symbol_conf.show_ref_callgraph && + strstr(ev_name, "call-graph=no")) + enable_ref = true; + + if (show_freq) + scnprintf(sample_freq_str, sizeof(sample_freq_str), " %d Hz,", evsel->attr.sample_freq); + + nr_samples = convert_unit(nr_samples, &unit); + printed = scnprintf(bf, size, + "Samples: %lu%c of event%s '%s',%s%sEvent count (approx.): %" PRIu64, + nr_samples, unit, evsel->nr_members > 1 ? "s" : "", + ev_name, sample_freq_str, enable_ref ? ref : " ", nr_events); + + + if (hists->uid_filter_str) + printed += snprintf(bf + printed, size - printed, + ", UID: %s", hists->uid_filter_str); + if (thread) { + if (hists__has(hists, thread)) { + printed += scnprintf(bf + printed, size - printed, + ", Thread: %s(%d)", + (thread->comm_set ? thread__comm_str(thread) : ""), + thread->tid); + } else { + printed += scnprintf(bf + printed, size - printed, + ", Thread: %s", + (thread->comm_set ? thread__comm_str(thread) : "")); + } + } + if (dso) + printed += scnprintf(bf + printed, size - printed, + ", DSO: %s", dso->short_name); + if (socket_id > -1) + printed += scnprintf(bf + printed, size - printed, + ", Processor Socket: %d", socket_id); + + return printed; +} + int parse_filter_percentage(const struct option *opt __maybe_unused, const char *arg, int unset __maybe_unused) { diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index e869cad4d89f..fbabfd8a215d 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -61,6 +61,7 @@ enum hist_column { HISTC_SRCLINE_TO, HISTC_TRACE, HISTC_SYM_SIZE, + HISTC_DSO_SIZE, HISTC_NR_COLS, /* Last entry */ }; @@ -503,5 +504,11 @@ int __hpp__slsmg_color_printf(struct perf_hpp *hpp, const char *fmt, ...); int __hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp, struct perf_hpp_list *hpp_list); int hists__fprintf_headers(struct hists *hists, FILE *fp); +int __hists__scnprintf_title(struct hists *hists, char *bf, size_t size, bool show_freq); + +static inline int hists__scnprintf_title(struct hists *hists, char *bf, size_t size) +{ + return __hists__scnprintf_title(hists, bf, size, true); +} #endif /* __PERF_HIST_H */ diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 2eca8478e24f..32d50492505d 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1019,13 +1019,6 @@ int machine__load_vmlinux_path(struct machine *machine, enum map_type type) return ret; } -static void map_groups__fixup_end(struct map_groups *mg) -{ - int i; - for (i = 0; i < MAP__NR_TYPES; ++i) - __map_groups__fixup_end(mg, i); -} - static char *get_kernel_version(const char *root_dir) { char version[PATH_MAX]; @@ -1233,6 +1226,7 @@ int machine__create_kernel_maps(struct machine *machine) { struct dso *kernel = machine__get_kernel(machine); const char *name = NULL; + struct map *map; u64 addr = 0; int ret; @@ -1259,13 +1253,25 @@ int machine__create_kernel_maps(struct machine *machine) machine__destroy_kernel_maps(machine); return -1; } - machine__set_kernel_mmap(machine, addr, 0); + + /* we have a real start address now, so re-order the kmaps */ + map = machine__kernel_map(machine); + + map__get(map); + map_groups__remove(&machine->kmaps, map); + + /* assume it's the last in the kmaps */ + machine__set_kernel_mmap(machine, addr, ~0ULL); + + map_groups__insert(&machine->kmaps, map); + map__put(map); } - /* - * Now that we have all the maps created, just set the ->end of them: - */ - map_groups__fixup_end(&machine->kmaps); + /* update end address of the kernel map using adjacent module address */ + map = map__next(machine__kernel_map(machine)); + if (map) + machine__set_kernel_mmap(machine, addr, map->start); + return 0; } diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index edeb7291c8e1..0e9bbe01b0ab 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -103,6 +103,10 @@ static inline u64 identity__map_ip(struct map *map __maybe_unused, u64 ip) return ip; } +static inline size_t map__size(const struct map *map) +{ + return map->end - map->start; +} /* rip/ip <-> addr suitable for passing to `objdump --start-address=` */ u64 map__rip_2objdump(struct map *map, u64 rip); diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 2fb0272146d8..2fc4ee8b86c1 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1219,13 +1219,16 @@ int parse_events_add_numeric(struct parse_events_state *parse_state, int parse_events_add_pmu(struct parse_events_state *parse_state, struct list_head *list, char *name, - struct list_head *head_config, bool auto_merge_stats) + struct list_head *head_config, + bool auto_merge_stats, + bool use_alias) { struct perf_event_attr attr; struct perf_pmu_info info; struct perf_pmu *pmu; struct perf_evsel *evsel; struct parse_events_error *err = parse_state->error; + bool use_uncore_alias; LIST_HEAD(config_terms); pmu = perf_pmu__find(name); @@ -1244,11 +1247,14 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, memset(&attr, 0, sizeof(attr)); } + use_uncore_alias = (pmu->is_uncore && use_alias); + if (!head_config) { attr.type = pmu->type; evsel = __add_event(list, &parse_state->idx, &attr, NULL, pmu, NULL, auto_merge_stats); if (evsel) { evsel->pmu_name = name; + evsel->use_uncore_alias = use_uncore_alias; return 0; } else { return -ENOMEM; @@ -1282,6 +1288,7 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, evsel->metric_expr = info.metric_expr; evsel->metric_name = info.metric_name; evsel->pmu_name = name; + evsel->use_uncore_alias = use_uncore_alias; } return evsel ? 0 : -ENOMEM; @@ -1317,7 +1324,8 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state, list_add_tail(&term->list, head); if (!parse_events_add_pmu(parse_state, list, - pmu->name, head, true)) { + pmu->name, head, + true, true)) { pr_debug("%s -> %s/%s/\n", str, pmu->name, alias->str); ok++; @@ -1339,7 +1347,120 @@ int parse_events__modifier_group(struct list_head *list, return parse_events__modifier_event(list, event_mod, true); } -void parse_events__set_leader(char *name, struct list_head *list) +/* + * Check if the two uncore PMUs are from the same uncore block + * The format of the uncore PMU name is uncore_#blockname_#pmuidx + */ +static bool is_same_uncore_block(const char *pmu_name_a, const char *pmu_name_b) +{ + char *end_a, *end_b; + + end_a = strrchr(pmu_name_a, '_'); + end_b = strrchr(pmu_name_b, '_'); + + if (!end_a || !end_b) + return false; + + if ((end_a - pmu_name_a) != (end_b - pmu_name_b)) + return false; + + return (strncmp(pmu_name_a, pmu_name_b, end_a - pmu_name_a) == 0); +} + +static int +parse_events__set_leader_for_uncore_aliase(char *name, struct list_head *list, + struct parse_events_state *parse_state) +{ + struct perf_evsel *evsel, *leader; + uintptr_t *leaders; + bool is_leader = true; + int i, nr_pmu = 0, total_members, ret = 0; + + leader = list_first_entry(list, struct perf_evsel, node); + evsel = list_last_entry(list, struct perf_evsel, node); + total_members = evsel->idx - leader->idx + 1; + + leaders = calloc(total_members, sizeof(uintptr_t)); + if (WARN_ON(!leaders)) + return 0; + + /* + * Going through the whole group and doing sanity check. + * All members must use alias, and be from the same uncore block. + * Also, storing the leader events in an array. + */ + __evlist__for_each_entry(list, evsel) { + + /* Only split the uncore group which members use alias */ + if (!evsel->use_uncore_alias) + goto out; + + /* The events must be from the same uncore block */ + if (!is_same_uncore_block(leader->pmu_name, evsel->pmu_name)) + goto out; + + if (!is_leader) + continue; + /* + * If the event's PMU name starts to repeat, it must be a new + * event. That can be used to distinguish the leader from + * other members, even they have the same event name. + */ + if ((leader != evsel) && (leader->pmu_name == evsel->pmu_name)) { + is_leader = false; + continue; + } + /* The name is always alias name */ + WARN_ON(strcmp(leader->name, evsel->name)); + + /* Store the leader event for each PMU */ + leaders[nr_pmu++] = (uintptr_t) evsel; + } + + /* only one event alias */ + if (nr_pmu == total_members) { + parse_state->nr_groups--; + goto handled; + } + + /* + * An uncore event alias is a joint name which means the same event + * runs on all PMUs of a block. + * Perf doesn't support mixed events from different PMUs in the same + * group. The big group has to be split into multiple small groups + * which only include the events from the same PMU. + * + * Here the uncore event aliases must be from the same uncore block. + * The number of PMUs must be same for each alias. The number of new + * small groups equals to the number of PMUs. + * Setting the leader event for corresponding members in each group. + */ + i = 0; + __evlist__for_each_entry(list, evsel) { + if (i >= nr_pmu) + i = 0; + evsel->leader = (struct perf_evsel *) leaders[i++]; + } + + /* The number of members and group name are same for each group */ + for (i = 0; i < nr_pmu; i++) { + evsel = (struct perf_evsel *) leaders[i]; + evsel->nr_members = total_members / nr_pmu; + evsel->group_name = name ? strdup(name) : NULL; + } + + /* Take the new small groups into account */ + parse_state->nr_groups += nr_pmu - 1; + +handled: + ret = 1; +out: + free(leaders); + return ret; +} + +void parse_events__set_leader(char *name, struct list_head *list, + struct parse_events_state *parse_state) { struct perf_evsel *leader; @@ -1348,6 +1469,9 @@ void parse_events__set_leader(char *name, struct list_head *list) return; } + if (parse_events__set_leader_for_uncore_aliase(name, list, parse_state)) + return; + __perf_evlist__set_leader(list); leader = list_entry(list->next, struct perf_evsel, node); leader->group_name = name ? strdup(name) : NULL; @@ -1715,7 +1839,7 @@ int parse_events(struct perf_evlist *evlist, const char *str, struct perf_evsel *last; if (list_empty(&parse_state.list)) { - WARN_ONCE(true, "WARNING: event parser found nothing"); + WARN_ONCE(true, "WARNING: event parser found nothing\n"); return -1; } diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index 5015cfd58277..4473dac27aee 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -167,7 +167,9 @@ int parse_events_add_breakpoint(struct list_head *list, int *idx, void *ptr, char *type, u64 len); int parse_events_add_pmu(struct parse_events_state *parse_state, struct list_head *list, char *name, - struct list_head *head_config, bool auto_merge_stats); + struct list_head *head_config, + bool auto_merge_stats, + bool use_alias); int parse_events_multi_pmu_add(struct parse_events_state *parse_state, char *str, @@ -178,7 +180,8 @@ int parse_events_copy_term_list(struct list_head *old, enum perf_pmu_event_symbol_type perf_pmu__parse_check(const char *name); -void parse_events__set_leader(char *name, struct list_head *list); +void parse_events__set_leader(char *name, struct list_head *list, + struct parse_events_state *parse_state); void parse_events_update_lists(struct list_head *list_event, struct list_head *list_all); void parse_events_evlist_error(struct parse_events_state *parse_state, diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index 7afeb80cc39e..e37608a87dba 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -161,7 +161,7 @@ PE_NAME '{' events '}' struct list_head *list = $3; inc_group_count(list, _parse_state); - parse_events__set_leader($1, list); + parse_events__set_leader($1, list, _parse_state); $$ = list; } | @@ -170,7 +170,7 @@ PE_NAME '{' events '}' struct list_head *list = $2; inc_group_count(list, _parse_state); - parse_events__set_leader(NULL, list); + parse_events__set_leader(NULL, list, _parse_state); $$ = list; } @@ -232,7 +232,7 @@ PE_NAME opt_event_config YYABORT; ALLOC_LIST(list); - if (parse_events_add_pmu(_parse_state, list, $1, $2, false)) { + if (parse_events_add_pmu(_parse_state, list, $1, $2, false, false)) { struct perf_pmu *pmu = NULL; int ok = 0; char *pattern; @@ -251,7 +251,7 @@ PE_NAME opt_event_config free(pattern); YYABORT; } - if (!parse_events_add_pmu(_parse_state, list, pmu->name, terms, true)) + if (!parse_events_add_pmu(_parse_state, list, pmu->name, terms, true, false)) ok++; parse_events_terms__delete(terms); } diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 064bdcb7bd78..d2fb597c9a8c 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -539,9 +539,10 @@ static bool pmu_is_uncore(const char *name) /* * PMU CORE devices have different name other than cpu in sysfs on some - * platforms. looking for possible sysfs files to identify as core device. + * platforms. + * Looking for possible sysfs files to identify the arm core device. */ -static int is_pmu_core(const char *name) +static int is_arm_pmu_core(const char *name) { struct stat st; char path[PATH_MAX]; @@ -550,18 +551,18 @@ static int is_pmu_core(const char *name) if (!sysfs) return 0; - /* Look for cpu sysfs (x86 and others) */ - scnprintf(path, PATH_MAX, "%s/bus/event_source/devices/cpu", sysfs); - if ((stat(path, &st) == 0) && - (strncmp(name, "cpu", strlen("cpu")) == 0)) - return 1; - /* Look for cpu sysfs (specific to arm) */ scnprintf(path, PATH_MAX, "%s/bus/event_source/devices/%s/cpus", sysfs, name); if (stat(path, &st) == 0) return 1; + /* Look for cpu sysfs (specific to s390) */ + scnprintf(path, PATH_MAX, "%s/bus/event_source/devices/%s", + sysfs, name); + if (stat(path, &st) == 0 && !strncmp(name, "cpum_", 5)) + return 1; + return 0; } @@ -580,7 +581,7 @@ char * __weak get_cpuid_str(struct perf_pmu *pmu __maybe_unused) * cpuid string generated on this platform. * Otherwise return non-zero. */ -int __weak strcmp_cpuid_str(const char *mapcpuid, const char *cpuid) +int strcmp_cpuid_str(const char *mapcpuid, const char *cpuid) { regex_t re; regmatch_t pmatch[1]; @@ -662,6 +663,7 @@ static void pmu_add_cpu_aliases(struct list_head *head, struct perf_pmu *pmu) struct pmu_events_map *map; struct pmu_event *pe; const char *name = pmu->name; + const char *pname; map = perf_pmu__find_map(pmu); if (!map) @@ -680,11 +682,9 @@ static void pmu_add_cpu_aliases(struct list_head *head, struct perf_pmu *pmu) break; } - if (!is_pmu_core(name)) { - /* check for uncore devices */ - if (pe->pmu == NULL) - continue; - if (strncmp(pe->pmu, name, strlen(pe->pmu))) + if (!is_arm_pmu_core(name)) { + pname = pe->pmu ? pe->pmu : "cpu"; + if (strncmp(pname, name, strlen(pname))) continue; } diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 10dd5fce082b..7f8afacd08ee 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -531,6 +531,8 @@ static PyObject *get_perf_sample_dict(struct perf_sample *sample, PyLong_FromUnsignedLongLong(sample->period)); pydict_set_item_string_decref(dict_sample, "phys_addr", PyLong_FromUnsignedLongLong(sample->phys_addr)); + pydict_set_item_string_decref(dict_sample, "addr", + PyLong_FromUnsignedLongLong(sample->addr)); set_sample_read_in_dict(dict_sample, sample, evsel); pydict_set_item_string_decref(dict, "sample", dict_sample); diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index c71ced7db152..f4a7a437ee87 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1591,7 +1591,7 @@ static void perf_session__warn_about_errors(const struct perf_session *session) drop_rate = (double)stats->total_lost_samples / (double) (stats->nr_events[PERF_RECORD_SAMPLE] + stats->total_lost_samples); if (drop_rate > 0.05) { - ui__warning("Processed %" PRIu64 " samples and lost %3.2f%% samples!\n\n", + ui__warning("Processed %" PRIu64 " samples and lost %3.2f%%!\n\n", stats->nr_events[PERF_RECORD_SAMPLE] + stats->total_lost_samples, drop_rate * 100.0); } diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index e8514f651865..26a68dfd8a4f 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -1545,6 +1545,46 @@ struct sort_entry sort_sym_size = { .se_width_idx = HISTC_SYM_SIZE, }; +/* --sort dso_size */ + +static int64_t _sort__dso_size_cmp(struct map *map_l, struct map *map_r) +{ + int64_t size_l = map_l != NULL ? map__size(map_l) : 0; + int64_t size_r = map_r != NULL ? map__size(map_r) : 0; + + return size_l < size_r ? -1 : + size_l == size_r ? 0 : 1; +} + +static int64_t +sort__dso_size_cmp(struct hist_entry *left, struct hist_entry *right) +{ + return _sort__dso_size_cmp(right->ms.map, left->ms.map); +} + +static int _hist_entry__dso_size_snprintf(struct map *map, char *bf, + size_t bf_size, unsigned int width) +{ + if (map && map->dso) + return repsep_snprintf(bf, bf_size, "%*d", width, + map__size(map)); + + return repsep_snprintf(bf, bf_size, "%*s", width, "unknown"); +} + +static int hist_entry__dso_size_snprintf(struct hist_entry *he, char *bf, + size_t size, unsigned int width) +{ + return _hist_entry__dso_size_snprintf(he->ms.map, bf, size, width); +} + +struct sort_entry sort_dso_size = { + .se_header = "DSO size", + .se_cmp = sort__dso_size_cmp, + .se_snprintf = hist_entry__dso_size_snprintf, + .se_width_idx = HISTC_DSO_SIZE, +}; + struct sort_dimension { const char *name; @@ -1569,6 +1609,7 @@ static struct sort_dimension common_sort_dimensions[] = { DIM(SORT_TRANSACTION, "transaction", sort_transaction), DIM(SORT_TRACE, "trace", sort_trace), DIM(SORT_SYM_SIZE, "symbol_size", sort_sym_size), + DIM(SORT_DSO_SIZE, "dso_size", sort_dso_size), DIM(SORT_CGROUP_ID, "cgroup_id", sort_cgroup_id), }; diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index f5901c10a563..035b62e2c60b 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -220,6 +220,7 @@ enum sort_type { SORT_TRANSACTION, SORT_TRACE, SORT_SYM_SIZE, + SORT_DSO_SIZE, SORT_CGROUP_ID, /* branch stack specific sort keys */ diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 62b2dd2253eb..1466814ebada 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -2091,16 +2091,14 @@ static bool symbol__read_kptr_restrict(void) int symbol__annotation_init(void) { + if (symbol_conf.init_annotation) + return 0; + if (symbol_conf.initialized) { pr_err("Annotation needs to be init before symbol__init()\n"); return -1; } - if (symbol_conf.init_annotation) { - pr_warning("Annotation being initialized multiple times\n"); - return 0; - } - symbol_conf.priv_size += sizeof(struct annotation); symbol_conf.init_annotation = true; return 0; diff --git a/tools/perf/util/syscalltbl.c b/tools/perf/util/syscalltbl.c index 895122d638dd..0ee7f568d60c 100644 --- a/tools/perf/util/syscalltbl.c +++ b/tools/perf/util/syscalltbl.c @@ -17,7 +17,7 @@ #include <stdlib.h> #include <linux/compiler.h> -#ifdef HAVE_SYSCALL_TABLE +#ifdef HAVE_SYSCALL_TABLE_SUPPORT #include <string.h> #include "string2.h" #include "util.h" @@ -139,7 +139,7 @@ int syscalltbl__strglobmatch_first(struct syscalltbl *tbl, const char *syscall_g return syscalltbl__strglobmatch_next(tbl, syscall_glob, idx); } -#else /* HAVE_SYSCALL_TABLE */ +#else /* HAVE_SYSCALL_TABLE_SUPPORT */ #include <libaudit.h> @@ -176,4 +176,4 @@ int syscalltbl__strglobmatch_first(struct syscalltbl *tbl, const char *syscall_g { return syscalltbl__strglobmatch_next(tbl, syscall_glob, idx); } -#endif /* HAVE_SYSCALL_TABLE */ +#endif /* HAVE_SYSCALL_TABLE_SUPPORT */ diff --git a/tools/perf/util/trace-event-scripting.c b/tools/perf/util/trace-event-scripting.c index 0ac9077f62a2..b1e5c3a2b8e3 100644 --- a/tools/perf/util/trace-event-scripting.c +++ b/tools/perf/util/trace-event-scripting.c @@ -98,7 +98,7 @@ static void register_python_scripting(struct scripting_ops *scripting_ops) } } -#ifdef NO_LIBPYTHON +#ifndef HAVE_LIBPYTHON_SUPPORT void setup_python_scripting(void) { register_python_scripting(&python_scripting_unsupported_ops); @@ -161,7 +161,7 @@ static void register_perl_scripting(struct scripting_ops *scripting_ops) } } -#ifdef NO_LIBPERL +#ifndef HAVE_LIBPERL_SUPPORT void setup_perl_scripting(void) { register_perl_scripting(&perl_scripting_unsupported_ops); diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 9496365da3d7..c9626c206208 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -11,8 +11,7 @@ #include <stdlib.h> #include <stdarg.h> #include <linux/compiler.h> -#include <linux/types.h> -#include "namespaces.h" +#include <sys/types.h> /* General helper functions */ void usage(const char *err) __noreturn; @@ -26,6 +25,7 @@ static inline void *zalloc(size_t size) #define zfree(ptr) ({ free(*ptr); *ptr = NULL; }) struct dirent; +struct nsinfo; struct strlist; int mkdir_p(char *path, mode_t mode); diff --git a/tools/power/acpi/Makefile.config b/tools/power/acpi/Makefile.config index 2cccbba64418..f304be71c278 100644 --- a/tools/power/acpi/Makefile.config +++ b/tools/power/acpi/Makefile.config @@ -56,6 +56,7 @@ INSTALL_SCRIPT = ${INSTALL_PROGRAM} # to compile vs uClibc, that can be done here as well. CROSS = #/usr/i386-linux-uclibc/usr/bin/i386-uclibc- CROSS_COMPILE ?= $(CROSS) +LD = $(CC) HOSTCC = gcc # check if compiler option is supported diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include index dd614463d4d6..495066bafbe3 100644 --- a/tools/scripts/Makefile.include +++ b/tools/scripts/Makefile.include @@ -120,3 +120,5 @@ ifneq ($(silent),1) QUIET_UNINST = @printf ' UNINST %s\n' $1; endif endif + +pound := \# diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index cb166be4918d..4ea385be528f 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -138,6 +138,7 @@ static u32 handle[] = { }; static unsigned long dimm_fail_cmd_flags[NUM_DCR]; +static int dimm_fail_cmd_code[NUM_DCR]; struct nfit_test_fw { enum intel_fw_update_state state; @@ -892,8 +893,11 @@ static int get_dimm(struct nfit_mem *nfit_mem, unsigned int func) if (i >= ARRAY_SIZE(handle)) return -ENXIO; - if ((1 << func) & dimm_fail_cmd_flags[i]) + if ((1 << func) & dimm_fail_cmd_flags[i]) { + if (dimm_fail_cmd_code[i]) + return dimm_fail_cmd_code[i]; return -EIO; + } return i; } @@ -1162,12 +1166,12 @@ static int ars_state_init(struct device *dev, struct ars_state *ars_state) static void put_dimms(void *data) { - struct device **dimm_dev = data; + struct nfit_test *t = data; int i; - for (i = 0; i < NUM_DCR; i++) - if (dimm_dev[i]) - device_unregister(dimm_dev[i]); + for (i = 0; i < t->num_dcr; i++) + if (t->dimm_dev[i]) + device_unregister(t->dimm_dev[i]); } static struct class *nfit_test_dimm; @@ -1176,13 +1180,11 @@ static int dimm_name_to_id(struct device *dev) { int dimm; - if (sscanf(dev_name(dev), "test_dimm%d", &dimm) != 1 - || dimm >= NUM_DCR || dimm < 0) + if (sscanf(dev_name(dev), "test_dimm%d", &dimm) != 1) return -ENXIO; return dimm; } - static ssize_t handle_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -1191,7 +1193,7 @@ static ssize_t handle_show(struct device *dev, struct device_attribute *attr, if (dimm < 0) return dimm; - return sprintf(buf, "%#x", handle[dimm]); + return sprintf(buf, "%#x\n", handle[dimm]); } DEVICE_ATTR_RO(handle); @@ -1225,8 +1227,39 @@ static ssize_t fail_cmd_store(struct device *dev, struct device_attribute *attr, } static DEVICE_ATTR_RW(fail_cmd); +static ssize_t fail_cmd_code_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + int dimm = dimm_name_to_id(dev); + + if (dimm < 0) + return dimm; + + return sprintf(buf, "%d\n", dimm_fail_cmd_code[dimm]); +} + +static ssize_t fail_cmd_code_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t size) +{ + int dimm = dimm_name_to_id(dev); + unsigned long val; + ssize_t rc; + + if (dimm < 0) + return dimm; + + rc = kstrtol(buf, 0, &val); + if (rc) + return rc; + + dimm_fail_cmd_code[dimm] = val; + return size; +} +static DEVICE_ATTR_RW(fail_cmd_code); + static struct attribute *nfit_test_dimm_attributes[] = { &dev_attr_fail_cmd.attr, + &dev_attr_fail_cmd_code.attr, &dev_attr_handle.attr, NULL, }; @@ -1240,6 +1273,23 @@ static const struct attribute_group *nfit_test_dimm_attribute_groups[] = { NULL, }; +static int nfit_test_dimm_init(struct nfit_test *t) +{ + int i; + + if (devm_add_action_or_reset(&t->pdev.dev, put_dimms, t)) + return -ENOMEM; + for (i = 0; i < t->num_dcr; i++) { + t->dimm_dev[i] = device_create_with_groups(nfit_test_dimm, + &t->pdev.dev, 0, NULL, + nfit_test_dimm_attribute_groups, + "test_dimm%d", i + t->dcr_idx); + if (!t->dimm_dev[i]) + return -ENOMEM; + } + return 0; +} + static void smart_init(struct nfit_test *t) { int i; @@ -1335,17 +1385,8 @@ static int nfit_test0_alloc(struct nfit_test *t) if (!t->_fit) return -ENOMEM; - if (devm_add_action_or_reset(&t->pdev.dev, put_dimms, t->dimm_dev)) + if (nfit_test_dimm_init(t)) return -ENOMEM; - for (i = 0; i < NUM_DCR; i++) { - t->dimm_dev[i] = device_create_with_groups(nfit_test_dimm, - &t->pdev.dev, 0, NULL, - nfit_test_dimm_attribute_groups, - "test_dimm%d", i); - if (!t->dimm_dev[i]) - return -ENOMEM; - } - smart_init(t); return ars_state_init(&t->pdev.dev, &t->ars_state); } @@ -1377,6 +1418,8 @@ static int nfit_test1_alloc(struct nfit_test *t) if (!t->spa_set[1]) return -ENOMEM; + if (nfit_test_dimm_init(t)) + return -ENOMEM; smart_init(t); return ars_state_init(&t->pdev.dev, &t->ars_state); } @@ -2222,6 +2265,9 @@ static void nfit_test1_setup(struct nfit_test *t) set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_cmd_force_en); set_bit(ND_CMD_CLEAR_ERROR, &acpi_desc->bus_cmd_force_en); set_bit(ND_INTEL_ENABLE_LSS_STATUS, &acpi_desc->dimm_cmd_force_en); + set_bit(ND_CMD_GET_CONFIG_SIZE, &acpi_desc->dimm_cmd_force_en); + set_bit(ND_CMD_GET_CONFIG_DATA, &acpi_desc->dimm_cmd_force_en); + set_bit(ND_CMD_SET_CONFIG_DATA, &acpi_desc->dimm_cmd_force_en); } static int nfit_test_blk_do_io(struct nd_blk_region *ndbr, resource_size_t dpa, diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile index fa7ee369b3c9..db66f8a0d4be 100644 --- a/tools/testing/radix-tree/Makefile +++ b/tools/testing/radix-tree/Makefile @@ -17,7 +17,7 @@ ifeq ($(BUILD), 32) LDFLAGS += -m32 endif -targets: mapshift $(TARGETS) +targets: generated/map-shift.h $(TARGETS) main: $(OFILES) @@ -42,9 +42,7 @@ radix-tree.c: ../../../lib/radix-tree.c idr.c: ../../../lib/idr.c sed -e 's/^static //' -e 's/__always_inline //' -e 's/inline //' < $< > $@ -.PHONY: mapshift - -mapshift: +generated/map-shift.h: @if ! grep -qws $(SHIFT) generated/map-shift.h; then \ echo "#define RADIX_TREE_MAP_SHIFT $(SHIFT)" > \ generated/map-shift.h; \ diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c index 6c645eb77d42..ee820fcc29b0 100644 --- a/tools/testing/radix-tree/idr-test.c +++ b/tools/testing/radix-tree/idr-test.c @@ -252,6 +252,13 @@ void idr_checks(void) idr_remove(&idr, 3); idr_remove(&idr, 0); + assert(idr_alloc(&idr, DUMMY_PTR, 0, 0, GFP_KERNEL) == 0); + idr_remove(&idr, 1); + for (i = 1; i < RADIX_TREE_MAP_SIZE; i++) + assert(idr_alloc(&idr, DUMMY_PTR, 0, 0, GFP_KERNEL) == i); + idr_remove(&idr, 1 << 30); + idr_destroy(&idr); + for (i = INT_MAX - 3UL; i < INT_MAX + 1UL; i++) { struct item *item = item_create(i, 0); assert(idr_alloc(&idr, item, i, i + 10, GFP_KERNEL) == i); diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c index 59245b3d587c..7bf405638b0b 100644 --- a/tools/testing/radix-tree/multiorder.c +++ b/tools/testing/radix-tree/multiorder.c @@ -16,6 +16,7 @@ #include <linux/radix-tree.h> #include <linux/slab.h> #include <linux/errno.h> +#include <pthread.h> #include "test.h" @@ -624,6 +625,67 @@ static void multiorder_account(void) item_kill_tree(&tree); } +bool stop_iteration = false; + +static void *creator_func(void *ptr) +{ + /* 'order' is set up to ensure we have sibling entries */ + unsigned int order = RADIX_TREE_MAP_SHIFT - 1; + struct radix_tree_root *tree = ptr; + int i; + + for (i = 0; i < 10000; i++) { + item_insert_order(tree, 0, order); + item_delete_rcu(tree, 0); + } + + stop_iteration = true; + return NULL; +} + +static void *iterator_func(void *ptr) +{ + struct radix_tree_root *tree = ptr; + struct radix_tree_iter iter; + struct item *item; + void **slot; + + while (!stop_iteration) { + rcu_read_lock(); + radix_tree_for_each_slot(slot, tree, &iter, 0) { + item = radix_tree_deref_slot(slot); + + if (!item) + continue; + if (radix_tree_deref_retry(item)) { + slot = radix_tree_iter_retry(&iter); + continue; + } + + item_sanity(item, iter.index); + } + rcu_read_unlock(); + } + return NULL; +} + +static void multiorder_iteration_race(void) +{ + const int num_threads = sysconf(_SC_NPROCESSORS_ONLN); + pthread_t worker_thread[num_threads]; + RADIX_TREE(tree, GFP_KERNEL); + int i; + + pthread_create(&worker_thread[0], NULL, &creator_func, &tree); + for (i = 1; i < num_threads; i++) + pthread_create(&worker_thread[i], NULL, &iterator_func, &tree); + + for (i = 0; i < num_threads; i++) + pthread_join(worker_thread[i], NULL); + + item_kill_tree(&tree); +} + void multiorder_checks(void) { int i; @@ -644,6 +706,7 @@ void multiorder_checks(void) multiorder_join(); multiorder_split(); multiorder_account(); + multiorder_iteration_race(); radix_tree_cpu_dead(0); } diff --git a/tools/testing/radix-tree/test.c b/tools/testing/radix-tree/test.c index 5978ab1f403d..def6015570b2 100644 --- a/tools/testing/radix-tree/test.c +++ b/tools/testing/radix-tree/test.c @@ -75,6 +75,25 @@ int item_delete(struct radix_tree_root *root, unsigned long index) return 0; } +static void item_free_rcu(struct rcu_head *head) +{ + struct item *item = container_of(head, struct item, rcu_head); + + free(item); +} + +int item_delete_rcu(struct radix_tree_root *root, unsigned long index) +{ + struct item *item = radix_tree_delete(root, index); + + if (item) { + item_sanity(item, index); + call_rcu(&item->rcu_head, item_free_rcu); + return 1; + } + return 0; +} + void item_check_present(struct radix_tree_root *root, unsigned long index) { struct item *item; diff --git a/tools/testing/radix-tree/test.h b/tools/testing/radix-tree/test.h index d9c031dbeb1a..31f1d9b6f506 100644 --- a/tools/testing/radix-tree/test.h +++ b/tools/testing/radix-tree/test.h @@ -5,6 +5,7 @@ #include <linux/rcupdate.h> struct item { + struct rcu_head rcu_head; unsigned long index; unsigned int order; }; @@ -12,9 +13,11 @@ struct item { struct item *item_create(unsigned long index, unsigned int order); int __item_insert(struct radix_tree_root *root, struct item *item); int item_insert(struct radix_tree_root *root, unsigned long index); +void item_sanity(struct item *item, unsigned long index); int item_insert_order(struct radix_tree_root *root, unsigned long index, unsigned order); int item_delete(struct radix_tree_root *root, unsigned long index); +int item_delete_rcu(struct radix_tree_root *root, unsigned long index); struct item *item_lookup(struct radix_tree_root *root, unsigned long index); void item_check_present(struct radix_tree_root *root, unsigned long index); diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index 9cf83f895d98..5e1ab2f0eb79 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -12,3 +12,6 @@ test_tcpbpf_user test_verifier_log feature test_libbpf_open +test_sock +test_sock_addr +urandom_read diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index 983dd25d49f4..1eefe211a4a8 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -5,3 +5,5 @@ CONFIG_BPF_EVENTS=y CONFIG_TEST_BPF=m CONFIG_CGROUP_BPF=y CONFIG_NETDEVSIM=m +CONFIG_NET_CLS_ACT=y +CONFIG_NET_SCH_INGRESS=y diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index faadbe233966..4123d0ab90ba 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -1108,7 +1108,7 @@ static void test_stacktrace_build_id(void) assert(system("dd if=/dev/urandom of=/dev/zero count=4 2> /dev/null") == 0); - assert(system("./urandom_read if=/dev/urandom of=/dev/zero count=4 2> /dev/null") == 0); + assert(system("./urandom_read") == 0); /* disable stack trace collection */ key = 0; val = 1; @@ -1158,7 +1158,7 @@ static void test_stacktrace_build_id(void) } while (bpf_map_get_next_key(stackmap_fd, &previous_key, &key) == 0); CHECK(build_id_matches < 1, "build id match", - "Didn't find expected build ID from the map"); + "Didn't find expected build ID from the map\n"); disable_pmu: ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE); diff --git a/tools/testing/selftests/bpf/test_sock.c b/tools/testing/selftests/bpf/test_sock.c index 73bb20cfb9b7..f4d99fabc56d 100644 --- a/tools/testing/selftests/bpf/test_sock.c +++ b/tools/testing/selftests/bpf/test_sock.c @@ -13,6 +13,7 @@ #include <bpf/bpf.h> #include "cgroup_helpers.h" +#include "bpf_rlimit.h" #ifndef ARRAY_SIZE # define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) diff --git a/tools/testing/selftests/bpf/test_sock_addr.c b/tools/testing/selftests/bpf/test_sock_addr.c index d488f20926e8..2950f80ba7fb 100644 --- a/tools/testing/selftests/bpf/test_sock_addr.c +++ b/tools/testing/selftests/bpf/test_sock_addr.c @@ -15,6 +15,7 @@ #include <bpf/libbpf.h> #include "cgroup_helpers.h" +#include "bpf_rlimit.h" #define CG_PATH "/foo" #define CONNECT4_PROG_PATH "./connect4_prog.o" diff --git a/tools/testing/selftests/bpf/test_sock_addr.sh b/tools/testing/selftests/bpf/test_sock_addr.sh index c6e1dcf992c4..9832a875a828 100755 --- a/tools/testing/selftests/bpf/test_sock_addr.sh +++ b/tools/testing/selftests/bpf/test_sock_addr.sh @@ -4,7 +4,7 @@ set -eu ping_once() { - ping -q -c 1 -W 1 ${1%%/*} >/dev/null 2>&1 + ping -${1} -q -c 1 -W 1 ${2%%/*} >/dev/null 2>&1 } wait_for_ip() @@ -13,7 +13,7 @@ wait_for_ip() echo -n "Wait for testing IPv4/IPv6 to become available " for _i in $(seq ${MAX_PING_TRIES}); do echo -n "." - if ping_once ${TEST_IPv4} && ping_once ${TEST_IPv6}; then + if ping_once 4 ${TEST_IPv4} && ping_once 6 ${TEST_IPv6}; then echo " OK" return fi diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 3e7718b1a9ae..fd7de7eb329e 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -11713,6 +11713,11 @@ static void get_unpriv_disabled() FILE *fd; fd = fopen("/proc/sys/"UNPRIV_SYSCTL, "r"); + if (!fd) { + perror("fopen /proc/sys/"UNPRIV_SYSCTL); + unpriv_disabled = true; + return; + } if (fgets(buf, 2, fd) == buf && atoi(buf)) unpriv_disabled = true; fclose(fd); diff --git a/tools/testing/selftests/filesystems/Makefile b/tools/testing/selftests/filesystems/Makefile index 4e6d09fb166f..5c7d7001ad37 100644 --- a/tools/testing/selftests/filesystems/Makefile +++ b/tools/testing/selftests/filesystems/Makefile @@ -1,8 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 -TEST_PROGS := dnotify_test devpts_pts -all: $(TEST_PROGS) -include ../lib.mk +TEST_GEN_PROGS := devpts_pts +TEST_GEN_PROGS_EXTENDED := dnotify_test -clean: - rm -fr $(TEST_PROGS) +include ../lib.mk diff --git a/tools/testing/selftests/firmware/Makefile b/tools/testing/selftests/firmware/Makefile index 826f38d5dd19..261c81f08606 100644 --- a/tools/testing/selftests/firmware/Makefile +++ b/tools/testing/selftests/firmware/Makefile @@ -4,6 +4,7 @@ all: TEST_PROGS := fw_run_tests.sh +TEST_FILES := fw_fallback.sh fw_filesystem.sh fw_lib.sh include ../lib.mk diff --git a/tools/testing/selftests/firmware/fw_lib.sh b/tools/testing/selftests/firmware/fw_lib.sh index 9ea31b57d71a..962d7f4ac627 100755 --- a/tools/testing/selftests/firmware/fw_lib.sh +++ b/tools/testing/selftests/firmware/fw_lib.sh @@ -154,11 +154,13 @@ test_finish() if [ "$HAS_FW_LOADER_USER_HELPER" = "yes" ]; then echo "$OLD_TIMEOUT" >/sys/class/firmware/timeout fi - if [ "$OLD_FWPATH" = "" ]; then - OLD_FWPATH=" " - fi if [ "$TEST_REQS_FW_SET_CUSTOM_PATH" = "yes" ]; then - echo -n "$OLD_FWPATH" >/sys/module/firmware_class/parameters/path + if [ "$OLD_FWPATH" = "" ]; then + # A zero-length write won't work; write a null byte + printf '\000' >/sys/module/firmware_class/parameters/path + else + echo -n "$OLD_FWPATH" >/sys/module/firmware_class/parameters/path + fi fi if [ -f $FW ]; then rm -f "$FW" diff --git a/tools/testing/selftests/firmware/fw_run_tests.sh b/tools/testing/selftests/firmware/fw_run_tests.sh index 06d638e9dc62..cffdd4eb0a57 100755 --- a/tools/testing/selftests/firmware/fw_run_tests.sh +++ b/tools/testing/selftests/firmware/fw_run_tests.sh @@ -66,5 +66,5 @@ if [ -f $FW_FORCE_SYSFS_FALLBACK ]; then run_test_config_0003 else echo "Running basic kernel configuration, working with your config" - run_test + run_tests fi diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-extended-error-support.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-extended-error-support.tc index 786dce7e48be..2aabab363cfb 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-extended-error-support.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-extended-error-support.tc @@ -29,7 +29,7 @@ do_reset echo "Test extended error support" echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_wakeup/trigger -echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' >> events/sched/sched_wakeup/trigger &>/dev/null +! echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' >> events/sched/sched_wakeup/trigger 2> /dev/null if ! grep -q "ERROR:" events/sched/sched_wakeup/hist; then fail "Failed to generate extended error in histogram" fi diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-multi-actions-accept.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-multi-actions-accept.tc new file mode 100644 index 000000000000..c193dce611a2 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-multi-actions-accept.tc @@ -0,0 +1,44 @@ +#!/bin/sh +# description: event trigger - test multiple actions on hist trigger + + +do_reset() { + reset_trigger + echo > set_event + clear_trace +} + +fail() { #msg + do_reset + echo $1 + exit_fail +} + +if [ ! -f set_event ]; then + echo "event tracing is not supported" + exit_unsupported +fi + +if [ ! -f synthetic_events ]; then + echo "synthetic event is not supported" + exit_unsupported +fi + +clear_synthetic_events +reset_tracer +do_reset + +echo "Test multiple actions on hist trigger" +echo 'wakeup_latency u64 lat; pid_t pid' >> synthetic_events +TRIGGER1=events/sched/sched_wakeup/trigger +TRIGGER2=events/sched/sched_switch/trigger + +echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="cyclictest"' > $TRIGGER1 +echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0 if next_comm=="cyclictest"' >> $TRIGGER2 +echo 'hist:keys=next_pid:onmatch(sched.sched_wakeup).wakeup_latency(sched.sched_switch.$wakeup_lat,next_pid) if next_comm=="cyclictest"' >> $TRIGGER2 +echo 'hist:keys=next_pid:onmatch(sched.sched_wakeup).wakeup_latency(sched.sched_switch.$wakeup_lat,prev_pid) if next_comm=="cyclictest"' >> $TRIGGER2 +echo 'hist:keys=next_pid if next_comm=="cyclictest"' >> $TRIGGER2 + +do_reset + +exit 0 diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index dc44de904797..d9d00319b07c 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -4,17 +4,18 @@ top_srcdir = ../../../../ UNAME_M := $(shell uname -m) LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c -LIBKVM_x86_64 = lib/x86.c +LIBKVM_x86_64 = lib/x86.c lib/vmx.c TEST_GEN_PROGS_x86_64 = set_sregs_test TEST_GEN_PROGS_x86_64 += sync_regs_test +TEST_GEN_PROGS_x86_64 += vmx_tsc_adjust_test TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M)) LIBKVM += $(LIBKVM_$(UNAME_M)) INSTALL_HDR_PATH = $(top_srcdir)/usr LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/ -CFLAGS += -O2 -g -I$(LINUX_HDR_PATH) -Iinclude -I$(<D) +CFLAGS += -O2 -g -std=gnu99 -I$(LINUX_HDR_PATH) -Iinclude -I$(<D) -I.. # After inclusion, $(OUTPUT) is defined and # $(TEST_GEN_PROGS) starts with $(OUTPUT)/ diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 57974ad46373..637b7017b6ee 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -112,24 +112,27 @@ void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min, uint32_t memslot); -void kvm_get_supported_cpuid(struct kvm_cpuid2 *cpuid); +struct kvm_cpuid2 *kvm_get_supported_cpuid(void); void vcpu_set_cpuid( struct kvm_vm *vm, uint32_t vcpuid, struct kvm_cpuid2 *cpuid); -struct kvm_cpuid2 *allocate_kvm_cpuid2(void); struct kvm_cpuid_entry2 * -find_cpuid_index_entry(struct kvm_cpuid2 *cpuid, uint32_t function, - uint32_t index); +kvm_get_supported_cpuid_index(uint32_t function, uint32_t index); static inline struct kvm_cpuid_entry2 * -find_cpuid_entry(struct kvm_cpuid2 *cpuid, uint32_t function) +kvm_get_supported_cpuid_entry(uint32_t function) { - return find_cpuid_index_entry(cpuid, function, 0); + return kvm_get_supported_cpuid_index(function, 0); } struct kvm_vm *vm_create_default(uint32_t vcpuid, void *guest_code); void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code); +typedef void (*vmx_guest_code_t)(vm_vaddr_t vmxon_vaddr, + vm_paddr_t vmxon_paddr, + vm_vaddr_t vmcs_vaddr, + vm_paddr_t vmcs_paddr); + struct kvm_userspace_memory_region * kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start, uint64_t end); diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h index 7ab98e41324f..ac53730b30aa 100644 --- a/tools/testing/selftests/kvm/include/test_util.h +++ b/tools/testing/selftests/kvm/include/test_util.h @@ -19,6 +19,7 @@ #include <errno.h> #include <unistd.h> #include <fcntl.h> +#include "kselftest.h" ssize_t test_write(int fd, const void *buf, size_t count); ssize_t test_read(int fd, void *buf, size_t count); diff --git a/tools/testing/selftests/kvm/include/vmx.h b/tools/testing/selftests/kvm/include/vmx.h new file mode 100644 index 000000000000..6ed8499807fd --- /dev/null +++ b/tools/testing/selftests/kvm/include/vmx.h @@ -0,0 +1,494 @@ +/* + * tools/testing/selftests/kvm/include/vmx.h + * + * Copyright (C) 2018, Google LLC. + * + * This work is licensed under the terms of the GNU GPL, version 2. + * + */ + +#ifndef SELFTEST_KVM_VMX_H +#define SELFTEST_KVM_VMX_H + +#include <stdint.h> +#include "x86.h" + +#define CPUID_VMX_BIT 5 + +#define CPUID_VMX (1 << 5) + +/* + * Definitions of Primary Processor-Based VM-Execution Controls. + */ +#define CPU_BASED_VIRTUAL_INTR_PENDING 0x00000004 +#define CPU_BASED_USE_TSC_OFFSETING 0x00000008 +#define CPU_BASED_HLT_EXITING 0x00000080 +#define CPU_BASED_INVLPG_EXITING 0x00000200 +#define CPU_BASED_MWAIT_EXITING 0x00000400 +#define CPU_BASED_RDPMC_EXITING 0x00000800 +#define CPU_BASED_RDTSC_EXITING 0x00001000 +#define CPU_BASED_CR3_LOAD_EXITING 0x00008000 +#define CPU_BASED_CR3_STORE_EXITING 0x00010000 +#define CPU_BASED_CR8_LOAD_EXITING 0x00080000 +#define CPU_BASED_CR8_STORE_EXITING 0x00100000 +#define CPU_BASED_TPR_SHADOW 0x00200000 +#define CPU_BASED_VIRTUAL_NMI_PENDING 0x00400000 +#define CPU_BASED_MOV_DR_EXITING 0x00800000 +#define CPU_BASED_UNCOND_IO_EXITING 0x01000000 +#define CPU_BASED_USE_IO_BITMAPS 0x02000000 +#define CPU_BASED_MONITOR_TRAP 0x08000000 +#define CPU_BASED_USE_MSR_BITMAPS 0x10000000 +#define CPU_BASED_MONITOR_EXITING 0x20000000 +#define CPU_BASED_PAUSE_EXITING 0x40000000 +#define CPU_BASED_ACTIVATE_SECONDARY_CONTROLS 0x80000000 + +#define CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR 0x0401e172 + +/* + * Definitions of Secondary Processor-Based VM-Execution Controls. + */ +#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001 +#define SECONDARY_EXEC_ENABLE_EPT 0x00000002 +#define SECONDARY_EXEC_DESC 0x00000004 +#define SECONDARY_EXEC_RDTSCP 0x00000008 +#define SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE 0x00000010 +#define SECONDARY_EXEC_ENABLE_VPID 0x00000020 +#define SECONDARY_EXEC_WBINVD_EXITING 0x00000040 +#define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080 +#define SECONDARY_EXEC_APIC_REGISTER_VIRT 0x00000100 +#define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY 0x00000200 +#define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400 +#define SECONDARY_EXEC_RDRAND_EXITING 0x00000800 +#define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000 +#define SECONDARY_EXEC_ENABLE_VMFUNC 0x00002000 +#define SECONDARY_EXEC_SHADOW_VMCS 0x00004000 +#define SECONDARY_EXEC_RDSEED_EXITING 0x00010000 +#define SECONDARY_EXEC_ENABLE_PML 0x00020000 +#define SECONDARY_EPT_VE 0x00040000 +#define SECONDARY_ENABLE_XSAV_RESTORE 0x00100000 +#define SECONDARY_EXEC_TSC_SCALING 0x02000000 + +#define PIN_BASED_EXT_INTR_MASK 0x00000001 +#define PIN_BASED_NMI_EXITING 0x00000008 +#define PIN_BASED_VIRTUAL_NMIS 0x00000020 +#define PIN_BASED_VMX_PREEMPTION_TIMER 0x00000040 +#define PIN_BASED_POSTED_INTR 0x00000080 + +#define PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR 0x00000016 + +#define VM_EXIT_SAVE_DEBUG_CONTROLS 0x00000004 +#define VM_EXIT_HOST_ADDR_SPACE_SIZE 0x00000200 +#define VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL 0x00001000 +#define VM_EXIT_ACK_INTR_ON_EXIT 0x00008000 +#define VM_EXIT_SAVE_IA32_PAT 0x00040000 +#define VM_EXIT_LOAD_IA32_PAT 0x00080000 +#define VM_EXIT_SAVE_IA32_EFER 0x00100000 +#define VM_EXIT_LOAD_IA32_EFER 0x00200000 +#define VM_EXIT_SAVE_VMX_PREEMPTION_TIMER 0x00400000 + +#define VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR 0x00036dff + +#define VM_ENTRY_LOAD_DEBUG_CONTROLS 0x00000004 +#define VM_ENTRY_IA32E_MODE 0x00000200 +#define VM_ENTRY_SMM 0x00000400 +#define VM_ENTRY_DEACT_DUAL_MONITOR 0x00000800 +#define VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL 0x00002000 +#define VM_ENTRY_LOAD_IA32_PAT 0x00004000 +#define VM_ENTRY_LOAD_IA32_EFER 0x00008000 + +#define VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR 0x000011ff + +#define VMX_MISC_PREEMPTION_TIMER_RATE_MASK 0x0000001f +#define VMX_MISC_SAVE_EFER_LMA 0x00000020 + +#define EXIT_REASON_FAILED_VMENTRY 0x80000000 +#define EXIT_REASON_EXCEPTION_NMI 0 +#define EXIT_REASON_EXTERNAL_INTERRUPT 1 +#define EXIT_REASON_TRIPLE_FAULT 2 +#define EXIT_REASON_PENDING_INTERRUPT 7 +#define EXIT_REASON_NMI_WINDOW 8 +#define EXIT_REASON_TASK_SWITCH 9 +#define EXIT_REASON_CPUID 10 +#define EXIT_REASON_HLT 12 +#define EXIT_REASON_INVD 13 +#define EXIT_REASON_INVLPG 14 +#define EXIT_REASON_RDPMC 15 +#define EXIT_REASON_RDTSC 16 +#define EXIT_REASON_VMCALL 18 +#define EXIT_REASON_VMCLEAR 19 +#define EXIT_REASON_VMLAUNCH 20 +#define EXIT_REASON_VMPTRLD 21 +#define EXIT_REASON_VMPTRST 22 +#define EXIT_REASON_VMREAD 23 +#define EXIT_REASON_VMRESUME 24 +#define EXIT_REASON_VMWRITE 25 +#define EXIT_REASON_VMOFF 26 +#define EXIT_REASON_VMON 27 +#define EXIT_REASON_CR_ACCESS 28 +#define EXIT_REASON_DR_ACCESS 29 +#define EXIT_REASON_IO_INSTRUCTION 30 +#define EXIT_REASON_MSR_READ 31 +#define EXIT_REASON_MSR_WRITE 32 +#define EXIT_REASON_INVALID_STATE 33 +#define EXIT_REASON_MWAIT_INSTRUCTION 36 +#define EXIT_REASON_MONITOR_INSTRUCTION 39 +#define EXIT_REASON_PAUSE_INSTRUCTION 40 +#define EXIT_REASON_MCE_DURING_VMENTRY 41 +#define EXIT_REASON_TPR_BELOW_THRESHOLD 43 +#define EXIT_REASON_APIC_ACCESS 44 +#define EXIT_REASON_EOI_INDUCED 45 +#define EXIT_REASON_EPT_VIOLATION 48 +#define EXIT_REASON_EPT_MISCONFIG 49 +#define EXIT_REASON_INVEPT 50 +#define EXIT_REASON_RDTSCP 51 +#define EXIT_REASON_PREEMPTION_TIMER 52 +#define EXIT_REASON_INVVPID 53 +#define EXIT_REASON_WBINVD 54 +#define EXIT_REASON_XSETBV 55 +#define EXIT_REASON_APIC_WRITE 56 +#define EXIT_REASON_INVPCID 58 +#define EXIT_REASON_PML_FULL 62 +#define EXIT_REASON_XSAVES 63 +#define EXIT_REASON_XRSTORS 64 +#define LAST_EXIT_REASON 64 + +enum vmcs_field { + VIRTUAL_PROCESSOR_ID = 0x00000000, + POSTED_INTR_NV = 0x00000002, + GUEST_ES_SELECTOR = 0x00000800, + GUEST_CS_SELECTOR = 0x00000802, + GUEST_SS_SELECTOR = 0x00000804, + GUEST_DS_SELECTOR = 0x00000806, + GUEST_FS_SELECTOR = 0x00000808, + GUEST_GS_SELECTOR = 0x0000080a, + GUEST_LDTR_SELECTOR = 0x0000080c, + GUEST_TR_SELECTOR = 0x0000080e, + GUEST_INTR_STATUS = 0x00000810, + GUEST_PML_INDEX = 0x00000812, + HOST_ES_SELECTOR = 0x00000c00, + HOST_CS_SELECTOR = 0x00000c02, + HOST_SS_SELECTOR = 0x00000c04, + HOST_DS_SELECTOR = 0x00000c06, + HOST_FS_SELECTOR = 0x00000c08, + HOST_GS_SELECTOR = 0x00000c0a, + HOST_TR_SELECTOR = 0x00000c0c, + IO_BITMAP_A = 0x00002000, + IO_BITMAP_A_HIGH = 0x00002001, + IO_BITMAP_B = 0x00002002, + IO_BITMAP_B_HIGH = 0x00002003, + MSR_BITMAP = 0x00002004, + MSR_BITMAP_HIGH = 0x00002005, + VM_EXIT_MSR_STORE_ADDR = 0x00002006, + VM_EXIT_MSR_STORE_ADDR_HIGH = 0x00002007, + VM_EXIT_MSR_LOAD_ADDR = 0x00002008, + VM_EXIT_MSR_LOAD_ADDR_HIGH = 0x00002009, + VM_ENTRY_MSR_LOAD_ADDR = 0x0000200a, + VM_ENTRY_MSR_LOAD_ADDR_HIGH = 0x0000200b, + PML_ADDRESS = 0x0000200e, + PML_ADDRESS_HIGH = 0x0000200f, + TSC_OFFSET = 0x00002010, + TSC_OFFSET_HIGH = 0x00002011, + VIRTUAL_APIC_PAGE_ADDR = 0x00002012, + VIRTUAL_APIC_PAGE_ADDR_HIGH = 0x00002013, + APIC_ACCESS_ADDR = 0x00002014, + APIC_ACCESS_ADDR_HIGH = 0x00002015, + POSTED_INTR_DESC_ADDR = 0x00002016, + POSTED_INTR_DESC_ADDR_HIGH = 0x00002017, + EPT_POINTER = 0x0000201a, + EPT_POINTER_HIGH = 0x0000201b, + EOI_EXIT_BITMAP0 = 0x0000201c, + EOI_EXIT_BITMAP0_HIGH = 0x0000201d, + EOI_EXIT_BITMAP1 = 0x0000201e, + EOI_EXIT_BITMAP1_HIGH = 0x0000201f, + EOI_EXIT_BITMAP2 = 0x00002020, + EOI_EXIT_BITMAP2_HIGH = 0x00002021, + EOI_EXIT_BITMAP3 = 0x00002022, + EOI_EXIT_BITMAP3_HIGH = 0x00002023, + VMREAD_BITMAP = 0x00002026, + VMREAD_BITMAP_HIGH = 0x00002027, + VMWRITE_BITMAP = 0x00002028, + VMWRITE_BITMAP_HIGH = 0x00002029, + XSS_EXIT_BITMAP = 0x0000202C, + XSS_EXIT_BITMAP_HIGH = 0x0000202D, + TSC_MULTIPLIER = 0x00002032, + TSC_MULTIPLIER_HIGH = 0x00002033, + GUEST_PHYSICAL_ADDRESS = 0x00002400, + GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401, + VMCS_LINK_POINTER = 0x00002800, + VMCS_LINK_POINTER_HIGH = 0x00002801, + GUEST_IA32_DEBUGCTL = 0x00002802, + GUEST_IA32_DEBUGCTL_HIGH = 0x00002803, + GUEST_IA32_PAT = 0x00002804, + GUEST_IA32_PAT_HIGH = 0x00002805, + GUEST_IA32_EFER = 0x00002806, + GUEST_IA32_EFER_HIGH = 0x00002807, + GUEST_IA32_PERF_GLOBAL_CTRL = 0x00002808, + GUEST_IA32_PERF_GLOBAL_CTRL_HIGH= 0x00002809, + GUEST_PDPTR0 = 0x0000280a, + GUEST_PDPTR0_HIGH = 0x0000280b, + GUEST_PDPTR1 = 0x0000280c, + GUEST_PDPTR1_HIGH = 0x0000280d, + GUEST_PDPTR2 = 0x0000280e, + GUEST_PDPTR2_HIGH = 0x0000280f, + GUEST_PDPTR3 = 0x00002810, + GUEST_PDPTR3_HIGH = 0x00002811, + GUEST_BNDCFGS = 0x00002812, + GUEST_BNDCFGS_HIGH = 0x00002813, + HOST_IA32_PAT = 0x00002c00, + HOST_IA32_PAT_HIGH = 0x00002c01, + HOST_IA32_EFER = 0x00002c02, + HOST_IA32_EFER_HIGH = 0x00002c03, + HOST_IA32_PERF_GLOBAL_CTRL = 0x00002c04, + HOST_IA32_PERF_GLOBAL_CTRL_HIGH = 0x00002c05, + PIN_BASED_VM_EXEC_CONTROL = 0x00004000, + CPU_BASED_VM_EXEC_CONTROL = 0x00004002, + EXCEPTION_BITMAP = 0x00004004, + PAGE_FAULT_ERROR_CODE_MASK = 0x00004006, + PAGE_FAULT_ERROR_CODE_MATCH = 0x00004008, + CR3_TARGET_COUNT = 0x0000400a, + VM_EXIT_CONTROLS = 0x0000400c, + VM_EXIT_MSR_STORE_COUNT = 0x0000400e, + VM_EXIT_MSR_LOAD_COUNT = 0x00004010, + VM_ENTRY_CONTROLS = 0x00004012, + VM_ENTRY_MSR_LOAD_COUNT = 0x00004014, + VM_ENTRY_INTR_INFO_FIELD = 0x00004016, + VM_ENTRY_EXCEPTION_ERROR_CODE = 0x00004018, + VM_ENTRY_INSTRUCTION_LEN = 0x0000401a, + TPR_THRESHOLD = 0x0000401c, + SECONDARY_VM_EXEC_CONTROL = 0x0000401e, + PLE_GAP = 0x00004020, + PLE_WINDOW = 0x00004022, + VM_INSTRUCTION_ERROR = 0x00004400, + VM_EXIT_REASON = 0x00004402, + VM_EXIT_INTR_INFO = 0x00004404, + VM_EXIT_INTR_ERROR_CODE = 0x00004406, + IDT_VECTORING_INFO_FIELD = 0x00004408, + IDT_VECTORING_ERROR_CODE = 0x0000440a, + VM_EXIT_INSTRUCTION_LEN = 0x0000440c, + VMX_INSTRUCTION_INFO = 0x0000440e, + GUEST_ES_LIMIT = 0x00004800, + GUEST_CS_LIMIT = 0x00004802, + GUEST_SS_LIMIT = 0x00004804, + GUEST_DS_LIMIT = 0x00004806, + GUEST_FS_LIMIT = 0x00004808, + GUEST_GS_LIMIT = 0x0000480a, + GUEST_LDTR_LIMIT = 0x0000480c, + GUEST_TR_LIMIT = 0x0000480e, + GUEST_GDTR_LIMIT = 0x00004810, + GUEST_IDTR_LIMIT = 0x00004812, + GUEST_ES_AR_BYTES = 0x00004814, + GUEST_CS_AR_BYTES = 0x00004816, + GUEST_SS_AR_BYTES = 0x00004818, + GUEST_DS_AR_BYTES = 0x0000481a, + GUEST_FS_AR_BYTES = 0x0000481c, + GUEST_GS_AR_BYTES = 0x0000481e, + GUEST_LDTR_AR_BYTES = 0x00004820, + GUEST_TR_AR_BYTES = 0x00004822, + GUEST_INTERRUPTIBILITY_INFO = 0x00004824, + GUEST_ACTIVITY_STATE = 0X00004826, + GUEST_SYSENTER_CS = 0x0000482A, + VMX_PREEMPTION_TIMER_VALUE = 0x0000482E, + HOST_IA32_SYSENTER_CS = 0x00004c00, + CR0_GUEST_HOST_MASK = 0x00006000, + CR4_GUEST_HOST_MASK = 0x00006002, + CR0_READ_SHADOW = 0x00006004, + CR4_READ_SHADOW = 0x00006006, + CR3_TARGET_VALUE0 = 0x00006008, + CR3_TARGET_VALUE1 = 0x0000600a, + CR3_TARGET_VALUE2 = 0x0000600c, + CR3_TARGET_VALUE3 = 0x0000600e, + EXIT_QUALIFICATION = 0x00006400, + GUEST_LINEAR_ADDRESS = 0x0000640a, + GUEST_CR0 = 0x00006800, + GUEST_CR3 = 0x00006802, + GUEST_CR4 = 0x00006804, + GUEST_ES_BASE = 0x00006806, + GUEST_CS_BASE = 0x00006808, + GUEST_SS_BASE = 0x0000680a, + GUEST_DS_BASE = 0x0000680c, + GUEST_FS_BASE = 0x0000680e, + GUEST_GS_BASE = 0x00006810, + GUEST_LDTR_BASE = 0x00006812, + GUEST_TR_BASE = 0x00006814, + GUEST_GDTR_BASE = 0x00006816, + GUEST_IDTR_BASE = 0x00006818, + GUEST_DR7 = 0x0000681a, + GUEST_RSP = 0x0000681c, + GUEST_RIP = 0x0000681e, + GUEST_RFLAGS = 0x00006820, + GUEST_PENDING_DBG_EXCEPTIONS = 0x00006822, + GUEST_SYSENTER_ESP = 0x00006824, + GUEST_SYSENTER_EIP = 0x00006826, + HOST_CR0 = 0x00006c00, + HOST_CR3 = 0x00006c02, + HOST_CR4 = 0x00006c04, + HOST_FS_BASE = 0x00006c06, + HOST_GS_BASE = 0x00006c08, + HOST_TR_BASE = 0x00006c0a, + HOST_GDTR_BASE = 0x00006c0c, + HOST_IDTR_BASE = 0x00006c0e, + HOST_IA32_SYSENTER_ESP = 0x00006c10, + HOST_IA32_SYSENTER_EIP = 0x00006c12, + HOST_RSP = 0x00006c14, + HOST_RIP = 0x00006c16, +}; + +struct vmx_msr_entry { + uint32_t index; + uint32_t reserved; + uint64_t value; +} __attribute__ ((aligned(16))); + +static inline int vmxon(uint64_t phys) +{ + uint8_t ret; + + __asm__ __volatile__ ("vmxon %[pa]; setna %[ret]" + : [ret]"=rm"(ret) + : [pa]"m"(phys) + : "cc", "memory"); + + return ret; +} + +static inline void vmxoff(void) +{ + __asm__ __volatile__("vmxoff"); +} + +static inline int vmclear(uint64_t vmcs_pa) +{ + uint8_t ret; + + __asm__ __volatile__ ("vmclear %[pa]; setna %[ret]" + : [ret]"=rm"(ret) + : [pa]"m"(vmcs_pa) + : "cc", "memory"); + + return ret; +} + +static inline int vmptrld(uint64_t vmcs_pa) +{ + uint8_t ret; + + __asm__ __volatile__ ("vmptrld %[pa]; setna %[ret]" + : [ret]"=rm"(ret) + : [pa]"m"(vmcs_pa) + : "cc", "memory"); + + return ret; +} + +/* + * No guest state (e.g. GPRs) is established by this vmlaunch. + */ +static inline int vmlaunch(void) +{ + int ret; + + __asm__ __volatile__("push %%rbp;" + "push %%rcx;" + "push %%rdx;" + "push %%rsi;" + "push %%rdi;" + "push $0;" + "vmwrite %%rsp, %[host_rsp];" + "lea 1f(%%rip), %%rax;" + "vmwrite %%rax, %[host_rip];" + "vmlaunch;" + "incq (%%rsp);" + "1: pop %%rax;" + "pop %%rdi;" + "pop %%rsi;" + "pop %%rdx;" + "pop %%rcx;" + "pop %%rbp;" + : [ret]"=&a"(ret) + : [host_rsp]"r"((uint64_t)HOST_RSP), + [host_rip]"r"((uint64_t)HOST_RIP) + : "memory", "cc", "rbx", "r8", "r9", "r10", + "r11", "r12", "r13", "r14", "r15"); + return ret; +} + +/* + * No guest state (e.g. GPRs) is established by this vmresume. + */ +static inline int vmresume(void) +{ + int ret; + + __asm__ __volatile__("push %%rbp;" + "push %%rcx;" + "push %%rdx;" + "push %%rsi;" + "push %%rdi;" + "push $0;" + "vmwrite %%rsp, %[host_rsp];" + "lea 1f(%%rip), %%rax;" + "vmwrite %%rax, %[host_rip];" + "vmresume;" + "incq (%%rsp);" + "1: pop %%rax;" + "pop %%rdi;" + "pop %%rsi;" + "pop %%rdx;" + "pop %%rcx;" + "pop %%rbp;" + : [ret]"=&a"(ret) + : [host_rsp]"r"((uint64_t)HOST_RSP), + [host_rip]"r"((uint64_t)HOST_RIP) + : "memory", "cc", "rbx", "r8", "r9", "r10", + "r11", "r12", "r13", "r14", "r15"); + return ret; +} + +static inline int vmread(uint64_t encoding, uint64_t *value) +{ + uint64_t tmp; + uint8_t ret; + + __asm__ __volatile__("vmread %[encoding], %[value]; setna %[ret]" + : [value]"=rm"(tmp), [ret]"=rm"(ret) + : [encoding]"r"(encoding) + : "cc", "memory"); + + *value = tmp; + return ret; +} + +/* + * A wrapper around vmread that ignores errors and returns zero if the + * vmread instruction fails. + */ +static inline uint64_t vmreadz(uint64_t encoding) +{ + uint64_t value = 0; + vmread(encoding, &value); + return value; +} + +static inline int vmwrite(uint64_t encoding, uint64_t value) +{ + uint8_t ret; + + __asm__ __volatile__ ("vmwrite %[value], %[encoding]; setna %[ret]" + : [ret]"=rm"(ret) + : [value]"rm"(value), [encoding]"r"(encoding) + : "cc", "memory"); + + return ret; +} + +static inline uint32_t vmcs_revision(void) +{ + return rdmsr(MSR_IA32_VMX_BASIC); +} + +void prepare_for_vmx_operation(void); +void prepare_vmcs(void *guest_rip, void *guest_rsp); +struct kvm_vm *vm_create_default_vmx(uint32_t vcpuid, + vmx_guest_code_t guest_code); + +#endif /* !SELFTEST_KVM_VMX_H */ diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 7ca1bb40c498..37e2a787d2fc 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -50,8 +50,8 @@ int kvm_check_cap(long cap) int kvm_fd; kvm_fd = open(KVM_DEV_PATH, O_RDONLY); - TEST_ASSERT(kvm_fd >= 0, "open %s failed, rc: %i errno: %i", - KVM_DEV_PATH, kvm_fd, errno); + if (kvm_fd < 0) + exit(KSFT_SKIP); ret = ioctl(kvm_fd, KVM_CHECK_EXTENSION, cap); TEST_ASSERT(ret != -1, "KVM_CHECK_EXTENSION IOCTL failed,\n" @@ -91,8 +91,8 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm) vm->mode = mode; kvm_fd = open(KVM_DEV_PATH, perm); - TEST_ASSERT(kvm_fd >= 0, "open %s failed, rc: %i errno: %i", - KVM_DEV_PATH, kvm_fd, errno); + if (kvm_fd < 0) + exit(KSFT_SKIP); /* Create VM. */ vm->fd = ioctl(kvm_fd, KVM_CREATE_VM, NULL); @@ -378,7 +378,7 @@ int kvm_memcmp_hva_gva(void *hva, * complicated. This function uses a reasonable default length for * the array and performs the appropriate allocation. */ -struct kvm_cpuid2 *allocate_kvm_cpuid2(void) +static struct kvm_cpuid2 *allocate_kvm_cpuid2(void) { struct kvm_cpuid2 *cpuid; int nent = 100; @@ -402,26 +402,31 @@ struct kvm_cpuid2 *allocate_kvm_cpuid2(void) * Input Args: None * * Output Args: - * cpuid - The supported KVM CPUID * - * Return: void + * Return: The supported KVM CPUID * * Get the guest CPUID supported by KVM. */ -void kvm_get_supported_cpuid(struct kvm_cpuid2 *cpuid) +struct kvm_cpuid2 *kvm_get_supported_cpuid(void) { + static struct kvm_cpuid2 *cpuid; int ret; int kvm_fd; + if (cpuid) + return cpuid; + + cpuid = allocate_kvm_cpuid2(); kvm_fd = open(KVM_DEV_PATH, O_RDONLY); - TEST_ASSERT(kvm_fd >= 0, "open %s failed, rc: %i errno: %i", - KVM_DEV_PATH, kvm_fd, errno); + if (kvm_fd < 0) + exit(KSFT_SKIP); ret = ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID, cpuid); TEST_ASSERT(ret == 0, "KVM_GET_SUPPORTED_CPUID failed %d %d\n", ret, errno); close(kvm_fd); + return cpuid; } /* Locate a cpuid entry. @@ -435,12 +440,13 @@ void kvm_get_supported_cpuid(struct kvm_cpuid2 *cpuid) * Return: A pointer to the cpuid entry. Never returns NULL. */ struct kvm_cpuid_entry2 * -find_cpuid_index_entry(struct kvm_cpuid2 *cpuid, uint32_t function, - uint32_t index) +kvm_get_supported_cpuid_index(uint32_t function, uint32_t index) { + struct kvm_cpuid2 *cpuid; struct kvm_cpuid_entry2 *entry = NULL; int i; + cpuid = kvm_get_supported_cpuid(); for (i = 0; i < cpuid->nent; i++) { if (cpuid->entries[i].function == function && cpuid->entries[i].index == index) { @@ -669,8 +675,8 @@ static int vcpu_mmap_sz(void) int dev_fd, ret; dev_fd = open(KVM_DEV_PATH, O_RDONLY); - TEST_ASSERT(dev_fd >= 0, "%s open %s failed, rc: %i errno: %i", - __func__, KVM_DEV_PATH, dev_fd, errno); + if (dev_fd < 0) + exit(KSFT_SKIP); ret = ioctl(dev_fd, KVM_GET_VCPU_MMAP_SIZE, NULL); TEST_ASSERT(ret >= sizeof(struct kvm_run), @@ -1435,7 +1441,7 @@ vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, sparsebit_idx_t pg; TEST_ASSERT((paddr_min % vm->page_size) == 0, "Min physical address " - "not divisable by page size.\n" + "not divisible by page size.\n" " paddr_min: 0x%lx page_size: 0x%x", paddr_min, vm->page_size); diff --git a/tools/testing/selftests/kvm/lib/sparsebit.c b/tools/testing/selftests/kvm/lib/sparsebit.c index 0c5cf3e0cb6f..b132bc95d183 100644 --- a/tools/testing/selftests/kvm/lib/sparsebit.c +++ b/tools/testing/selftests/kvm/lib/sparsebit.c @@ -121,7 +121,7 @@ * avoided by moving the setting of the nodes mask bits into * the previous nodes num_after setting. * - * + Node starting index is evenly divisable by the number of bits + * + Node starting index is evenly divisible by the number of bits * within a nodes mask member. * * + Nodes never represent a range of bits that wrap around the @@ -1741,7 +1741,7 @@ void sparsebit_validate_internal(struct sparsebit *s) /* Validate node index is divisible by the mask size */ if (nodep->idx % MASK_BITS) { - fprintf(stderr, "Node index not divisable by " + fprintf(stderr, "Node index not divisible by " "mask size,\n" " nodep: %p nodep->idx: 0x%lx " "MASK_BITS: %lu\n", diff --git a/tools/testing/selftests/kvm/lib/vmx.c b/tools/testing/selftests/kvm/lib/vmx.c new file mode 100644 index 000000000000..0231bc0aae7b --- /dev/null +++ b/tools/testing/selftests/kvm/lib/vmx.c @@ -0,0 +1,243 @@ +/* + * tools/testing/selftests/kvm/lib/x86.c + * + * Copyright (C) 2018, Google LLC. + * + * This work is licensed under the terms of the GNU GPL, version 2. + */ + +#define _GNU_SOURCE /* for program_invocation_name */ + +#include "test_util.h" +#include "kvm_util.h" +#include "x86.h" +#include "vmx.h" + +/* Create a default VM for VMX tests. + * + * Input Args: + * vcpuid - The id of the single VCPU to add to the VM. + * guest_code - The vCPU's entry point + * + * Output Args: None + * + * Return: + * Pointer to opaque structure that describes the created VM. + */ +struct kvm_vm * +vm_create_default_vmx(uint32_t vcpuid, vmx_guest_code_t guest_code) +{ + struct kvm_cpuid2 *cpuid; + struct kvm_vm *vm; + vm_vaddr_t vmxon_vaddr; + vm_paddr_t vmxon_paddr; + vm_vaddr_t vmcs_vaddr; + vm_paddr_t vmcs_paddr; + + vm = vm_create_default(vcpuid, (void *) guest_code); + + /* Enable nesting in CPUID */ + vcpu_set_cpuid(vm, vcpuid, kvm_get_supported_cpuid()); + + /* Setup of a region of guest memory for the vmxon region. */ + vmxon_vaddr = vm_vaddr_alloc(vm, getpagesize(), 0, 0, 0); + vmxon_paddr = addr_gva2gpa(vm, vmxon_vaddr); + + /* Setup of a region of guest memory for a vmcs. */ + vmcs_vaddr = vm_vaddr_alloc(vm, getpagesize(), 0, 0, 0); + vmcs_paddr = addr_gva2gpa(vm, vmcs_vaddr); + + vcpu_args_set(vm, vcpuid, 4, vmxon_vaddr, vmxon_paddr, vmcs_vaddr, + vmcs_paddr); + + return vm; +} + +void prepare_for_vmx_operation(void) +{ + uint64_t feature_control; + uint64_t required; + unsigned long cr0; + unsigned long cr4; + + /* + * Ensure bits in CR0 and CR4 are valid in VMX operation: + * - Bit X is 1 in _FIXED0: bit X is fixed to 1 in CRx. + * - Bit X is 0 in _FIXED1: bit X is fixed to 0 in CRx. + */ + __asm__ __volatile__("mov %%cr0, %0" : "=r"(cr0) : : "memory"); + cr0 &= rdmsr(MSR_IA32_VMX_CR0_FIXED1); + cr0 |= rdmsr(MSR_IA32_VMX_CR0_FIXED0); + __asm__ __volatile__("mov %0, %%cr0" : : "r"(cr0) : "memory"); + + __asm__ __volatile__("mov %%cr4, %0" : "=r"(cr4) : : "memory"); + cr4 &= rdmsr(MSR_IA32_VMX_CR4_FIXED1); + cr4 |= rdmsr(MSR_IA32_VMX_CR4_FIXED0); + /* Enable VMX operation */ + cr4 |= X86_CR4_VMXE; + __asm__ __volatile__("mov %0, %%cr4" : : "r"(cr4) : "memory"); + + /* + * Configure IA32_FEATURE_CONTROL MSR to allow VMXON: + * Bit 0: Lock bit. If clear, VMXON causes a #GP. + * Bit 2: Enables VMXON outside of SMX operation. If clear, VMXON + * outside of SMX causes a #GP. + */ + required = FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; + required |= FEATURE_CONTROL_LOCKED; + feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL); + if ((feature_control & required) != required) + wrmsr(MSR_IA32_FEATURE_CONTROL, feature_control | required); +} + +/* + * Initialize the control fields to the most basic settings possible. + */ +static inline void init_vmcs_control_fields(void) +{ + vmwrite(VIRTUAL_PROCESSOR_ID, 0); + vmwrite(POSTED_INTR_NV, 0); + + vmwrite(PIN_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_PINBASED_CTLS)); + vmwrite(CPU_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_PROCBASED_CTLS)); + vmwrite(EXCEPTION_BITMAP, 0); + vmwrite(PAGE_FAULT_ERROR_CODE_MASK, 0); + vmwrite(PAGE_FAULT_ERROR_CODE_MATCH, -1); /* Never match */ + vmwrite(CR3_TARGET_COUNT, 0); + vmwrite(VM_EXIT_CONTROLS, rdmsr(MSR_IA32_VMX_EXIT_CTLS) | + VM_EXIT_HOST_ADDR_SPACE_SIZE); /* 64-bit host */ + vmwrite(VM_EXIT_MSR_STORE_COUNT, 0); + vmwrite(VM_EXIT_MSR_LOAD_COUNT, 0); + vmwrite(VM_ENTRY_CONTROLS, rdmsr(MSR_IA32_VMX_ENTRY_CTLS) | + VM_ENTRY_IA32E_MODE); /* 64-bit guest */ + vmwrite(VM_ENTRY_MSR_LOAD_COUNT, 0); + vmwrite(VM_ENTRY_INTR_INFO_FIELD, 0); + vmwrite(TPR_THRESHOLD, 0); + vmwrite(SECONDARY_VM_EXEC_CONTROL, 0); + + vmwrite(CR0_GUEST_HOST_MASK, 0); + vmwrite(CR4_GUEST_HOST_MASK, 0); + vmwrite(CR0_READ_SHADOW, get_cr0()); + vmwrite(CR4_READ_SHADOW, get_cr4()); +} + +/* + * Initialize the host state fields based on the current host state, with + * the exception of HOST_RSP and HOST_RIP, which should be set by vmlaunch + * or vmresume. + */ +static inline void init_vmcs_host_state(void) +{ + uint32_t exit_controls = vmreadz(VM_EXIT_CONTROLS); + + vmwrite(HOST_ES_SELECTOR, get_es()); + vmwrite(HOST_CS_SELECTOR, get_cs()); + vmwrite(HOST_SS_SELECTOR, get_ss()); + vmwrite(HOST_DS_SELECTOR, get_ds()); + vmwrite(HOST_FS_SELECTOR, get_fs()); + vmwrite(HOST_GS_SELECTOR, get_gs()); + vmwrite(HOST_TR_SELECTOR, get_tr()); + + if (exit_controls & VM_EXIT_LOAD_IA32_PAT) + vmwrite(HOST_IA32_PAT, rdmsr(MSR_IA32_CR_PAT)); + if (exit_controls & VM_EXIT_LOAD_IA32_EFER) + vmwrite(HOST_IA32_EFER, rdmsr(MSR_EFER)); + if (exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) + vmwrite(HOST_IA32_PERF_GLOBAL_CTRL, + rdmsr(MSR_CORE_PERF_GLOBAL_CTRL)); + + vmwrite(HOST_IA32_SYSENTER_CS, rdmsr(MSR_IA32_SYSENTER_CS)); + + vmwrite(HOST_CR0, get_cr0()); + vmwrite(HOST_CR3, get_cr3()); + vmwrite(HOST_CR4, get_cr4()); + vmwrite(HOST_FS_BASE, rdmsr(MSR_FS_BASE)); + vmwrite(HOST_GS_BASE, rdmsr(MSR_GS_BASE)); + vmwrite(HOST_TR_BASE, + get_desc64_base((struct desc64 *)(get_gdt_base() + get_tr()))); + vmwrite(HOST_GDTR_BASE, get_gdt_base()); + vmwrite(HOST_IDTR_BASE, get_idt_base()); + vmwrite(HOST_IA32_SYSENTER_ESP, rdmsr(MSR_IA32_SYSENTER_ESP)); + vmwrite(HOST_IA32_SYSENTER_EIP, rdmsr(MSR_IA32_SYSENTER_EIP)); +} + +/* + * Initialize the guest state fields essentially as a clone of + * the host state fields. Some host state fields have fixed + * values, and we set the corresponding guest state fields accordingly. + */ +static inline void init_vmcs_guest_state(void *rip, void *rsp) +{ + vmwrite(GUEST_ES_SELECTOR, vmreadz(HOST_ES_SELECTOR)); + vmwrite(GUEST_CS_SELECTOR, vmreadz(HOST_CS_SELECTOR)); + vmwrite(GUEST_SS_SELECTOR, vmreadz(HOST_SS_SELECTOR)); + vmwrite(GUEST_DS_SELECTOR, vmreadz(HOST_DS_SELECTOR)); + vmwrite(GUEST_FS_SELECTOR, vmreadz(HOST_FS_SELECTOR)); + vmwrite(GUEST_GS_SELECTOR, vmreadz(HOST_GS_SELECTOR)); + vmwrite(GUEST_LDTR_SELECTOR, 0); + vmwrite(GUEST_TR_SELECTOR, vmreadz(HOST_TR_SELECTOR)); + vmwrite(GUEST_INTR_STATUS, 0); + vmwrite(GUEST_PML_INDEX, 0); + + vmwrite(VMCS_LINK_POINTER, -1ll); + vmwrite(GUEST_IA32_DEBUGCTL, 0); + vmwrite(GUEST_IA32_PAT, vmreadz(HOST_IA32_PAT)); + vmwrite(GUEST_IA32_EFER, vmreadz(HOST_IA32_EFER)); + vmwrite(GUEST_IA32_PERF_GLOBAL_CTRL, + vmreadz(HOST_IA32_PERF_GLOBAL_CTRL)); + + vmwrite(GUEST_ES_LIMIT, -1); + vmwrite(GUEST_CS_LIMIT, -1); + vmwrite(GUEST_SS_LIMIT, -1); + vmwrite(GUEST_DS_LIMIT, -1); + vmwrite(GUEST_FS_LIMIT, -1); + vmwrite(GUEST_GS_LIMIT, -1); + vmwrite(GUEST_LDTR_LIMIT, -1); + vmwrite(GUEST_TR_LIMIT, 0x67); + vmwrite(GUEST_GDTR_LIMIT, 0xffff); + vmwrite(GUEST_IDTR_LIMIT, 0xffff); + vmwrite(GUEST_ES_AR_BYTES, + vmreadz(GUEST_ES_SELECTOR) == 0 ? 0x10000 : 0xc093); + vmwrite(GUEST_CS_AR_BYTES, 0xa09b); + vmwrite(GUEST_SS_AR_BYTES, 0xc093); + vmwrite(GUEST_DS_AR_BYTES, + vmreadz(GUEST_DS_SELECTOR) == 0 ? 0x10000 : 0xc093); + vmwrite(GUEST_FS_AR_BYTES, + vmreadz(GUEST_FS_SELECTOR) == 0 ? 0x10000 : 0xc093); + vmwrite(GUEST_GS_AR_BYTES, + vmreadz(GUEST_GS_SELECTOR) == 0 ? 0x10000 : 0xc093); + vmwrite(GUEST_LDTR_AR_BYTES, 0x10000); + vmwrite(GUEST_TR_AR_BYTES, 0x8b); + vmwrite(GUEST_INTERRUPTIBILITY_INFO, 0); + vmwrite(GUEST_ACTIVITY_STATE, 0); + vmwrite(GUEST_SYSENTER_CS, vmreadz(HOST_IA32_SYSENTER_CS)); + vmwrite(VMX_PREEMPTION_TIMER_VALUE, 0); + + vmwrite(GUEST_CR0, vmreadz(HOST_CR0)); + vmwrite(GUEST_CR3, vmreadz(HOST_CR3)); + vmwrite(GUEST_CR4, vmreadz(HOST_CR4)); + vmwrite(GUEST_ES_BASE, 0); + vmwrite(GUEST_CS_BASE, 0); + vmwrite(GUEST_SS_BASE, 0); + vmwrite(GUEST_DS_BASE, 0); + vmwrite(GUEST_FS_BASE, vmreadz(HOST_FS_BASE)); + vmwrite(GUEST_GS_BASE, vmreadz(HOST_GS_BASE)); + vmwrite(GUEST_LDTR_BASE, 0); + vmwrite(GUEST_TR_BASE, vmreadz(HOST_TR_BASE)); + vmwrite(GUEST_GDTR_BASE, vmreadz(HOST_GDTR_BASE)); + vmwrite(GUEST_IDTR_BASE, vmreadz(HOST_IDTR_BASE)); + vmwrite(GUEST_DR7, 0x400); + vmwrite(GUEST_RSP, (uint64_t)rsp); + vmwrite(GUEST_RIP, (uint64_t)rip); + vmwrite(GUEST_RFLAGS, 2); + vmwrite(GUEST_PENDING_DBG_EXCEPTIONS, 0); + vmwrite(GUEST_SYSENTER_ESP, vmreadz(HOST_IA32_SYSENTER_ESP)); + vmwrite(GUEST_SYSENTER_EIP, vmreadz(HOST_IA32_SYSENTER_EIP)); +} + +void prepare_vmcs(void *guest_rip, void *guest_rsp) +{ + init_vmcs_control_fields(); + init_vmcs_host_state(); + init_vmcs_guest_state(guest_rip, guest_rsp); +} diff --git a/tools/testing/selftests/kvm/sync_regs_test.c b/tools/testing/selftests/kvm/sync_regs_test.c index 428e9473f5e2..eae1ece3c31b 100644 --- a/tools/testing/selftests/kvm/sync_regs_test.c +++ b/tools/testing/selftests/kvm/sync_regs_test.c @@ -85,6 +85,9 @@ static void compare_vcpu_events(struct kvm_vcpu_events *left, { } +#define TEST_SYNC_FIELDS (KVM_SYNC_X86_REGS|KVM_SYNC_X86_SREGS|KVM_SYNC_X86_EVENTS) +#define INVALID_SYNC_FIELD 0x80000000 + int main(int argc, char *argv[]) { struct kvm_vm *vm; @@ -98,9 +101,14 @@ int main(int argc, char *argv[]) setbuf(stdout, NULL); cap = kvm_check_cap(KVM_CAP_SYNC_REGS); - TEST_ASSERT((unsigned long)cap == KVM_SYNC_X86_VALID_FIELDS, - "KVM_CAP_SYNC_REGS (0x%x) != KVM_SYNC_X86_VALID_FIELDS (0x%lx)\n", - cap, KVM_SYNC_X86_VALID_FIELDS); + if ((cap & TEST_SYNC_FIELDS) != TEST_SYNC_FIELDS) { + fprintf(stderr, "KVM_CAP_SYNC_REGS not supported, skipping test\n"); + exit(KSFT_SKIP); + } + if ((cap & INVALID_SYNC_FIELD) != 0) { + fprintf(stderr, "The \"invalid\" field is not invalid, skipping test\n"); + exit(KSFT_SKIP); + } /* Create VM */ vm = vm_create_default(VCPU_ID, guest_code); @@ -108,7 +116,14 @@ int main(int argc, char *argv[]) run = vcpu_state(vm, VCPU_ID); /* Request reading invalid register set from VCPU. */ - run->kvm_valid_regs = KVM_SYNC_X86_VALID_FIELDS << 1; + run->kvm_valid_regs = INVALID_SYNC_FIELD; + rv = _vcpu_run(vm, VCPU_ID); + TEST_ASSERT(rv < 0 && errno == EINVAL, + "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n", + rv); + vcpu_state(vm, VCPU_ID)->kvm_valid_regs = 0; + + run->kvm_valid_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS; rv = _vcpu_run(vm, VCPU_ID); TEST_ASSERT(rv < 0 && errno == EINVAL, "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n", @@ -116,7 +131,14 @@ int main(int argc, char *argv[]) vcpu_state(vm, VCPU_ID)->kvm_valid_regs = 0; /* Request setting invalid register set into VCPU. */ - run->kvm_dirty_regs = KVM_SYNC_X86_VALID_FIELDS << 1; + run->kvm_dirty_regs = INVALID_SYNC_FIELD; + rv = _vcpu_run(vm, VCPU_ID); + TEST_ASSERT(rv < 0 && errno == EINVAL, + "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n", + rv); + vcpu_state(vm, VCPU_ID)->kvm_dirty_regs = 0; + + run->kvm_dirty_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS; rv = _vcpu_run(vm, VCPU_ID); TEST_ASSERT(rv < 0 && errno == EINVAL, "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n", @@ -125,7 +147,7 @@ int main(int argc, char *argv[]) /* Request and verify all valid register sets. */ /* TODO: BUILD TIME CHECK: TEST_ASSERT(KVM_SYNC_X86_NUM_FIELDS != 3); */ - run->kvm_valid_regs = KVM_SYNC_X86_VALID_FIELDS; + run->kvm_valid_regs = TEST_SYNC_FIELDS; rv = _vcpu_run(vm, VCPU_ID); TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, "Unexpected exit reason: %u (%s),\n", @@ -146,7 +168,7 @@ int main(int argc, char *argv[]) run->s.regs.sregs.apic_base = 1 << 11; /* TODO run->s.regs.events.XYZ = ABC; */ - run->kvm_valid_regs = KVM_SYNC_X86_VALID_FIELDS; + run->kvm_valid_regs = TEST_SYNC_FIELDS; run->kvm_dirty_regs = KVM_SYNC_X86_REGS | KVM_SYNC_X86_SREGS; rv = _vcpu_run(vm, VCPU_ID); TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, @@ -172,7 +194,7 @@ int main(int argc, char *argv[]) /* Clear kvm_dirty_regs bits, verify new s.regs values are * overwritten with existing guest values. */ - run->kvm_valid_regs = KVM_SYNC_X86_VALID_FIELDS; + run->kvm_valid_regs = TEST_SYNC_FIELDS; run->kvm_dirty_regs = 0; run->s.regs.regs.r11 = 0xDEADBEEF; rv = _vcpu_run(vm, VCPU_ID); @@ -211,7 +233,7 @@ int main(int argc, char *argv[]) * with kvm_sync_regs values. */ run->kvm_valid_regs = 0; - run->kvm_dirty_regs = KVM_SYNC_X86_VALID_FIELDS; + run->kvm_dirty_regs = TEST_SYNC_FIELDS; run->s.regs.regs.r11 = 0xBBBB; rv = _vcpu_run(vm, VCPU_ID); TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, diff --git a/tools/testing/selftests/kvm/vmx_tsc_adjust_test.c b/tools/testing/selftests/kvm/vmx_tsc_adjust_test.c new file mode 100644 index 000000000000..aaa633263b2c --- /dev/null +++ b/tools/testing/selftests/kvm/vmx_tsc_adjust_test.c @@ -0,0 +1,231 @@ +/* + * gtests/tests/vmx_tsc_adjust_test.c + * + * Copyright (C) 2018, Google LLC. + * + * This work is licensed under the terms of the GNU GPL, version 2. + * + * + * IA32_TSC_ADJUST test + * + * According to the SDM, "if an execution of WRMSR to the + * IA32_TIME_STAMP_COUNTER MSR adds (or subtracts) value X from the TSC, + * the logical processor also adds (or subtracts) value X from the + * IA32_TSC_ADJUST MSR. + * + * Note that when L1 doesn't intercept writes to IA32_TSC, a + * WRMSR(IA32_TSC) from L2 sets L1's TSC value, not L2's perceived TSC + * value. + * + * This test verifies that this unusual case is handled correctly. + */ + +#include "test_util.h" +#include "kvm_util.h" +#include "x86.h" +#include "vmx.h" + +#include <string.h> +#include <sys/ioctl.h> + +#ifndef MSR_IA32_TSC_ADJUST +#define MSR_IA32_TSC_ADJUST 0x3b +#endif + +#define PAGE_SIZE 4096 +#define VCPU_ID 5 + +#define TSC_ADJUST_VALUE (1ll << 32) +#define TSC_OFFSET_VALUE -(1ll << 48) + +enum { + PORT_ABORT = 0x1000, + PORT_REPORT, + PORT_DONE, +}; + +struct vmx_page { + vm_vaddr_t virt; + vm_paddr_t phys; +}; + +enum { + VMXON_PAGE = 0, + VMCS_PAGE, + MSR_BITMAP_PAGE, + + NUM_VMX_PAGES, +}; + +struct kvm_single_msr { + struct kvm_msrs header; + struct kvm_msr_entry entry; +} __attribute__((packed)); + +/* The virtual machine object. */ +static struct kvm_vm *vm; + +/* Array of vmx_page descriptors that is shared with the guest. */ +struct vmx_page *vmx_pages; + +#define exit_to_l0(_port, _arg) do_exit_to_l0(_port, (unsigned long) (_arg)) +static void do_exit_to_l0(uint16_t port, unsigned long arg) +{ + __asm__ __volatile__("in %[port], %%al" + : + : [port]"d"(port), "D"(arg) + : "rax"); +} + + +#define GUEST_ASSERT(_condition) do { \ + if (!(_condition)) \ + exit_to_l0(PORT_ABORT, "Failed guest assert: " #_condition); \ +} while (0) + +static void check_ia32_tsc_adjust(int64_t max) +{ + int64_t adjust; + + adjust = rdmsr(MSR_IA32_TSC_ADJUST); + exit_to_l0(PORT_REPORT, adjust); + GUEST_ASSERT(adjust <= max); +} + +static void l2_guest_code(void) +{ + uint64_t l1_tsc = rdtsc() - TSC_OFFSET_VALUE; + + wrmsr(MSR_IA32_TSC, l1_tsc - TSC_ADJUST_VALUE); + check_ia32_tsc_adjust(-2 * TSC_ADJUST_VALUE); + + /* Exit to L1 */ + __asm__ __volatile__("vmcall"); +} + +static void l1_guest_code(struct vmx_page *vmx_pages) +{ +#define L2_GUEST_STACK_SIZE 64 + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + uint32_t control; + uintptr_t save_cr3; + + GUEST_ASSERT(rdtsc() < TSC_ADJUST_VALUE); + wrmsr(MSR_IA32_TSC, rdtsc() - TSC_ADJUST_VALUE); + check_ia32_tsc_adjust(-1 * TSC_ADJUST_VALUE); + + prepare_for_vmx_operation(); + + /* Enter VMX root operation. */ + *(uint32_t *)vmx_pages[VMXON_PAGE].virt = vmcs_revision(); + GUEST_ASSERT(!vmxon(vmx_pages[VMXON_PAGE].phys)); + + /* Load a VMCS. */ + *(uint32_t *)vmx_pages[VMCS_PAGE].virt = vmcs_revision(); + GUEST_ASSERT(!vmclear(vmx_pages[VMCS_PAGE].phys)); + GUEST_ASSERT(!vmptrld(vmx_pages[VMCS_PAGE].phys)); + + /* Prepare the VMCS for L2 execution. */ + prepare_vmcs(l2_guest_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]); + control = vmreadz(CPU_BASED_VM_EXEC_CONTROL); + control |= CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_USE_TSC_OFFSETING; + vmwrite(CPU_BASED_VM_EXEC_CONTROL, control); + vmwrite(MSR_BITMAP, vmx_pages[MSR_BITMAP_PAGE].phys); + vmwrite(TSC_OFFSET, TSC_OFFSET_VALUE); + + /* Jump into L2. First, test failure to load guest CR3. */ + save_cr3 = vmreadz(GUEST_CR3); + vmwrite(GUEST_CR3, -1ull); + GUEST_ASSERT(!vmlaunch()); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == + (EXIT_REASON_FAILED_VMENTRY | EXIT_REASON_INVALID_STATE)); + check_ia32_tsc_adjust(-1 * TSC_ADJUST_VALUE); + vmwrite(GUEST_CR3, save_cr3); + + GUEST_ASSERT(!vmlaunch()); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); + + check_ia32_tsc_adjust(-2 * TSC_ADJUST_VALUE); + + exit_to_l0(PORT_DONE, 0); +} + +static void allocate_vmx_page(struct vmx_page *page) +{ + vm_vaddr_t virt; + + virt = vm_vaddr_alloc(vm, PAGE_SIZE, 0, 0, 0); + memset(addr_gva2hva(vm, virt), 0, PAGE_SIZE); + + page->virt = virt; + page->phys = addr_gva2gpa(vm, virt); +} + +static vm_vaddr_t allocate_vmx_pages(void) +{ + vm_vaddr_t vmx_pages_vaddr; + int i; + + vmx_pages_vaddr = vm_vaddr_alloc( + vm, sizeof(struct vmx_page) * NUM_VMX_PAGES, 0, 0, 0); + + vmx_pages = (void *) addr_gva2hva(vm, vmx_pages_vaddr); + + for (i = 0; i < NUM_VMX_PAGES; i++) + allocate_vmx_page(&vmx_pages[i]); + + return vmx_pages_vaddr; +} + +void report(int64_t val) +{ + printf("IA32_TSC_ADJUST is %ld (%lld * TSC_ADJUST_VALUE + %lld).\n", + val, val / TSC_ADJUST_VALUE, val % TSC_ADJUST_VALUE); +} + +int main(int argc, char *argv[]) +{ + vm_vaddr_t vmx_pages_vaddr; + struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1); + + if (!(entry->ecx & CPUID_VMX)) { + fprintf(stderr, "nested VMX not enabled, skipping test\n"); + exit(KSFT_SKIP); + } + + vm = vm_create_default_vmx(VCPU_ID, (void *) l1_guest_code); + + /* Allocate VMX pages and shared descriptors (vmx_pages). */ + vmx_pages_vaddr = allocate_vmx_pages(); + vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_vaddr); + + for (;;) { + volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID); + struct kvm_regs regs; + + vcpu_run(vm, VCPU_ID); + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "Got exit_reason other than KVM_EXIT_IO: %u (%s),\n", + run->exit_reason, + exit_reason_str(run->exit_reason)); + + vcpu_regs_get(vm, VCPU_ID, ®s); + + switch (run->io.port) { + case PORT_ABORT: + TEST_ASSERT(false, "%s", (const char *) regs.rdi); + /* NOT REACHED */ + case PORT_REPORT: + report(regs.rdi); + break; + case PORT_DONE: + goto done; + default: + TEST_ASSERT(false, "Unknown port 0x%x.", run->io.port); + } + } + + kvm_vm_free(vm); +done: + return 0; +} diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk index 195e9d4739a9..c1b1a4dc6a96 100644 --- a/tools/testing/selftests/lib.mk +++ b/tools/testing/selftests/lib.mk @@ -20,10 +20,10 @@ all: $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) .ONESHELL: define RUN_TESTS - @export KSFT_TAP_LEVEL=`echo 1`; - @test_num=`echo 0`; - @echo "TAP version 13"; - @for TEST in $(1); do \ + @export KSFT_TAP_LEVEL=`echo 1`; \ + test_num=`echo 0`; \ + echo "TAP version 13"; \ + for TEST in $(1); do \ BASENAME_TEST=`basename $$TEST`; \ test_num=`echo $$test_num+1 | bc`; \ echo "selftests: $$BASENAME_TEST"; \ diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 785fc18a16b4..3ff81a478dbe 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -6,6 +6,7 @@ CFLAGS += -I../../../../usr/include/ TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh rtnetlink.sh TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh +TEST_PROGS_EXTENDED := in_netns.sh TEST_GEN_FILES = socket TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index 6a75a3ea44ad..7ba089b33e8b 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -7,3 +7,8 @@ CONFIG_NET_L3_MASTER_DEV=y CONFIG_IPV6=y CONFIG_IPV6_MULTIPLE_TABLES=y CONFIG_VETH=y +CONFIG_INET_XFRM_MODE_TUNNEL=y +CONFIG_NET_IPVTI=y +CONFIG_INET6_XFRM_MODE_TUNNEL=y +CONFIG_IPV6_VTI=y +CONFIG_DUMMY=y diff --git a/tools/testing/selftests/net/reuseport_bpf_numa.c b/tools/testing/selftests/net/reuseport_bpf_numa.c index 365c32e84189..c9f478b40996 100644 --- a/tools/testing/selftests/net/reuseport_bpf_numa.c +++ b/tools/testing/selftests/net/reuseport_bpf_numa.c @@ -23,6 +23,8 @@ #include <unistd.h> #include <numa.h> +#include "../kselftest.h" + static const int PORT = 8888; static void build_rcv_group(int *rcv_fd, size_t len, int family, int proto) @@ -229,7 +231,7 @@ int main(void) int *rcv_fd, nodes; if (numa_available() < 0) - error(1, errno, "no numa api support"); + ksft_exit_skip("no numa api support\n"); nodes = numa_max_node() + 1; diff --git a/tools/testing/selftests/proc/proc-loadavg-001.c b/tools/testing/selftests/proc/proc-loadavg-001.c index e38ad6d94d4b..fcff7047000d 100644 --- a/tools/testing/selftests/proc/proc-loadavg-001.c +++ b/tools/testing/selftests/proc/proc-loadavg-001.c @@ -1,5 +1,5 @@ /* - * Copyright _ 2018 Alexey Dobriyan <adobriyan@gmail.com> + * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com> * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above diff --git a/tools/testing/selftests/proc/proc-self-map-files-001.c b/tools/testing/selftests/proc/proc-self-map-files-001.c index af1d0a6af810..4209c64283d6 100644 --- a/tools/testing/selftests/proc/proc-self-map-files-001.c +++ b/tools/testing/selftests/proc/proc-self-map-files-001.c @@ -1,5 +1,5 @@ /* - * Copyright _ 2018 Alexey Dobriyan <adobriyan@gmail.com> + * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com> * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above diff --git a/tools/testing/selftests/proc/proc-self-map-files-002.c b/tools/testing/selftests/proc/proc-self-map-files-002.c index aebf4be56111..6f1f4a6e1ecb 100644 --- a/tools/testing/selftests/proc/proc-self-map-files-002.c +++ b/tools/testing/selftests/proc/proc-self-map-files-002.c @@ -1,5 +1,5 @@ /* - * Copyright _ 2018 Alexey Dobriyan <adobriyan@gmail.com> + * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com> * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above diff --git a/tools/testing/selftests/proc/proc-self-syscall.c b/tools/testing/selftests/proc/proc-self-syscall.c index 05eb6f91f1e9..5ab5f4810e43 100644 --- a/tools/testing/selftests/proc/proc-self-syscall.c +++ b/tools/testing/selftests/proc/proc-self-syscall.c @@ -1,3 +1,18 @@ +/* + * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ #define _GNU_SOURCE #include <unistd.h> #include <sys/syscall.h> diff --git a/tools/testing/selftests/proc/proc-self-wchan.c b/tools/testing/selftests/proc/proc-self-wchan.c index b8d8728a6869..a38b2fbaa7ad 100644 --- a/tools/testing/selftests/proc/proc-self-wchan.c +++ b/tools/testing/selftests/proc/proc-self-wchan.c @@ -1,3 +1,18 @@ +/* + * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ #include <sys/types.h> #include <sys/stat.h> #include <fcntl.h> diff --git a/tools/testing/selftests/proc/proc-uptime-001.c b/tools/testing/selftests/proc/proc-uptime-001.c index 303f26092306..781f7a50fc3f 100644 --- a/tools/testing/selftests/proc/proc-uptime-001.c +++ b/tools/testing/selftests/proc/proc-uptime-001.c @@ -1,5 +1,5 @@ /* - * Copyright _ 2018 Alexey Dobriyan <adobriyan@gmail.com> + * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com> * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above diff --git a/tools/testing/selftests/proc/proc-uptime-002.c b/tools/testing/selftests/proc/proc-uptime-002.c index 0cb79e1f1674..30e2b7849089 100644 --- a/tools/testing/selftests/proc/proc-uptime-002.c +++ b/tools/testing/selftests/proc/proc-uptime-002.c @@ -1,5 +1,5 @@ /* - * Copyright _ 2018 Alexey Dobriyan <adobriyan@gmail.com> + * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com> * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above diff --git a/tools/testing/selftests/proc/proc-uptime.h b/tools/testing/selftests/proc/proc-uptime.h index d584419f50a7..0e464b50e9d9 100644 --- a/tools/testing/selftests/proc/proc-uptime.h +++ b/tools/testing/selftests/proc/proc-uptime.h @@ -1,5 +1,5 @@ /* - * Copyright _ 2018 Alexey Dobriyan <adobriyan@gmail.com> + * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com> * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above diff --git a/tools/testing/selftests/proc/read.c b/tools/testing/selftests/proc/read.c index 12e397f78592..1e73c2232097 100644 --- a/tools/testing/selftests/proc/read.c +++ b/tools/testing/selftests/proc/read.c @@ -1,5 +1,5 @@ /* - * Copyright _ 2018 Alexey Dobriyan <adobriyan@gmail.com> + * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com> * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 168c66d74fc5..e1473234968d 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -134,11 +134,15 @@ struct seccomp_data { #endif #ifndef SECCOMP_FILTER_FLAG_TSYNC -#define SECCOMP_FILTER_FLAG_TSYNC 1 +#define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0) #endif #ifndef SECCOMP_FILTER_FLAG_LOG -#define SECCOMP_FILTER_FLAG_LOG 2 +#define SECCOMP_FILTER_FLAG_LOG (1UL << 1) +#endif + +#ifndef SECCOMP_FILTER_FLAG_SPEC_ALLOW +#define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2) #endif #ifndef PTRACE_SECCOMP_GET_METADATA @@ -2072,14 +2076,26 @@ TEST(seccomp_syscall_mode_lock) TEST(detect_seccomp_filter_flags) { unsigned int flags[] = { SECCOMP_FILTER_FLAG_TSYNC, - SECCOMP_FILTER_FLAG_LOG }; + SECCOMP_FILTER_FLAG_LOG, + SECCOMP_FILTER_FLAG_SPEC_ALLOW }; unsigned int flag, all_flags; int i; long ret; /* Test detection of known-good filter flags */ for (i = 0, all_flags = 0; i < ARRAY_SIZE(flags); i++) { + int bits = 0; + flag = flags[i]; + /* Make sure the flag is a single bit! */ + while (flag) { + if (flag & 0x1) + bits ++; + flag >>= 1; + } + ASSERT_EQ(1, bits); + flag = flags[i]; + ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL); ASSERT_NE(ENOSYS, errno) { TH_LOG("Kernel does not support seccomp syscall!"); diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json b/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json index 5b012f4981d4..6f289a49e5ec 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json @@ -66,7 +66,7 @@ "cmdUnderTest": "$TC action add action bpf object-file _b.o index 667", "expExitCode": "0", "verifyCmd": "$TC action get action bpf index 667", - "matchPattern": "action order [0-9]*: bpf _b.o:\\[action\\] id [0-9]* tag 3b185187f1855c4c default-action pipe.*index 667 ref", + "matchPattern": "action order [0-9]*: bpf _b.o:\\[action\\] id [0-9]* tag 3b185187f1855c4c( jited)? default-action pipe.*index 667 ref", "matchCount": "1", "teardown": [ "$TC action flush action bpf", @@ -92,10 +92,15 @@ "cmdUnderTest": "$TC action add action bpf object-file _c.o index 667", "expExitCode": "255", "verifyCmd": "$TC action get action bpf index 667", - "matchPattern": "action order [0-9]*: bpf _b.o:\\[action\\] id [0-9].*index 667 ref", + "matchPattern": "action order [0-9]*: bpf _c.o:\\[action\\] id [0-9].*index 667 ref", "matchCount": "0", "teardown": [ - "$TC action flush action bpf", + [ + "$TC action flush action bpf", + 0, + 1, + 255 + ], "rm -f _c.o" ] }, diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index d744991c0f4f..39f66bc29b82 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -11,7 +11,7 @@ CAN_BUILD_X86_64 := $(shell ./check_cc.sh $(CC) trivial_64bit_program.c) TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \ check_initial_reg_state sigreturn iopl mpx-mini-test ioperm \ - protection_keys test_vdso test_vsyscall + protection_keys test_vdso test_vsyscall mov_ss_trap TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \ test_FCMOV test_FCOMI test_FISTTP \ vdso_restorer diff --git a/tools/testing/selftests/x86/mov_ss_trap.c b/tools/testing/selftests/x86/mov_ss_trap.c new file mode 100644 index 000000000000..3c3a022654f3 --- /dev/null +++ b/tools/testing/selftests/x86/mov_ss_trap.c @@ -0,0 +1,285 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * mov_ss_trap.c: Exercise the bizarre side effects of a watchpoint on MOV SS + * + * This does MOV SS from a watchpointed address followed by various + * types of kernel entries. A MOV SS that hits a watchpoint will queue + * up a #DB trap but will not actually deliver that trap. The trap + * will be delivered after the next instruction instead. The CPU's logic + * seems to be: + * + * - Any fault: drop the pending #DB trap. + * - INT $N, INT3, INTO, SYSCALL, SYSENTER: enter the kernel and then + * deliver #DB. + * - ICEBP: enter the kernel but do not deliver the watchpoint trap + * - breakpoint: only one #DB is delivered (phew!) + * + * There are plenty of ways for a kernel to handle this incorrectly. This + * test tries to exercise all the cases. + * + * This should mostly cover CVE-2018-1087 and CVE-2018-8897. + */ +#define _GNU_SOURCE + +#include <stdlib.h> +#include <sys/ptrace.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <sys/user.h> +#include <sys/syscall.h> +#include <unistd.h> +#include <errno.h> +#include <stddef.h> +#include <stdio.h> +#include <err.h> +#include <string.h> +#include <setjmp.h> +#include <sys/prctl.h> + +#define X86_EFLAGS_RF (1UL << 16) + +#if __x86_64__ +# define REG_IP REG_RIP +#else +# define REG_IP REG_EIP +#endif + +unsigned short ss; +extern unsigned char breakpoint_insn[]; +sigjmp_buf jmpbuf; +static unsigned char altstack_data[SIGSTKSZ]; + +static void enable_watchpoint(void) +{ + pid_t parent = getpid(); + int status; + + pid_t child = fork(); + if (child < 0) + err(1, "fork"); + + if (child) { + if (waitpid(child, &status, 0) != child) + err(1, "waitpid for child"); + } else { + unsigned long dr0, dr1, dr7; + + dr0 = (unsigned long)&ss; + dr1 = (unsigned long)breakpoint_insn; + dr7 = ((1UL << 1) | /* G0 */ + (3UL << 16) | /* RW0 = read or write */ + (1UL << 18) | /* LEN0 = 2 bytes */ + (1UL << 3)); /* G1, RW1 = insn */ + + if (ptrace(PTRACE_ATTACH, parent, NULL, NULL) != 0) + err(1, "PTRACE_ATTACH"); + + if (waitpid(parent, &status, 0) != parent) + err(1, "waitpid for child"); + + if (ptrace(PTRACE_POKEUSER, parent, (void *)offsetof(struct user, u_debugreg[0]), dr0) != 0) + err(1, "PTRACE_POKEUSER DR0"); + + if (ptrace(PTRACE_POKEUSER, parent, (void *)offsetof(struct user, u_debugreg[1]), dr1) != 0) + err(1, "PTRACE_POKEUSER DR1"); + + if (ptrace(PTRACE_POKEUSER, parent, (void *)offsetof(struct user, u_debugreg[7]), dr7) != 0) + err(1, "PTRACE_POKEUSER DR7"); + + printf("\tDR0 = %lx, DR1 = %lx, DR7 = %lx\n", dr0, dr1, dr7); + + if (ptrace(PTRACE_DETACH, parent, NULL, NULL) != 0) + err(1, "PTRACE_DETACH"); + + exit(0); + } +} + +static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), + int flags) +{ + struct sigaction sa; + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = handler; + sa.sa_flags = SA_SIGINFO | flags; + sigemptyset(&sa.sa_mask); + if (sigaction(sig, &sa, 0)) + err(1, "sigaction"); +} + +static char const * const signames[] = { + [SIGSEGV] = "SIGSEGV", + [SIGBUS] = "SIBGUS", + [SIGTRAP] = "SIGTRAP", + [SIGILL] = "SIGILL", +}; + +static void sigtrap(int sig, siginfo_t *si, void *ctx_void) +{ + ucontext_t *ctx = ctx_void; + + printf("\tGot SIGTRAP with RIP=%lx, EFLAGS.RF=%d\n", + (unsigned long)ctx->uc_mcontext.gregs[REG_IP], + !!(ctx->uc_mcontext.gregs[REG_EFL] & X86_EFLAGS_RF)); +} + +static void handle_and_return(int sig, siginfo_t *si, void *ctx_void) +{ + ucontext_t *ctx = ctx_void; + + printf("\tGot %s with RIP=%lx\n", signames[sig], + (unsigned long)ctx->uc_mcontext.gregs[REG_IP]); +} + +static void handle_and_longjmp(int sig, siginfo_t *si, void *ctx_void) +{ + ucontext_t *ctx = ctx_void; + + printf("\tGot %s with RIP=%lx\n", signames[sig], + (unsigned long)ctx->uc_mcontext.gregs[REG_IP]); + + siglongjmp(jmpbuf, 1); +} + +int main() +{ + unsigned long nr; + + asm volatile ("mov %%ss, %[ss]" : [ss] "=m" (ss)); + printf("\tSS = 0x%hx, &SS = 0x%p\n", ss, &ss); + + if (prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY, 0, 0, 0) == 0) + printf("\tPR_SET_PTRACER_ANY succeeded\n"); + + printf("\tSet up a watchpoint\n"); + sethandler(SIGTRAP, sigtrap, 0); + enable_watchpoint(); + + printf("[RUN]\tRead from watched memory (should get SIGTRAP)\n"); + asm volatile ("mov %[ss], %[tmp]" : [tmp] "=r" (nr) : [ss] "m" (ss)); + + printf("[RUN]\tMOV SS; INT3\n"); + asm volatile ("mov %[ss], %%ss; int3" :: [ss] "m" (ss)); + + printf("[RUN]\tMOV SS; INT 3\n"); + asm volatile ("mov %[ss], %%ss; .byte 0xcd, 0x3" :: [ss] "m" (ss)); + + printf("[RUN]\tMOV SS; CS CS INT3\n"); + asm volatile ("mov %[ss], %%ss; .byte 0x2e, 0x2e; int3" :: [ss] "m" (ss)); + + printf("[RUN]\tMOV SS; CSx14 INT3\n"); + asm volatile ("mov %[ss], %%ss; .fill 14,1,0x2e; int3" :: [ss] "m" (ss)); + + printf("[RUN]\tMOV SS; INT 4\n"); + sethandler(SIGSEGV, handle_and_return, SA_RESETHAND); + asm volatile ("mov %[ss], %%ss; int $4" :: [ss] "m" (ss)); + +#ifdef __i386__ + printf("[RUN]\tMOV SS; INTO\n"); + sethandler(SIGSEGV, handle_and_return, SA_RESETHAND); + nr = -1; + asm volatile ("add $1, %[tmp]; mov %[ss], %%ss; into" + : [tmp] "+r" (nr) : [ss] "m" (ss)); +#endif + + if (sigsetjmp(jmpbuf, 1) == 0) { + printf("[RUN]\tMOV SS; ICEBP\n"); + + /* Some emulators (e.g. QEMU TCG) don't emulate ICEBP. */ + sethandler(SIGILL, handle_and_longjmp, SA_RESETHAND); + + asm volatile ("mov %[ss], %%ss; .byte 0xf1" :: [ss] "m" (ss)); + } + + if (sigsetjmp(jmpbuf, 1) == 0) { + printf("[RUN]\tMOV SS; CLI\n"); + sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND); + asm volatile ("mov %[ss], %%ss; cli" :: [ss] "m" (ss)); + } + + if (sigsetjmp(jmpbuf, 1) == 0) { + printf("[RUN]\tMOV SS; #PF\n"); + sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND); + asm volatile ("mov %[ss], %%ss; mov (-1), %[tmp]" + : [tmp] "=r" (nr) : [ss] "m" (ss)); + } + + /* + * INT $1: if #DB has DPL=3 and there isn't special handling, + * then the kernel will die. + */ + if (sigsetjmp(jmpbuf, 1) == 0) { + printf("[RUN]\tMOV SS; INT 1\n"); + sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND); + asm volatile ("mov %[ss], %%ss; int $1" :: [ss] "m" (ss)); + } + +#ifdef __x86_64__ + /* + * In principle, we should test 32-bit SYSCALL as well, but + * the calling convention is so unpredictable that it's + * not obviously worth the effort. + */ + if (sigsetjmp(jmpbuf, 1) == 0) { + printf("[RUN]\tMOV SS; SYSCALL\n"); + sethandler(SIGILL, handle_and_longjmp, SA_RESETHAND); + nr = SYS_getpid; + /* + * Toggle the high bit of RSP to make it noncanonical to + * strengthen this test on non-SMAP systems. + */ + asm volatile ("btc $63, %%rsp\n\t" + "mov %[ss], %%ss; syscall\n\t" + "btc $63, %%rsp" + : "+a" (nr) : [ss] "m" (ss) + : "rcx" +#ifdef __x86_64__ + , "r11" +#endif + ); + } +#endif + + printf("[RUN]\tMOV SS; breakpointed NOP\n"); + asm volatile ("mov %[ss], %%ss; breakpoint_insn: nop" :: [ss] "m" (ss)); + + /* + * Invoking SYSENTER directly breaks all the rules. Just handle + * the SIGSEGV. + */ + if (sigsetjmp(jmpbuf, 1) == 0) { + printf("[RUN]\tMOV SS; SYSENTER\n"); + stack_t stack = { + .ss_sp = altstack_data, + .ss_size = SIGSTKSZ, + }; + if (sigaltstack(&stack, NULL) != 0) + err(1, "sigaltstack"); + sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND | SA_ONSTACK); + nr = SYS_getpid; + asm volatile ("mov %[ss], %%ss; SYSENTER" : "+a" (nr) + : [ss] "m" (ss) : "flags", "rcx" +#ifdef __x86_64__ + , "r11" +#endif + ); + + /* We're unreachable here. SYSENTER forgets RIP. */ + } + + if (sigsetjmp(jmpbuf, 1) == 0) { + printf("[RUN]\tMOV SS; INT $0x80\n"); + sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND); + nr = 20; /* compat getpid */ + asm volatile ("mov %[ss], %%ss; int $0x80" + : "+a" (nr) : [ss] "m" (ss) + : "flags" +#ifdef __x86_64__ + , "r8", "r9", "r10", "r11" +#endif + ); + } + + printf("[OK]\tI aten't dead\n"); + return 0; +} diff --git a/tools/testing/selftests/x86/mpx-mini-test.c b/tools/testing/selftests/x86/mpx-mini-test.c index 9c0325e1ea68..50f7e9272481 100644 --- a/tools/testing/selftests/x86/mpx-mini-test.c +++ b/tools/testing/selftests/x86/mpx-mini-test.c @@ -368,6 +368,11 @@ static int expected_bnd_index = -1; uint64_t shadow_plb[NR_MPX_BOUNDS_REGISTERS][2]; /* shadow MPX bound registers */ unsigned long shadow_map[NR_MPX_BOUNDS_REGISTERS]; +/* Failed address bound checks: */ +#ifndef SEGV_BNDERR +# define SEGV_BNDERR 3 +#endif + /* * The kernel is supposed to provide some information about the bounds * exception in the siginfo. It should match what we have in the bounds @@ -419,8 +424,6 @@ void handler(int signum, siginfo_t *si, void *vucontext) br_count++; dprintf1("#BR 0x%jx (total seen: %d)\n", status, br_count); -#define SEGV_BNDERR 3 /* failed address bound checks */ - dprintf2("Saw a #BR! status 0x%jx at %016lx br_reason: %jx\n", status, ip, br_reason); dprintf2("si_signo: %d\n", si->si_signo); diff --git a/tools/testing/selftests/x86/pkey-helpers.h b/tools/testing/selftests/x86/pkey-helpers.h index b3cb7670e026..254e5436bdd9 100644 --- a/tools/testing/selftests/x86/pkey-helpers.h +++ b/tools/testing/selftests/x86/pkey-helpers.h @@ -26,30 +26,26 @@ static inline void sigsafe_printf(const char *format, ...) { va_list ap; - va_start(ap, format); if (!dprint_in_signal) { + va_start(ap, format); vprintf(format, ap); + va_end(ap); } else { int ret; - int len = vsnprintf(dprint_in_signal_buffer, - DPRINT_IN_SIGNAL_BUF_SIZE, - format, ap); /* - * len is amount that would have been printed, - * but actual write is truncated at BUF_SIZE. + * No printf() functions are signal-safe. + * They deadlock easily. Write the format + * string to get some output, even if + * incomplete. */ - if (len > DPRINT_IN_SIGNAL_BUF_SIZE) - len = DPRINT_IN_SIGNAL_BUF_SIZE; - ret = write(1, dprint_in_signal_buffer, len); + ret = write(1, format, strlen(format)); if (ret < 0) - abort(); + exit(1); } - va_end(ap); } #define dprintf_level(level, args...) do { \ if (level <= DEBUG_LEVEL) \ sigsafe_printf(args); \ - fflush(NULL); \ } while (0) #define dprintf0(args...) dprintf_level(0, args) #define dprintf1(args...) dprintf_level(1, args) diff --git a/tools/testing/selftests/x86/protection_keys.c b/tools/testing/selftests/x86/protection_keys.c index f15aa5a76fe3..460b4bdf4c1e 100644 --- a/tools/testing/selftests/x86/protection_keys.c +++ b/tools/testing/selftests/x86/protection_keys.c @@ -72,10 +72,9 @@ extern void abort_hooks(void); test_nr, iteration_nr); \ dprintf0("errno at assert: %d", errno); \ abort_hooks(); \ - assert(condition); \ + exit(__LINE__); \ } \ } while (0) -#define raw_assert(cond) assert(cond) void cat_into_file(char *str, char *file) { @@ -87,12 +86,17 @@ void cat_into_file(char *str, char *file) * these need to be raw because they are called under * pkey_assert() */ - raw_assert(fd >= 0); + if (fd < 0) { + fprintf(stderr, "error opening '%s'\n", str); + perror("error: "); + exit(__LINE__); + } + ret = write(fd, str, strlen(str)); if (ret != strlen(str)) { perror("write to file failed"); fprintf(stderr, "filename: '%s' str: '%s'\n", file, str); - raw_assert(0); + exit(__LINE__); } close(fd); } @@ -191,26 +195,30 @@ void lots_o_noops_around_write(int *write_to_me) #ifdef __i386__ #ifndef SYS_mprotect_key -# define SYS_mprotect_key 380 +# define SYS_mprotect_key 380 #endif + #ifndef SYS_pkey_alloc -# define SYS_pkey_alloc 381 -# define SYS_pkey_free 382 +# define SYS_pkey_alloc 381 +# define SYS_pkey_free 382 #endif -#define REG_IP_IDX REG_EIP -#define si_pkey_offset 0x14 + +#define REG_IP_IDX REG_EIP +#define si_pkey_offset 0x14 #else #ifndef SYS_mprotect_key -# define SYS_mprotect_key 329 +# define SYS_mprotect_key 329 #endif + #ifndef SYS_pkey_alloc -# define SYS_pkey_alloc 330 -# define SYS_pkey_free 331 +# define SYS_pkey_alloc 330 +# define SYS_pkey_free 331 #endif -#define REG_IP_IDX REG_RIP -#define si_pkey_offset 0x20 + +#define REG_IP_IDX REG_RIP +#define si_pkey_offset 0x20 #endif @@ -225,8 +233,14 @@ void dump_mem(void *dumpme, int len_bytes) } } -#define SEGV_BNDERR 3 /* failed address bound checks */ -#define SEGV_PKUERR 4 +/* Failed address bound checks: */ +#ifndef SEGV_BNDERR +# define SEGV_BNDERR 3 +#endif + +#ifndef SEGV_PKUERR +# define SEGV_PKUERR 4 +#endif static char *si_code_str(int si_code) { @@ -289,13 +303,6 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext) dump_mem(pkru_ptr - 128, 256); pkey_assert(*pkru_ptr); - si_pkey_ptr = (u32 *)(((u8 *)si) + si_pkey_offset); - dprintf1("si_pkey_ptr: %p\n", si_pkey_ptr); - dump_mem(si_pkey_ptr - 8, 24); - siginfo_pkey = *si_pkey_ptr; - pkey_assert(siginfo_pkey < NR_PKEYS); - last_si_pkey = siginfo_pkey; - if ((si->si_code == SEGV_MAPERR) || (si->si_code == SEGV_ACCERR) || (si->si_code == SEGV_BNDERR)) { @@ -303,6 +310,13 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext) exit(4); } + si_pkey_ptr = (u32 *)(((u8 *)si) + si_pkey_offset); + dprintf1("si_pkey_ptr: %p\n", si_pkey_ptr); + dump_mem((u8 *)si_pkey_ptr - 8, 24); + siginfo_pkey = *si_pkey_ptr; + pkey_assert(siginfo_pkey < NR_PKEYS); + last_si_pkey = siginfo_pkey; + dprintf1("signal pkru from xsave: %08x\n", *pkru_ptr); /* need __rdpkru() version so we do not do shadow_pkru checking */ dprintf1("signal pkru from pkru: %08x\n", __rdpkru()); @@ -311,22 +325,6 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext) dprintf1("WARNING: set PRKU=0 to allow faulting instruction to continue\n"); pkru_faults++; dprintf1("<<<<==================================================\n"); - return; - if (trapno == 14) { - fprintf(stderr, - "ERROR: In signal handler, page fault, trapno = %d, ip = %016lx\n", - trapno, ip); - fprintf(stderr, "si_addr %p\n", si->si_addr); - fprintf(stderr, "REG_ERR: %lx\n", - (unsigned long)uctxt->uc_mcontext.gregs[REG_ERR]); - exit(1); - } else { - fprintf(stderr, "unexpected trap %d! at 0x%lx\n", trapno, ip); - fprintf(stderr, "si_addr %p\n", si->si_addr); - fprintf(stderr, "REG_ERR: %lx\n", - (unsigned long)uctxt->uc_mcontext.gregs[REG_ERR]); - exit(2); - } dprint_in_signal = 0; } @@ -393,10 +391,15 @@ pid_t fork_lazy_child(void) return forkret; } -#define PKEY_DISABLE_ACCESS 0x1 -#define PKEY_DISABLE_WRITE 0x2 +#ifndef PKEY_DISABLE_ACCESS +# define PKEY_DISABLE_ACCESS 0x1 +#endif + +#ifndef PKEY_DISABLE_WRITE +# define PKEY_DISABLE_WRITE 0x2 +#endif -u32 pkey_get(int pkey, unsigned long flags) +static u32 hw_pkey_get(int pkey, unsigned long flags) { u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE); u32 pkru = __rdpkru(); @@ -418,7 +421,7 @@ u32 pkey_get(int pkey, unsigned long flags) return masked_pkru; } -int pkey_set(int pkey, unsigned long rights, unsigned long flags) +static int hw_pkey_set(int pkey, unsigned long rights, unsigned long flags) { u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE); u32 old_pkru = __rdpkru(); @@ -452,15 +455,15 @@ void pkey_disable_set(int pkey, int flags) pkey, flags); pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE)); - pkey_rights = pkey_get(pkey, syscall_flags); + pkey_rights = hw_pkey_get(pkey, syscall_flags); - dprintf1("%s(%d) pkey_get(%d): %x\n", __func__, + dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__, pkey, pkey, pkey_rights); pkey_assert(pkey_rights >= 0); pkey_rights |= flags; - ret = pkey_set(pkey, pkey_rights, syscall_flags); + ret = hw_pkey_set(pkey, pkey_rights, syscall_flags); assert(!ret); /*pkru and flags have the same format */ shadow_pkru |= flags << (pkey * 2); @@ -468,8 +471,8 @@ void pkey_disable_set(int pkey, int flags) pkey_assert(ret >= 0); - pkey_rights = pkey_get(pkey, syscall_flags); - dprintf1("%s(%d) pkey_get(%d): %x\n", __func__, + pkey_rights = hw_pkey_get(pkey, syscall_flags); + dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__, pkey, pkey, pkey_rights); dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru()); @@ -483,24 +486,24 @@ void pkey_disable_clear(int pkey, int flags) { unsigned long syscall_flags = 0; int ret; - int pkey_rights = pkey_get(pkey, syscall_flags); + int pkey_rights = hw_pkey_get(pkey, syscall_flags); u32 orig_pkru = rdpkru(); pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE)); - dprintf1("%s(%d) pkey_get(%d): %x\n", __func__, + dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__, pkey, pkey, pkey_rights); pkey_assert(pkey_rights >= 0); pkey_rights |= flags; - ret = pkey_set(pkey, pkey_rights, 0); + ret = hw_pkey_set(pkey, pkey_rights, 0); /* pkru and flags have the same format */ shadow_pkru &= ~(flags << (pkey * 2)); pkey_assert(ret >= 0); - pkey_rights = pkey_get(pkey, syscall_flags); - dprintf1("%s(%d) pkey_get(%d): %x\n", __func__, + pkey_rights = hw_pkey_get(pkey, syscall_flags); + dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__, pkey, pkey, pkey_rights); dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru()); @@ -674,10 +677,12 @@ int mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot, struct pkey_malloc_record { void *ptr; long size; + int prot; }; struct pkey_malloc_record *pkey_malloc_records; +struct pkey_malloc_record *pkey_last_malloc_record; long nr_pkey_malloc_records; -void record_pkey_malloc(void *ptr, long size) +void record_pkey_malloc(void *ptr, long size, int prot) { long i; struct pkey_malloc_record *rec = NULL; @@ -709,6 +714,8 @@ void record_pkey_malloc(void *ptr, long size) (int)(rec - pkey_malloc_records), rec, ptr, size); rec->ptr = ptr; rec->size = size; + rec->prot = prot; + pkey_last_malloc_record = rec; nr_pkey_malloc_records++; } @@ -753,7 +760,7 @@ void *malloc_pkey_with_mprotect(long size, int prot, u16 pkey) pkey_assert(ptr != (void *)-1); ret = mprotect_pkey((void *)ptr, PAGE_SIZE, prot, pkey); pkey_assert(!ret); - record_pkey_malloc(ptr, size); + record_pkey_malloc(ptr, size, prot); rdpkru(); dprintf1("%s() for pkey %d @ %p\n", __func__, pkey, ptr); @@ -774,7 +781,7 @@ void *malloc_pkey_anon_huge(long size, int prot, u16 pkey) size = ALIGN_UP(size, HPAGE_SIZE * 2); ptr = mmap(NULL, size, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); pkey_assert(ptr != (void *)-1); - record_pkey_malloc(ptr, size); + record_pkey_malloc(ptr, size, prot); mprotect_pkey(ptr, size, prot, pkey); dprintf1("unaligned ptr: %p\n", ptr); @@ -847,7 +854,7 @@ void *malloc_pkey_hugetlb(long size, int prot, u16 pkey) pkey_assert(ptr != (void *)-1); mprotect_pkey(ptr, size, prot, pkey); - record_pkey_malloc(ptr, size); + record_pkey_malloc(ptr, size, prot); dprintf1("mmap()'d hugetlbfs for pkey %d @ %p\n", pkey, ptr); return ptr; @@ -869,7 +876,7 @@ void *malloc_pkey_mmap_dax(long size, int prot, u16 pkey) mprotect_pkey(ptr, size, prot, pkey); - record_pkey_malloc(ptr, size); + record_pkey_malloc(ptr, size, prot); dprintf1("mmap()'d for pkey %d @ %p\n", pkey, ptr); close(fd); @@ -918,13 +925,21 @@ void *malloc_pkey(long size, int prot, u16 pkey) } int last_pkru_faults; +#define UNKNOWN_PKEY -2 void expected_pk_fault(int pkey) { dprintf2("%s(): last_pkru_faults: %d pkru_faults: %d\n", __func__, last_pkru_faults, pkru_faults); dprintf2("%s(%d): last_si_pkey: %d\n", __func__, pkey, last_si_pkey); pkey_assert(last_pkru_faults + 1 == pkru_faults); - pkey_assert(last_si_pkey == pkey); + + /* + * For exec-only memory, we do not know the pkey in + * advance, so skip this check. + */ + if (pkey != UNKNOWN_PKEY) + pkey_assert(last_si_pkey == pkey); + /* * The signal handler shold have cleared out PKRU to let the * test program continue. We now have to restore it. @@ -939,10 +954,11 @@ void expected_pk_fault(int pkey) last_si_pkey = -1; } -void do_not_expect_pk_fault(void) -{ - pkey_assert(last_pkru_faults == pkru_faults); -} +#define do_not_expect_pk_fault(msg) do { \ + if (last_pkru_faults != pkru_faults) \ + dprintf0("unexpected PK fault: %s\n", msg); \ + pkey_assert(last_pkru_faults == pkru_faults); \ +} while (0) int test_fds[10] = { -1 }; int nr_test_fds; @@ -1151,12 +1167,15 @@ void test_pkey_alloc_exhaust(int *ptr, u16 pkey) pkey_assert(i < NR_PKEYS*2); /* - * There are 16 pkeys supported in hardware. One is taken - * up for the default (0) and another can be taken up by - * an execute-only mapping. Ensure that we can allocate - * at least 14 (16-2). + * There are 16 pkeys supported in hardware. Three are + * allocated by the time we get here: + * 1. The default key (0) + * 2. One possibly consumed by an execute-only mapping. + * 3. One allocated by the test code and passed in via + * 'pkey' to this function. + * Ensure that we can allocate at least another 13 (16-3). */ - pkey_assert(i >= NR_PKEYS-2); + pkey_assert(i >= NR_PKEYS-3); for (i = 0; i < nr_allocated_pkeys; i++) { err = sys_pkey_free(allocated_pkeys[i]); @@ -1165,6 +1184,35 @@ void test_pkey_alloc_exhaust(int *ptr, u16 pkey) } } +/* + * pkey 0 is special. It is allocated by default, so you do not + * have to call pkey_alloc() to use it first. Make sure that it + * is usable. + */ +void test_mprotect_with_pkey_0(int *ptr, u16 pkey) +{ + long size; + int prot; + + assert(pkey_last_malloc_record); + size = pkey_last_malloc_record->size; + /* + * This is a bit of a hack. But mprotect() requires + * huge-page-aligned sizes when operating on hugetlbfs. + * So, make sure that we use something that's a multiple + * of a huge page when we can. + */ + if (size >= HPAGE_SIZE) + size = HPAGE_SIZE; + prot = pkey_last_malloc_record->prot; + + /* Use pkey 0 */ + mprotect_pkey(ptr, size, prot, 0); + + /* Make sure that we can set it back to the original pkey. */ + mprotect_pkey(ptr, size, prot, pkey); +} + void test_ptrace_of_child(int *ptr, u16 pkey) { __attribute__((__unused__)) int peek_result; @@ -1228,7 +1276,7 @@ void test_ptrace_of_child(int *ptr, u16 pkey) pkey_assert(ret != -1); /* Now access from the current task, and expect NO exception: */ peek_result = read_ptr(plain_ptr); - do_not_expect_pk_fault(); + do_not_expect_pk_fault("read plain pointer after ptrace"); ret = ptrace(PTRACE_DETACH, child_pid, ignored, 0); pkey_assert(ret != -1); @@ -1241,12 +1289,9 @@ void test_ptrace_of_child(int *ptr, u16 pkey) free(plain_ptr_unaligned); } -void test_executing_on_unreadable_memory(int *ptr, u16 pkey) +void *get_pointer_to_instructions(void) { void *p1; - int scratch; - int ptr_contents; - int ret; p1 = ALIGN_PTR_UP(&lots_o_noops_around_write, PAGE_SIZE); dprintf3("&lots_o_noops: %p\n", &lots_o_noops_around_write); @@ -1256,7 +1301,23 @@ void test_executing_on_unreadable_memory(int *ptr, u16 pkey) /* Point 'p1' at the *second* page of the function: */ p1 += PAGE_SIZE; + /* + * Try to ensure we fault this in on next touch to ensure + * we get an instruction fault as opposed to a data one + */ madvise(p1, PAGE_SIZE, MADV_DONTNEED); + + return p1; +} + +void test_executing_on_unreadable_memory(int *ptr, u16 pkey) +{ + void *p1; + int scratch; + int ptr_contents; + int ret; + + p1 = get_pointer_to_instructions(); lots_o_noops_around_write(&scratch); ptr_contents = read_ptr(p1); dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents); @@ -1272,12 +1333,55 @@ void test_executing_on_unreadable_memory(int *ptr, u16 pkey) */ madvise(p1, PAGE_SIZE, MADV_DONTNEED); lots_o_noops_around_write(&scratch); - do_not_expect_pk_fault(); + do_not_expect_pk_fault("executing on PROT_EXEC memory"); ptr_contents = read_ptr(p1); dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents); expected_pk_fault(pkey); } +void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey) +{ + void *p1; + int scratch; + int ptr_contents; + int ret; + + dprintf1("%s() start\n", __func__); + + p1 = get_pointer_to_instructions(); + lots_o_noops_around_write(&scratch); + ptr_contents = read_ptr(p1); + dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents); + + /* Use a *normal* mprotect(), not mprotect_pkey(): */ + ret = mprotect(p1, PAGE_SIZE, PROT_EXEC); + pkey_assert(!ret); + + dprintf2("pkru: %x\n", rdpkru()); + + /* Make sure this is an *instruction* fault */ + madvise(p1, PAGE_SIZE, MADV_DONTNEED); + lots_o_noops_around_write(&scratch); + do_not_expect_pk_fault("executing on PROT_EXEC memory"); + ptr_contents = read_ptr(p1); + dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents); + expected_pk_fault(UNKNOWN_PKEY); + + /* + * Put the memory back to non-PROT_EXEC. Should clear the + * exec-only pkey off the VMA and allow it to be readable + * again. Go to PROT_NONE first to check for a kernel bug + * that did not clear the pkey when doing PROT_NONE. + */ + ret = mprotect(p1, PAGE_SIZE, PROT_NONE); + pkey_assert(!ret); + + ret = mprotect(p1, PAGE_SIZE, PROT_READ|PROT_EXEC); + pkey_assert(!ret); + ptr_contents = read_ptr(p1); + do_not_expect_pk_fault("plain read on recently PROT_EXEC area"); +} + void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey) { int size = PAGE_SIZE; @@ -1302,6 +1406,8 @@ void (*pkey_tests[])(int *ptr, u16 pkey) = { test_kernel_gup_of_access_disabled_region, test_kernel_gup_write_to_write_disabled_region, test_executing_on_unreadable_memory, + test_implicit_mprotect_exec_only_memory, + test_mprotect_with_pkey_0, test_ptrace_of_child, test_pkey_syscalls_on_non_allocated_pkey, test_pkey_syscalls_bad_args, diff --git a/tools/testing/selftests/x86/test_syscall_vdso.c b/tools/testing/selftests/x86/test_syscall_vdso.c index 40370354d4c1..c9c3281077bc 100644 --- a/tools/testing/selftests/x86/test_syscall_vdso.c +++ b/tools/testing/selftests/x86/test_syscall_vdso.c @@ -100,12 +100,19 @@ asm ( " shl $32, %r8\n" " orq $0x7f7f7f7f, %r8\n" " movq %r8, %r9\n" - " movq %r8, %r10\n" - " movq %r8, %r11\n" - " movq %r8, %r12\n" - " movq %r8, %r13\n" - " movq %r8, %r14\n" - " movq %r8, %r15\n" + " incq %r9\n" + " movq %r9, %r10\n" + " incq %r10\n" + " movq %r10, %r11\n" + " incq %r11\n" + " movq %r11, %r12\n" + " incq %r12\n" + " movq %r12, %r13\n" + " incq %r13\n" + " movq %r13, %r14\n" + " incq %r14\n" + " movq %r14, %r15\n" + " incq %r15\n" " ret\n" " .code32\n" " .popsection\n" @@ -128,12 +135,13 @@ int check_regs64(void) int err = 0; int num = 8; uint64_t *r64 = ®s64.r8; + uint64_t expected = 0x7f7f7f7f7f7f7f7fULL; if (!kernel_is_64bit) return 0; do { - if (*r64 == 0x7f7f7f7f7f7f7f7fULL) + if (*r64 == expected++) continue; /* register did not change */ if (syscall_addr != (long)&int80) { /* @@ -147,18 +155,17 @@ int check_regs64(void) continue; } } else { - /* INT80 syscall entrypoint can be used by + /* + * INT80 syscall entrypoint can be used by * 64-bit programs too, unlike SYSCALL/SYSENTER. * Therefore it must preserve R12+ * (they are callee-saved registers in 64-bit C ABI). * - * This was probably historically not intended, - * but R8..11 are clobbered (cleared to 0). - * IOW: they are the only registers which aren't - * preserved across INT80 syscall. + * Starting in Linux 4.17 (and any kernel that + * backports the change), R8..11 are preserved. + * Historically (and probably unintentionally), they + * were clobbered or zeroed. */ - if (*r64 == 0 && num <= 11) - continue; } printf("[FAIL]\tR%d has changed:%016llx\n", num, *r64); err++; diff --git a/tools/virtio/linux/dma-mapping.h b/tools/virtio/linux/dma-mapping.h index 1571e24e9494..f91aeb5fe571 100644 --- a/tools/virtio/linux/dma-mapping.h +++ b/tools/virtio/linux/dma-mapping.h @@ -6,8 +6,6 @@ # error Virtio userspace code does not support CONFIG_HAS_DMA #endif -#define PCI_DMA_BUS_IS_PHYS 1 - enum dma_data_direction { DMA_BIDIRECTIONAL = 0, DMA_TO_DEVICE = 1, |